From b41752aa84a291b710f1521c8d551dde84f09f8b Mon Sep 17 00:00:00 2001
From: castano
Date: Sat, 16 Feb 2008 09:53:07 +0000
Subject: [PATCH] Apply singlecolorpatch.diff from Amir Ebrahimi. Add single color compressor to CUDA implementation.

---
Review notes (changes relative to the patch as submitted):
 - compressDXT1 kernel: the shared sameColor flag is written inside the
   idx < 16 section of loadColorBlock but was read by every thread before
   any barrier. The flag is now tested after __syncthreads().
 - saveSingleColorBlockDXT1: channels are rounded to nearest instead of
   truncated, and when color0 < color1 the endpoints are swapped and
   index 3 is used, so the block is never decoded in three-color mode.
 - singleColor (device emulation branch): the accumulator started at
   false and used an undeclared idx; it now starts at true and uses i.
 - Line breaks, indentation and whitespace-only lines were reconstructed
   from a copy of this patch whose whitespace had been collapsed; the
   post-image blob ids on the index lines of CompressKernel.cu and
   CudaMath.h no longer match the edited content.

 src/nvimage/ColorBlock.cpp      | 13 ++++++++
 src/nvimage/ColorBlock.h        |  1 +
 src/nvtt/CMakeLists.txt         |  3 ++
 src/nvtt/CompressDXT.cpp        | 23 ++++++++++----
 src/nvtt/SingleColorLookup.h    | 14 +++++++--
 src/nvtt/cuda/CompressKernel.cu | 53 +++++++++++++++++++++++++++++----
 src/nvtt/cuda/CudaMath.h        | 39 +++++++++++++++++++++---
 src/nvtt/tests/stress.cpp       |  4 +--
 8 files changed, 131 insertions(+), 19 deletions(-)

diff --git a/src/nvimage/ColorBlock.cpp b/src/nvimage/ColorBlock.cpp
index cdc7d3c..16faf15 100644
--- a/src/nvimage/ColorBlock.cpp
+++ b/src/nvimage/ColorBlock.cpp
@@ -110,6 +110,19 @@ void ColorBlock::splatY()
 	}
 }
 
+/// Returns true if the block has a single color.
+bool ColorBlock::isSingleColor() const
+{
+	for(int i = 1; i < 16; i++)
+	{
+		if (m_color[0] != m_color[i])
+		{
+			return false;
+		}
+	}
+
+	return true;
+}
 
 /// Count number of unique colors in this color block.
 uint ColorBlock::countUniqueColors() const
diff --git a/src/nvimage/ColorBlock.h b/src/nvimage/ColorBlock.h
index 9087029..cdda4d6 100644
--- a/src/nvimage/ColorBlock.h
+++ b/src/nvimage/ColorBlock.h
@@ -24,6 +24,7 @@ namespace nv
 		void splatX();
 		void splatY();
 
+		bool isSingleColor() const;
 		uint countUniqueColors() const;
 		Color32 averageColor() const;
 		bool hasAlpha() const;
diff --git a/src/nvtt/CMakeLists.txt b/src/nvtt/CMakeLists.txt
index 7465bf6..36d9cdd 100644
--- a/src/nvtt/CMakeLists.txt
+++ b/src/nvtt/CMakeLists.txt
@@ -79,6 +79,9 @@ TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
 ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
 TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
 
+ADD_EXECUTABLE(stress tests/stress.cpp tools/cmdline.h)
+TARGET_LINK_LIBRARIES(stress nvcore nvmath nvimage nvtt)
+
 ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
 TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)
 
diff --git a/src/nvtt/CompressDXT.cpp b/src/nvtt/CompressDXT.cpp
index 5287f13..06af8c2 100644
--- a/src/nvtt/CompressDXT.cpp
+++ b/src/nvtt/CompressDXT.cpp
@@ -69,7 +69,14 @@ void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & ou
 		for (uint x = 0; x < w; x += 4) {
 			rgba.init(image, x, y);
 
-			QuickCompress::compressDXT1(rgba, &block);
+			if (rgba.isSingleColor())
+			{
+				QuickCompress::compressDXT1(rgba.color(0), &block);
+			}
+			else
+			{
+				QuickCompress::compressDXT1(rgba, &block);
+			}
 
 			if (outputOptions.outputHandler != NULL) {
 				outputOptions.outputHandler->writeData(&block, sizeof(block));
@@ -214,10 +221,16 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
 
 			rgba.init(image, x, y);
 
-			// Compress color.
-			squish::ColourSet colours((uint8 *)rgba.colors(), 0);
-			fit.SetColourSet(&colours, squish::kDxt1);
-			fit.Compress(&block);
+			if (rgba.isSingleColor())
+			{
+				QuickCompress::compressDXT1(rgba.color(0), &block);
+			}
+			else
+			{
+				squish::ColourSet colours((uint8 *)rgba.colors(), 0);
+				fit.SetColourSet(&colours, squish::kDxt1);
+				fit.Compress(&block);
+			}
 
 			if (outputOptions.outputHandler != NULL) {
 				outputOptions.outputHandler->writeData(&block, sizeof(block));
diff --git a/src/nvtt/SingleColorLookup.h b/src/nvtt/SingleColorLookup.h
index 1eab018..63d0462 100644
--- a/src/nvtt/SingleColorLookup.h
+++ b/src/nvtt/SingleColorLookup.h
@@ -48,7 +48,12 @@ void initTables()
 };
 */
 
-const static uint8 OMatch5[256][2] =
+#if __CUDACC__
+__constant__ unsigned short
+#else
+const static uint8
+#endif
+OMatch5[256][2] =
 {
 	{0x00, 0x00},
 	{0x00, 0x00},
@@ -308,7 +313,12 @@ const static uint8 OMatch5[256][2] =
 	{0x1F, 0x1F},
 };
 
-const static uint8 OMatch6[256][2] =
+#if __CUDACC__
+__constant__ unsigned short
+#else
+const static uint8
+#endif
+OMatch6[256][2] =
 {
 	{0x00, 0x00},
 	{0x00, 0x01},
diff --git a/src/nvtt/cuda/CompressKernel.cu b/src/nvtt/cuda/CompressKernel.cu
index 22394be..42c3144 100644
--- a/src/nvtt/cuda/CompressKernel.cu
+++ b/src/nvtt/cuda/CompressKernel.cu
@@ -28,6 +28,8 @@
 
 #include "CudaMath.h"
 
+#include "../SingleColorLookup.h"
+
 #define NUM_THREADS 64		// Number of threads per block.
 
 #if __DEVICE_EMULATION__
@@ -117,7 +119,7 @@ __device__ void sortColors(const float * values, int * cmp)
 ////////////////////////////////////////////////////////////////////////////////
 // Load color block to shared mem
 ////////////////////////////////////////////////////////////////////////////////
-__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16])
+__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor)
 {
 	const int bid = blockIdx.x;
 	const int idx = threadIdx.x;
@@ -128,7 +130,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
 	{
 		// Read color and copy to shared mem.
 		uint c = image[(bid) * 16 + idx];
-	
+
 		colors[idx].z = ((c >> 0) & 0xFF) * (1.0f / 255.0f);
 		colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
 		colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
@@ -137,11 +139,13 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
 #if __DEVICE_EMULATION__
 		} __debugsync(); if (idx < 16) {
 #endif
-	
+
 		// Sort colors along the best fit line.
 		colorSums(colors, sums);
 		float3 axis = bestFitLine(colors, sums[0], kColorMetric);
 
+		*sameColor = (axis == make_float3(0, 0, 0));
+
 		dps[idx] = dot(colors[idx], axis);
 
 #if __DEVICE_EMULATION__
@@ -997,6 +1001,34 @@ __device__ void saveBlockCTX1(ushort start, ushort end, uint permutation, int xr
 	saveBlockDXT1(start, end, permutation, xrefs, result);
 }
 
+/// Write the DXT1 block for a color block whose 16 texels are identical.
+/// Endpoints are looked up in the OMatch5/OMatch6 single color tables.
+__device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
+{
+	const int bid = blockIdx.x;
+
+	// Round to nearest: the channels were stored as c * (1.0f / 255.0f), so
+	// plain truncation could land one step below the original byte.
+	int r = int(color.x * 255.0f + 0.5f);
+	int g = int(color.y * 255.0f + 0.5f);
+	int b = int(color.z * 255.0f + 0.5f);
+
+	ushort color0 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5) | OMatch5[b][0];
+	ushort color1 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5) | OMatch5[b][1];
+
+	if (color0 < color1)
+	{
+		// Store the larger endpoint first so the block stays in four color
+		// mode; with the endpoints swapped, index 3 selects the same blend.
+		result[bid].x = ((uint)color0 << 16) | color1;
+		result[bid].y = 0xffffffff;
+	}
+	else
+	{
+		result[bid].x = ((uint)color1 << 16) | color0;
+		result[bid].y = 0xaaaaaaaa;
+	}
+}
 
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1007,9 +1039,18 @@ __global__ void compressDXT1(const uint * permutations, const uint * image, uint
 	__shared__ float3 colors[16];
 	__shared__ float3 sums[16];
 	__shared__ int xrefs[16];
+	__shared__ int sameColor;
 
-	loadColorBlock(image, colors, sums, xrefs);
-	
+	loadColorBlock(image, colors, sums, xrefs, &sameColor);
+
+	// sameColor is written inside the idx < 16 section of loadColorBlock;
+	// every thread has to pass this barrier before reading it.
 	__syncthreads();
 
+	if (sameColor)
+	{
+		if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
+		return;
+	}
+
 	ushort bestStart, bestEnd;
@@ -1269,7 +1310,7 @@ __global__ void compressDXT5(const uint * permutations, const uint * image, uint
 	__shared__ int xrefs[16];
 
 	loadColorBlock(image, colors, sums, weights, xrefs);
-	
+
 	__syncthreads();
 
 	compressAlpha(weights, result);
diff --git a/src/nvtt/cuda/CudaMath.h b/src/nvtt/cuda/CudaMath.h
index cd6dd3c..13e27df 100644
--- a/src/nvtt/cuda/CudaMath.h
+++ b/src/nvtt/cuda/CudaMath.h
@@ -82,6 +82,12 @@ inline __device__ __host__ void operator /=(float3 & b, float f)
 	b.z *= inv;
 }
 
+inline __device__ __host__ bool operator ==(float3 a, float3 b)
+{
+	return a.x == b.x && a.y == b.y && a.z == b.z;
+}
+
+
 // float2 operators
 inline __device__ __host__ float2 operator *(float2 a, float2 b)
 {
@@ -187,15 +193,42 @@ inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
 		float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
 		float m = max(max(x, y), z);
 		float iv = 1.0f / m;
-		#if __DEVICE_EMULATION__
 		if (m == 0.0f) iv = 0.0f;
-		#endif
 		v = make_float3(x*iv, y*iv, z*iv);
 	}
 
 	return v;
 }
 
+/// Returns true when all 16 colors compare equal to colors[0].
+/// The non emulation path indexes a 16 entry array by threadIdx.x, so it
+/// must only be called by threads 0-15.
+inline __device__ bool singleColor(const float3 * colors)
+{
+#if __DEVICE_EMULATION__
+	bool sameColor = true;
+	for (int i = 0; i < 16; i++)
+	{
+		sameColor &= (colors[i] == colors[0]);
+	}
+	return sameColor;
+#else
+	__shared__ int sameColor[16];
+
+	const int idx = threadIdx.x;
+
+	// NOTE(review): no barrier between the reduction steps; presumably this
+	// relies on the 16 threads executing in lockstep - confirm.
+	sameColor[idx] = (colors[idx] == colors[0]);
+	sameColor[idx] &= sameColor[idx^8];
+	sameColor[idx] &= sameColor[idx^4];
+	sameColor[idx] &= sameColor[idx^2];
+	sameColor[idx] &= sameColor[idx^1];
+
+	return sameColor[0];
+#endif
+}
+
 inline __device__ void colorSums(const float3 * colors, float3 * sums)
 {
 #if __DEVICE_EMULATION__
@@ -284,9 +317,7 @@ inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
 		float y = v.x * matrix[1] + v.y * matrix[2];
 		float m = max(x, y);
 		float iv = 1.0f / m;
-		#if __DEVICE_EMULATION__
 		if (m == 0.0f) iv = 0.0f;
-		#endif
 		v = make_float2(x*iv, y*iv);
 	}
 
diff --git a/src/nvtt/tests/stress.cpp b/src/nvtt/tests/stress.cpp
index 28e33eb..526bcfa 100644
--- a/src/nvtt/tests/stress.cpp
+++ b/src/nvtt/tests/stress.cpp
@@ -47,7 +47,7 @@ struct MyOutputHandler : public nvtt::OutputHandler
 
 	virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
 	{
-		assert(size == OUTPUT_SIZE);
+		assert(size == sizeof(int) * OUTPUT_SIZE);
 		assert(width == WIDTH);
 		assert(height == HEIGHT);
 		assert(depth == 1);
@@ -177,7 +177,7 @@ void precomp()
 
 int main(int argc, char *argv[])
 {
-	precomp();
+	//precomp();
 
 	nvtt::InputOptions inputOptions;
 	inputOptions.setTextureLayout(nvtt::TextureType_2D, 1024, 1024);