Re-enable QuickCompress for DXT5n.

Fix line endings.
2008-02-01 02:33:51 +00:00 · 2008-02-01 02:33:51 +00:00 · d44b8e9358
commit d44b8e9358
parent 05a760b2f6
4 changed files with 86 additions and 17 deletions
--- a/src/nvtt/CompressDXT.cpp
+++ b/src/nvtt/CompressDXT.cpp
@ -349,8 +349,7 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu
 			}
 			
 			// Compress Y.
-		//	QuickCompress::compressDXT1G(rgba, &block.color);
-			compressGreenBlock_BruteForce(rgba, &block.color);
+			QuickCompress::compressDXT1G(rgba, &block.color);
 			
 			if (outputOptions.outputHandler != NULL) {
 				outputOptions.outputHandler->writeData(&block, sizeof(block));
--- a/src/nvtt/InputOptions.cpp
+++ b/src/nvtt/InputOptions.cpp
@ -381,14 +381,14 @@ int InputOptions::Private::realMipmapCount() const
 }


-const Image * InputOptions::Private::image(uint face, uint mipmap) const
-{
-	nvDebugCheck(face < faceCount);
-	nvDebugCheck(mipmap < mipmapCount);
-
-	const InputImage & image = this->images[face * mipmapCount + mipmap];
-	nvDebugCheck(image.face == face);
-	nvDebugCheck(image.mipLevel == mipmap);
-
-	return image.data.ptr();
-}
+const Image * InputOptions::Private::image(uint face, uint mipmap) const
+{
+	nvDebugCheck(face < faceCount);
+	nvDebugCheck(mipmap < mipmapCount);
+
+	const InputImage & image = this->images[face * mipmapCount + mipmap];
+	nvDebugCheck(image.face == face);
+	nvDebugCheck(image.mipLevel == mipmap);
+
+	return image.data.ptr();
+}
--- a/src/nvtt/QuickCompressDXT.cpp
+++ b/src/nvtt/QuickCompressDXT.cpp
@ -435,10 +435,10 @@ static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
 	nvDebugCheck(block != NULL);

 	int palette[4];
-	palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
-	palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
-	palette[2] = (2 * palette[0] + palette[1]) / 3;
-	palette[3] = (2 * palette[1] + palette[0]) / 3;
+	palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
+	palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
+	palette[2] = (2 * palette[0] + palette[1]) / 3;
+	palette[3] = (2 * palette[1] + palette[0]) / 3;

 	int totalError = 0;

--- a/src/nvtt/cuda/ConvolveKernel.cu
+++ b/src/nvtt/cuda/ConvolveKernel.cu
@ -45,6 +45,76 @@
 #define __debugsync()
 #endif

+#define TN            256	// presumably the number of threads per block (convolveStrip uses it as the per-pass stride) -- TODO confirm against the launch configuration
+#define WARP_COUNT    (TN / 32)	// TN divided into groups of 32
+#define HWARP_COUNT   (TN / 16)	// TN divided into groups of 16
+
+// Window size; convolveStrip sizes its shared buffers as 32 * WS floats.
+#define WS            20
+
+
+
+// Clamp addressing: an index outside [0, h-1] is pinned to the nearest edge.
+struct WrapClamp
+{
+	// i: index to wrap (may be negative or >= h). h: extent along the axis.
+	// Returns the clamped index. Marked __device__ so it can be called from kernels.
+	__device__ int operator()(int i, int h) const
+	{
+		return min(max(i, 0), h-1);
+	}
+};
+
+// Repeat addressing: the index wraps around periodically with period h.
+struct WrapRepeat
+{
+	// i: index to wrap (may be negative or >= h). h: extent along the axis, h > 0.
+	// Returns the wrapped index in [0, h-1]. Marked __device__ so it can be called from kernels.
+	__device__ int operator()(int i, int h) const
+	{
+		// :( Non power of two! The extent is arbitrary, so a real modulo is needed.
+		// The remainder keeps the sign of i, so shift negative results back into
+		// range; taking abs(i) first would reflect the index instead of repeating it.
+		int r = i % h;
+		return (r < 0) ? r + h : r;
+	}
+};
+
+// Mirror addressing: the index is reflected at both edges without repeating
+// the edge sample (for h = 4: ... 2 1 | 0 1 2 3 | 2 1 0 | 1 2 3 ...).
+struct WrapMirror
+{
+	// i: index to wrap (may be negative or >= h). h: extent along the axis, h > 0.
+	// Returns the reflected index in [0, h-1]. Marked __device__ so it can be called from kernels.
+	__device__ int operator()(int i, int h) const
+	{
+		// A single sample has nothing to reflect onto; without this guard the
+		// loop below would never terminate for h == 1.
+		if (h <= 1) return 0;
+
+		i = abs(i);
+		// Reflect around the last sample. The reflection may overshoot below
+		// zero for indices far outside the range, hence the abs() on each pass.
+		while (i >= h) i = abs(2 * h - i - 2);
+		return i;
+	}
+};
+
+
+// Vertical convolution filter that processes vertical strips.
+//
+// @@ Work in progress: only the kernel weights are staged in shared memory so
+// far. d_channel and width are not read yet, and nothing is written back.
+//
+// d_channel: image channel to filter (unused for now).
+// d_kernel:  filter weights; 32 * WS floats are read from it.
+// width, height: image extents; height bounds the mirrored row index.
+__global__ void convolveStrip(float * d_channel, float * d_kernel, int width, int height)
+{
+	// Thread index within the block. Assumes a one-dimensional block of TN
+	// threads -- TODO confirm against the launch configuration.
+	const int t = threadIdx.x;
+
+	__shared__ float s_kernel[32 * WS];
+
+	// Preload kernel in shared memory. 32 * WS is not a multiple of TN, so the
+	// pass count is rounded up and the bounds check trims the last pass.
+	for (int i = 0; i < (32 * WS + TN - 1) / TN; i++)
+	{
+		int idx = i * TN + t;
+		if (idx < 32 * WS) s_kernel[idx] = d_kernel[idx];
+	}
+
+	// @@ A __syncthreads() is needed here once s_kernel is read by other threads.
+
+	__shared__ float s_strip[32 * WS];	// TN/32
+	
+	// Row offset of this group of 32 threads, centered on the filter window.
+	int wid = t / 32 - WS/2;
+
+	WrapMirror wrap;
+	int row = wrap(wid, height);
+
+	// Preload image block.
+	for (int i = 0; i < (32 * WS + TN - 1) / TN; i++)
+	{
+	}
+
+	// @@ Apply kernel to TN/32 rows.
+
+	// @@ Load 
+
+
+}
+
+
+
+
+

 __constant__ float inputGamma, outputInverseGamma;
 __constant__ float kernel[MAX_KERNEL_WIDTH];