DXT1a single color compressor. Fixes issue 131.

Init single color tables at startup.
2010-07-22 09:23:17 +00:00
parent da548fd03a
commit ac7c017c35
12 changed files with 1384 additions and 1853 deletions
--- a/src/nvtt/cuda/BitmapTable.h
+++ b/src/nvtt/cuda/BitmapTable.h
--- a/src/nvtt/cuda/CompressKernel.cu
+++ b/src/nvtt/cuda/CompressKernel.cu
@ -26,7 +26,6 @@

 #include "CudaMath.h"

-#include "../SingleColorLookup.h"

 #define NUM_THREADS 64		// Number of threads per block.

@ -48,6 +47,9 @@ __device__ inline void swap(T & a, T & b)
 	b = tmp;
 }

+__constant__ uchar OMatch5[256][2];
+__constant__ uchar OMatch6[256][2];
+
 __constant__ float3 kColorMetric = { 1.0f, 1.0f, 1.0f };
 __constant__ float3 kColorMetricSqr = { 1.0f, 1.0f, 1.0f };

--- a/src/nvtt/cuda/CudaCompressorDXT.cpp
+++ b/src/nvtt/cuda/CudaCompressorDXT.cpp
@ -24,31 +24,24 @@
 #include "CudaCompressorDXT.h"
 #include "CudaUtils.h"

-#include <nvcore/Debug.h>
-#include <nvmath/Color.h>
-#include <nvimage/Image.h>
-#include <nvimage/ColorBlock.h>
-#include <nvimage/BlockDXT.h>
-#include <nvtt/CompressionOptions.h>
-#include <nvtt/OutputOptions.h>
-#include <nvtt/QuickCompressDXT.h>
-#include <nvtt/OptimalCompressDXT.h>
+#include "nvcore/Debug.h"
+#include "nvmath/Color.h"
+#include "nvimage/Image.h"
+#include "nvimage/ColorBlock.h"
+#include "nvimage/BlockDXT.h"
+#include "nvtt/CompressionOptions.h"
+#include "nvtt/OutputOptions.h"
+#include "nvtt/QuickCompressDXT.h"
+#include "nvtt/OptimalCompressDXT.h"


-#if defined HAVE_CUDA
-#include <cuda_runtime_api.h>
-#endif
-
 #include <time.h>
 #include <stdio.h>

-using namespace nv;
-using namespace nvtt;
-
 #if defined HAVE_CUDA
+#include <cuda_runtime_api.h>

-#define MAX_BLOCKS 8192U // 32768, 65535
-
+#define MAX_BLOCKS 8192U // 32768, 65535 // @@ Limit number of blocks on slow devices to prevent hitting the watchdog timer.

 extern "C" void setupCompressKernel(const float weights[3]);
 extern "C" void bindTextureToArray(cudaArray * d_data);
@ -62,31 +55,13 @@ extern "C" void compressKernelDXT3(uint firstBlock, uint blockNum, uint w, uint


 #include "BitmapTable.h"
-
-/*
-// Convert linear image to block linear.
-static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
-{
-	const uint w = (image->width() + 3) / 4;
-	const uint h = (image->height() + 3) / 4;
-
-	for(uint by = 0; by < h; by++) {
-		for(uint bx = 0; bx < w; bx++) {
-			const uint bw = min(image->width() - bx * 4, 4U);
-			const uint bh = min(image->height() - by * 4, 4U);
-
-			for (uint i = 0; i < 16; i++) {
-				const int x = (i % 4) % bw;
-				const int y = (i / 4) % bh;
-				blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u;
-			}
-		}
-	}
-}
-*/
+#include "nvtt/SingleColorLookup.h"

 #endif

+using namespace nv;
+using namespace nvtt;
+

 CudaContext::CudaContext() : 
 	bitmapTable(NULL), 
@ -111,6 +86,11 @@ CudaContext::CudaContext() :
 	// Allocate scratch buffers.
    cudaMalloc((void**) &data, MAX_BLOCKS * 64U);
    cudaMalloc((void**) &result, MAX_BLOCKS * 8U);
+
+    // Init single color lookup constant tables.
+    cudaMemcpyToSymbol("OMatch5", OMatch5, sizeof(OMatch5), 0, cudaMemcpyHostToDevice);
+    cudaMemcpyToSymbol("OMatch6", OMatch6, sizeof(OMatch6), 0, cudaMemcpyHostToDevice);
+
 #endif
 }

@ -300,6 +280,30 @@ void CudaCompressorDXT5::compressBlocks(uint first, uint count, uint w, uint h,

 #if 0

+
+/*
+// Convert linear image to block linear.
+static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
+{
+	const uint w = (image->width() + 3) / 4;
+	const uint h = (image->height() + 3) / 4;
+
+	for(uint by = 0; by < h; by++) {
+		for(uint bx = 0; bx < w; bx++) {
+			const uint bw = min(image->width() - bx * 4, 4U);
+			const uint bh = min(image->height() - by * 4, 4U);
+
+			for (uint i = 0; i < 16; i++) {
+				const int x = (i % 4) % bw;
+				const int y = (i / 4) % bh;
+				blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u;
+			}
+		}
+	}
+}
+*/
+
+
 /// Compress image using CUDA.
 void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
 {