DXT1a single color compressor. Fixes issue 131.
Init single color tables at startup.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@ -26,7 +26,6 @@
|
||||
|
||||
#include "CudaMath.h"
|
||||
|
||||
#include "../SingleColorLookup.h"
|
||||
|
||||
#define NUM_THREADS 64 // Number of threads per block.
|
||||
|
||||
@ -48,6 +47,9 @@ __device__ inline void swap(T & a, T & b)
|
||||
b = tmp;
|
||||
}
|
||||
|
||||
__constant__ uchar OMatch5[256][2];
|
||||
__constant__ uchar OMatch6[256][2];
|
||||
|
||||
__constant__ float3 kColorMetric = { 1.0f, 1.0f, 1.0f };
|
||||
__constant__ float3 kColorMetricSqr = { 1.0f, 1.0f, 1.0f };
|
||||
|
||||
|
@ -24,31 +24,24 @@
|
||||
#include "CudaCompressorDXT.h"
|
||||
#include "CudaUtils.h"
|
||||
|
||||
#include <nvcore/Debug.h>
|
||||
#include <nvmath/Color.h>
|
||||
#include <nvimage/Image.h>
|
||||
#include <nvimage/ColorBlock.h>
|
||||
#include <nvimage/BlockDXT.h>
|
||||
#include <nvtt/CompressionOptions.h>
|
||||
#include <nvtt/OutputOptions.h>
|
||||
#include <nvtt/QuickCompressDXT.h>
|
||||
#include <nvtt/OptimalCompressDXT.h>
|
||||
#include "nvcore/Debug.h"
|
||||
#include "nvmath/Color.h"
|
||||
#include "nvimage/Image.h"
|
||||
#include "nvimage/ColorBlock.h"
|
||||
#include "nvimage/BlockDXT.h"
|
||||
#include "nvtt/CompressionOptions.h"
|
||||
#include "nvtt/OutputOptions.h"
|
||||
#include "nvtt/QuickCompressDXT.h"
|
||||
#include "nvtt/OptimalCompressDXT.h"
|
||||
|
||||
|
||||
#if defined HAVE_CUDA
|
||||
#include <cuda_runtime_api.h>
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
#include <stdio.h>
|
||||
|
||||
using namespace nv;
|
||||
using namespace nvtt;
|
||||
|
||||
#if defined HAVE_CUDA
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
#define MAX_BLOCKS 8192U // 32768, 65535
|
||||
|
||||
#define MAX_BLOCKS 8192U // 32768, 65535 // @@ Limit number of blocks on slow devices to prevent hitting the watchdog timer.
|
||||
|
||||
extern "C" void setupCompressKernel(const float weights[3]);
|
||||
extern "C" void bindTextureToArray(cudaArray * d_data);
|
||||
@ -62,31 +55,13 @@ extern "C" void compressKernelDXT3(uint firstBlock, uint blockNum, uint w, uint
|
||||
|
||||
|
||||
#include "BitmapTable.h"
|
||||
|
||||
/*
|
||||
// Convert linear image to block linear.
|
||||
static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
|
||||
{
|
||||
const uint w = (image->width() + 3) / 4;
|
||||
const uint h = (image->height() + 3) / 4;
|
||||
|
||||
for(uint by = 0; by < h; by++) {
|
||||
for(uint bx = 0; bx < w; bx++) {
|
||||
const uint bw = min(image->width() - bx * 4, 4U);
|
||||
const uint bh = min(image->height() - by * 4, 4U);
|
||||
|
||||
for (uint i = 0; i < 16; i++) {
|
||||
const int x = (i % 4) % bw;
|
||||
const int y = (i / 4) % bh;
|
||||
blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
#include "nvtt/SingleColorLookup.h"
|
||||
|
||||
#endif
|
||||
|
||||
using namespace nv;
|
||||
using namespace nvtt;
|
||||
|
||||
|
||||
CudaContext::CudaContext() :
|
||||
bitmapTable(NULL),
|
||||
@ -111,6 +86,11 @@ CudaContext::CudaContext() :
|
||||
// Allocate scratch buffers.
|
||||
cudaMalloc((void**) &data, MAX_BLOCKS * 64U);
|
||||
cudaMalloc((void**) &result, MAX_BLOCKS * 8U);
|
||||
|
||||
// Init single color lookup contant tables.
|
||||
cudaMemcpyToSymbol("OMatch5", OMatch5, sizeof(OMatch5), 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol("OMatch6", OMatch6, sizeof(OMatch6), 0, cudaMemcpyHostToDevice);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -300,6 +280,30 @@ void CudaCompressorDXT5::compressBlocks(uint first, uint count, uint w, uint h,
|
||||
|
||||
#if 0
|
||||
|
||||
|
||||
/*
|
||||
// Convert linear image to block linear.
|
||||
static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
|
||||
{
|
||||
const uint w = (image->width() + 3) / 4;
|
||||
const uint h = (image->height() + 3) / 4;
|
||||
|
||||
for(uint by = 0; by < h; by++) {
|
||||
for(uint bx = 0; bx < w; bx++) {
|
||||
const uint bw = min(image->width() - bx * 4, 4U);
|
||||
const uint bh = min(image->height() - by * 4, 4U);
|
||||
|
||||
for (uint i = 0; i < 16; i++) {
|
||||
const int x = (i % 4) % bw;
|
||||
const int y = (i / 4) % bh;
|
||||
blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/// Compress image using CUDA.
|
||||
void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
|
Reference in New Issue
Block a user