From 0b17e3095ac2b9a296e99c8f34134340babdc248 Mon Sep 17 00:00:00 2001 From: castano Date: Mon, 9 Jun 2008 19:05:49 +0000 Subject: [PATCH] Update 2.0 project file. Fix backported cuda kernel. --- project/vc8/nvtt/nvtt.vcproj | 18 ++++++++--------- src/nvtt/cuda/CompressKernel.cu | 35 ++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/project/vc8/nvtt/nvtt.vcproj b/project/vc8/nvtt/nvtt.vcproj index 051d12b..2db3b50 100644 --- a/project/vc8/nvtt/nvtt.vcproj +++ b/project/vc8/nvtt/nvtt.vcproj @@ -711,7 +711,7 @@ > @@ -849,10 +849,6 @@ RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.cpp" > - - @@ -865,6 +861,10 @@ RelativePath="..\..\..\src\nvtt\nvtt_wrapper.cpp" > + + @@ -911,10 +911,6 @@ RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.h" > - - @@ -927,6 +923,10 @@ RelativePath="..\..\..\src\nvtt\nvtt_wrapper.h" > + + diff --git a/src/nvtt/cuda/CompressKernel.cu b/src/nvtt/cuda/CompressKernel.cu index f522b2c..916e3a3 100644 --- a/src/nvtt/cuda/CompressKernel.cu +++ b/src/nvtt/cuda/CompressKernel.cu @@ -594,6 +594,40 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights } */ /* evalLevel4Permutations, overload without the weights argument: each thread, indexed by threadIdx.x, walks up to 16 entries of permutations at a stride of NUM_THREADS, scores each entry with evalPermutation4, and keeps the lowest-error candidate in bestStart, bestEnd and bestPermutation. The lowest error seen by the thread is stored in errors[threadIdx.x]. If no candidate scores below FLT_MAX, the three best* references keep whatever the caller passed in. NOTE(review): NUM_THREADS is defined outside this hunk; presumably it is the block size - confirm. */ +__device__ void evalLevel4Permutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) +{ + const int idx = threadIdx.x; + + float bestError = FLT_MAX; + + for(int i = 0; i < 16; i++) + { + int pidx = idx + NUM_THREADS * i; + if (pidx >= 992) break; /* indices from 992 upward are never read; presumably the permutation table holds 992 entries - TODO confirm */ + + ushort start, end; + uint permutation = permutations[pidx]; + + float error = evalPermutation4(colors, colorSum, permutation, &start, &end); + + if (error < bestError) + { + bestError = error; + bestPermutation = permutation; + bestStart = start; + bestEnd = end; + } + } + /* enforce bestStart >= bestEnd: when the order is reversed, swap the two endpoints and XOR the permutation bits with 0x55555555 */ + if (bestStart < bestEnd) + { + swap(bestEnd, bestStart); + bestPermutation ^= 0x55555555; // Flip indices. 
+ } + /* record the lowest error found by this thread in its own slot of errors */ + errors[idx] = bestError; +} + __device__ void evalLevel4Permutations(const float3 * colors, const float * weights, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) { const int idx = threadIdx.x; @@ -629,7 +663,6 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig } - //////////////////////////////////////////////////////////////////////////////// // Find index with minimum error ////////////////////////////////////////////////////////////////////////////////