Update 2.0 project file.
Fix backported CUDA kernel.
This commit is contained in:
parent
6e1a87129f
commit
0b17e3095a
@ -711,7 +711,7 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -m32 -ccbin "$(VCInstallDir)bin" -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I"$(CUDA_INC_PATH)" -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu"
|
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -m32 -ccbin "$(VCInstallDir)bin" -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I"$(CUDA_INC_PATH)" -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu
"
|
||||||
AdditionalDependencies="CudaMath.h"
|
AdditionalDependencies="CudaMath.h"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
@ -849,10 +849,6 @@
|
|||||||
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.cpp"
|
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.cpp"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
<File
|
|
||||||
RelativePath="..\..\..\src\nvtt\FastCompressDXT.cpp"
|
|
||||||
>
|
|
||||||
</File>
|
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\src\nvtt\InputOptions.cpp"
|
RelativePath="..\..\..\src\nvtt\InputOptions.cpp"
|
||||||
>
|
>
|
||||||
@ -865,6 +861,10 @@
|
|||||||
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.cpp"
|
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.cpp"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.cpp"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\src\nvtt\OutputOptions.cpp"
|
RelativePath="..\..\..\src\nvtt\OutputOptions.cpp"
|
||||||
>
|
>
|
||||||
@ -911,10 +911,6 @@
|
|||||||
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.h"
|
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.h"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
<File
|
|
||||||
RelativePath="..\..\..\src\nvtt\FastCompressDXT.h"
|
|
||||||
>
|
|
||||||
</File>
|
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\src\nvtt\InputOptions.h"
|
RelativePath="..\..\..\src\nvtt\InputOptions.h"
|
||||||
>
|
>
|
||||||
@ -927,6 +923,10 @@
|
|||||||
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.h"
|
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.h"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.h"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\src\nvtt\OutputOptions.h"
|
RelativePath="..\..\..\src\nvtt\OutputOptions.h"
|
||||||
>
|
>
|
||||||
|
@ -594,6 +594,40 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// Per-thread search over the permutation table for the candidate with the
// lowest error, as reported by evalPermutation4(). This overload takes no
// per-color weights (compare the overload that has a `weights` parameter).
//
// colors          - forwarded unchanged to evalPermutation4().
// colorSum        - forwarded unchanged to evalPermutation4().
// permutations    - table read at indices idx + NUM_THREADS * i, below 992.
// bestStart       - out: `start` of the lowest-error candidate seen.
// bestEnd         - out: `end` of the lowest-error candidate seen.
// bestPermutation - out: permutation bits of the lowest-error candidate.
// errors          - out: errors[threadIdx.x] receives this thread's lowest error.
//
// NOTE(review): the three reference outputs are only assigned when some
// candidate has error < FLT_MAX; otherwise the final ordering check below
// operates on whatever values the caller passed in -- confirm callers
// initialise them or that this cannot happen.
__device__ void evalLevel4Permutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
|
||||||
|
{
|
||||||
|
// Thread index within the block; selects this thread's slice of the table
// and its slot in `errors`.
const int idx = threadIdx.x;
|
||||||
|
|
||||||
|
// Lowest error found so far by this thread.
float bestError = FLT_MAX;
|
||||||
|
|
||||||
|
// At most 16 candidates per thread, spaced NUM_THREADS entries apart.
for(int i = 0; i < 16; i++)
|
||||||
|
{
|
||||||
|
int pidx = idx + NUM_THREADS * i;
|
||||||
|
// Stop once the index runs past entry 991.
// Presumably 992 is the number of entries in `permutations` -- TODO confirm.
if (pidx >= 992) break;
|
||||||
|
|
||||||
|
// Filled in by evalPermutation4() through the pointers passed below.
ushort start, end;
|
||||||
|
uint permutation = permutations[pidx];
|
||||||
|
|
||||||
|
float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
|
||||||
|
|
||||||
|
// Strict '<' keeps the earliest candidate when errors tie.
if (error < bestError)
|
||||||
|
{
|
||||||
|
bestError = error;
|
||||||
|
bestPermutation = permutation;
|
||||||
|
bestStart = start;
|
||||||
|
bestEnd = end;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enforce bestStart >= bestEnd: swap the endpoints and toggle the low bit
// of every 2-bit field in the permutation to match.
// Presumably this keeps the DXT1 four-color endpoint ordering -- confirm.
if (bestStart < bestEnd)
|
||||||
|
{
|
||||||
|
swap(bestEnd, bestStart);
|
||||||
|
bestPermutation ^= 0x55555555; // Flip indices.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Publish this thread's lowest error in its own slot.
errors[idx] = bestError;
|
||||||
|
}
|
||||||
|
|
||||||
__device__ void evalLevel4Permutations(const float3 * colors, const float * weights, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
|
__device__ void evalLevel4Permutations(const float3 * colors, const float * weights, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
|
||||||
{
|
{
|
||||||
const int idx = threadIdx.x;
|
const int idx = threadIdx.x;
|
||||||
@ -629,7 +663,6 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Find index with minimum error
|
// Find index with minimum error
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
Loading…
Reference in New Issue
Block a user