Upgrade CMP Core.
parent
1e06539012
commit
4ff7af50ca
@ -0,0 +1,99 @@
|
|||||||
|
//=====================================================================
|
||||||
|
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files(the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions :
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in
|
||||||
|
// all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
|
||||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
// THE SOFTWARE.
|
||||||
|
//
|
||||||
|
// File: BC1_Encode_kernel.hlsl
|
||||||
|
//--------------------------------------------------------------------------------------
|
||||||
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
// Licensed under the MIT License.
|
||||||
|
//--------------------------------------------------------------------------------------
|
||||||
|
#ifndef ASPM_HLSL
|
||||||
|
#define ASPM_HLSL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Constants bound at constant-buffer slot b0.
// The order and types of the fields define the buffer layout; keep them as-is.
cbuffer cbCS : register(b0)
{
    uint  g_tex_width;         // not read by EncodeBlocks in this file
    uint  g_num_block_x;       // divisor used to split a linear block index into (block_x, block_y)
    uint  g_format;            // not read by EncodeBlocks in this file
    uint  g_mode_id;           // not read by EncodeBlocks in this file
    uint  g_start_block_id;    // offset added to the block index derived from the thread-group ID
    uint  g_num_total_blocks;  // not read by EncodeBlocks in this file
    float g_alpha_weight;      // not read by EncodeBlocks in this file
    float g_quality;           // passed by EncodeBlocks to the block compressor
    // NOTE(review): the fields marked "not read" may still be used by BCn_Common_Kernel.h - confirm.
};
|
||||||
|
|
||||||
|
#include "BCn_Common_Kernel.h"
|
||||||
|
|
||||||
|
// Source Data
|
||||||
|
Texture2D g_Input : register( t0 );
|
||||||
|
StructuredBuffer<uint4> g_InBuff : register( t1 );
|
||||||
|
|
||||||
|
// Compressed Output Data
|
||||||
|
RWStructuredBuffer<uint2> g_OutBuff : register( u0 );
|
||||||
|
|
||||||
|
// Processing multiple blocks at a time
|
||||||
|
#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block
|
||||||
|
#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4
|
||||||
|
#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
|
||||||
|
#define BLOCK_SIZE_Y 4
|
||||||
|
#define BLOCK_SIZE_X 4
|
||||||
|
|
||||||
|
groupshared float4 shared_temp[THREAD_GROUP_SIZE];
|
||||||
|
|
||||||
|
// BC1 encode entry point.
// A thread group of THREAD_GROUP_SIZE (64) threads covers BLOCK_IN_GROUP (4)
// BC blocks, MAX_USED_THREAD (16) threads per block. Each thread loads one
// texel into shared_temp; after the group barrier the first thread of every
// block gathers that block's RGB values and stores the compressed block.
//   GI      - flattened thread index within the group (SV_GroupIndex)
//   groupID - thread-group ID (SV_GroupID); only .x is used
[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
{
    // Position of this thread: which of the group's blocks, and which texel of it.
    uint localBlock   = GI / MAX_USED_THREAD;
    uint firstTexel   = localBlock * MAX_USED_THREAD;   // first shared_temp slot of this block
    uint texelInBlock = GI - firstTexel;

    // Global block index, then the texel coordinate of the block's top-left corner.
    uint blockID  = g_start_block_id + groupID.x * BLOCK_IN_GROUP + localBlock;
    uint blockRow = blockID / g_num_block_x;
    uint blockCol = blockID - blockRow * g_num_block_x;
    uint originX  = blockCol * BLOCK_SIZE_X;
    uint originY  = blockRow * BLOCK_SIZE_Y;

    // Stage the source texel for this thread in group-shared memory.
    if (texelInBlock < 16)
    {
        uint3 texel = uint3( originX + texelInBlock % 4, originY + texelInBlock / 4, 0 );
        shared_temp[GI] = float4(g_Input.Load( texel ));
    }

    GroupMemoryBarrierWithGroupSync();

    // One thread per block compresses the staged texels and writes the result.
    if (texelInBlock == 0)
    {
        float3 block[16];
        for (int i = 0; i < 16; i++ )
        {
            block[i] = shared_temp[firstTexel + i].xyz;
        }

        g_OutBuff[blockID] = CompressBlockBC1_UNORM(block, g_quality, false);
    }
}
|
@ -0,0 +1,101 @@
|
|||||||
|
//=====================================================================
|
||||||
|
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files(the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions :
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in
|
||||||
|
// all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
|
||||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
// THE SOFTWARE.
|
||||||
|
//
|
||||||
|
// File: BC1Encode.hlsl
|
||||||
|
//--------------------------------------------------------------------------------------
|
||||||
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
// Licensed under the MIT License.
|
||||||
|
//--------------------------------------------------------------------------------------
|
||||||
|
#ifndef ASPM_HLSL
|
||||||
|
#define ASPM_HLSL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// Constants bound at constant-buffer slot b0.
// The order and types of the fields define the buffer layout; keep them as-is.
cbuffer cbCS : register(b0)
{
    uint  g_tex_width;         // not read by EncodeBlocks in this file
    uint  g_num_block_x;       // divisor used to split a linear block index into (block_x, block_y)
    uint  g_format;            // not read by EncodeBlocks in this file
    uint  g_mode_id;           // not read by EncodeBlocks in this file
    uint  g_start_block_id;    // offset added to the block index derived from the thread-group ID
    uint  g_num_total_blocks;  // not read by EncodeBlocks in this file
    float g_alpha_weight;      // not read by EncodeBlocks in this file
    float g_quality;           // passed by EncodeBlocks to the block compressor
    // NOTE(review): the fields marked "not read" may still be used by BCn_Common_Kernel.h - confirm.
};
|
||||||
|
|
||||||
|
#include "BCn_Common_Kernel.h"
|
||||||
|
|
||||||
|
// Source Data
|
||||||
|
Texture2D g_Input : register( t0 );
|
||||||
|
StructuredBuffer<uint4> g_InBuff : register( t1 );
|
||||||
|
|
||||||
|
// Compressed Output Data
|
||||||
|
RWStructuredBuffer<uint4> g_OutBuff : register( u0 );
|
||||||
|
|
||||||
|
// Processing multiple blocks at a time
|
||||||
|
#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block
|
||||||
|
#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4
|
||||||
|
#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
|
||||||
|
#define BLOCK_SIZE_Y 4
|
||||||
|
#define BLOCK_SIZE_X 4
|
||||||
|
|
||||||
|
groupshared float4 shared_temp[THREAD_GROUP_SIZE];
|
||||||
|
|
||||||
|
// BC2 encode entry point.
// A thread group of THREAD_GROUP_SIZE (64) threads covers BLOCK_IN_GROUP (4)
// BC blocks, MAX_USED_THREAD (16) threads per block. Each thread loads one
// texel into shared_temp; after the group barrier the first thread of every
// block gathers that block's RGB and alpha values and stores the compressed block.
//   GI      - flattened thread index within the group (SV_GroupIndex)
//   groupID - thread-group ID (SV_GroupID); only .x is used
[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
{
    // Position of this thread: which of the group's blocks, and which texel of it.
    uint localBlock   = GI / MAX_USED_THREAD;
    uint firstTexel   = localBlock * MAX_USED_THREAD;   // first shared_temp slot of this block
    uint texelInBlock = GI - firstTexel;

    // Global block index, then the texel coordinate of the block's top-left corner.
    uint blockID  = g_start_block_id + groupID.x * BLOCK_IN_GROUP + localBlock;
    uint blockRow = blockID / g_num_block_x;
    uint blockCol = blockID - blockRow * g_num_block_x;
    uint originX  = blockCol * BLOCK_SIZE_X;
    uint originY  = blockRow * BLOCK_SIZE_Y;

    // Stage the source texel for this thread in group-shared memory.
    if (texelInBlock < 16)
    {
        uint3 texel = uint3( originX + texelInBlock % 4, originY + texelInBlock / 4, 0 );
        shared_temp[GI] = float4(g_Input.Load( texel ));
    }

    GroupMemoryBarrierWithGroupSync();

    // One thread per block splits the staged texels into colour and alpha,
    // compresses them and writes the result.
    if (texelInBlock == 0)
    {
        float3 blockRGB[16];
        float  blockA[16];
        for (int i = 0; i < 16; i++ )
        {
            float4 texelValue = shared_temp[firstTexel + i];
            blockRGB[i] = texelValue.xyz;
            blockA[i]   = texelValue.w;
        }
        g_OutBuff[blockID] = CompressBlockBC2_UNORM(blockRGB, blockA, g_quality, false);
    }
}
|
@ -0,0 +1,101 @@
|
|||||||
|
//=====================================================================
|
||||||
|
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files(the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions :
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in
|
||||||
|
// all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
|
||||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
// THE SOFTWARE.
|
||||||
|
//
|
||||||
|
// File: BC1Encode.hlsl
|
||||||
|
//--------------------------------------------------------------------------------------
|
||||||
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
// Licensed under the MIT License.
|
||||||
|
//--------------------------------------------------------------------------------------
|
||||||
|
#ifndef ASPM_HLSL
|
||||||
|
#define ASPM_HLSL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Constants bound at constant-buffer slot b0.
// The order and types of the fields define the buffer layout; keep them as-is.
cbuffer cbCS : register(b0)
{
    uint  g_tex_width;         // not read by EncodeBlocks in this file
    uint  g_num_block_x;       // divisor used to split a linear block index into (block_x, block_y)
    uint  g_format;            // not read by EncodeBlocks in this file
    uint  g_mode_id;           // not read by EncodeBlocks in this file
    uint  g_start_block_id;    // offset added to the block index derived from the thread-group ID
    uint  g_num_total_blocks;  // not read by EncodeBlocks in this file
    float g_alpha_weight;      // not read by EncodeBlocks in this file
    float g_quality;           // passed by EncodeBlocks to the block compressor
    // NOTE(review): the fields marked "not read" may still be used by BCn_Common_Kernel.h - confirm.
};
|
||||||
|
|
||||||
|
#include "BCn_Common_Kernel.h"
|
||||||
|
|
||||||
|
// Source Data
|
||||||
|
Texture2D g_Input : register( t0 );
|
||||||
|
StructuredBuffer<uint4> g_InBuff : register( t1 );
|
||||||
|
|
||||||
|
// Compressed Output Data
|
||||||
|
RWStructuredBuffer<uint4> g_OutBuff : register( u0 );
|
||||||
|
|
||||||
|
// Processing multiple blocks at a time
|
||||||
|
#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block
|
||||||
|
#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4
|
||||||
|
#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
|
||||||
|
#define BLOCK_SIZE_Y 4
|
||||||
|
#define BLOCK_SIZE_X 4
|
||||||
|
|
||||||
|
groupshared float4 shared_temp[THREAD_GROUP_SIZE];
|
||||||
|
|
||||||
|
// BC3 encode entry point.
// A thread group of THREAD_GROUP_SIZE (64) threads covers BLOCK_IN_GROUP (4)
// BC blocks, MAX_USED_THREAD (16) threads per block. Each thread loads one
// texel into shared_temp; after the group barrier the first thread of every
// block gathers that block's RGB and alpha values and stores the compressed block.
//   GI      - flattened thread index within the group (SV_GroupIndex)
//   groupID - thread-group ID (SV_GroupID); only .x is used
[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
{
    // Position of this thread: which of the group's blocks, and which texel of it.
    uint localBlock   = GI / MAX_USED_THREAD;
    uint firstTexel   = localBlock * MAX_USED_THREAD;   // first shared_temp slot of this block
    uint texelInBlock = GI - firstTexel;

    // Global block index, then the texel coordinate of the block's top-left corner.
    uint blockID  = g_start_block_id + groupID.x * BLOCK_IN_GROUP + localBlock;
    uint blockRow = blockID / g_num_block_x;
    uint blockCol = blockID - blockRow * g_num_block_x;
    uint originX  = blockCol * BLOCK_SIZE_X;
    uint originY  = blockRow * BLOCK_SIZE_Y;

    // Stage the source texel for this thread in group-shared memory.
    if (texelInBlock < 16)
    {
        uint3 texel = uint3( originX + texelInBlock % 4, originY + texelInBlock / 4, 0 );
        shared_temp[GI] = float4(g_Input.Load( texel ));
    }

    GroupMemoryBarrierWithGroupSync();

    // One thread per block splits the staged texels into colour and alpha,
    // compresses them and writes the result.
    if (texelInBlock == 0)
    {
        float3 blockRGB[16];
        float  blockA[16];
        for (int i = 0; i < 16; i++ )
        {
            float4 texelValue = shared_temp[firstTexel + i];
            blockRGB[i] = texelValue.xyz;
            blockA[i]   = texelValue.w;
        }

        g_OutBuff[blockID] = CompressBlockBC3_UNORM(blockRGB, blockA, g_quality, false);
    }
}
|
@ -0,0 +1,97 @@
|
|||||||
|
//=====================================================================
|
||||||
|
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files(the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions :
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in
|
||||||
|
// all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
|
||||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
// THE SOFTWARE.
|
||||||
|
//
|
||||||
|
// File: BC4Encode.hlsl
|
||||||
|
//--------------------------------------------------------------------------------------
|
||||||
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
// Licensed under the MIT License.
|
||||||
|
//--------------------------------------------------------------------------------------
|
||||||
|
#ifndef ASPM_HLSL
|
||||||
|
#define ASPM_HLSL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Constants bound at constant-buffer slot b0.
// The order and types of the fields define the buffer layout; keep them as-is.
cbuffer cbCS : register(b0)
{
    uint  g_tex_width;         // not read by EncodeBlocks in this file
    uint  g_num_block_x;       // divisor used to split a linear block index into (block_x, block_y)
    uint  g_format;            // not read by EncodeBlocks in this file
    uint  g_mode_id;           // not read by EncodeBlocks in this file
    uint  g_start_block_id;    // offset added to the block index derived from the thread-group ID
    uint  g_num_total_blocks;  // not read by EncodeBlocks in this file
    float g_alpha_weight;      // not read by EncodeBlocks in this file
    float g_quality;           // passed by EncodeBlocks to the block compressor
    // NOTE(review): the fields marked "not read" may still be used by BCn_Common_Kernel.h - confirm.
};
|
||||||
|
|
||||||
|
#include "BCn_Common_Kernel.h"
|
||||||
|
|
||||||
|
// Source Data
|
||||||
|
Texture2D g_Input : register( t0 );
|
||||||
|
StructuredBuffer<uint4> g_InBuff : register( t1 );
|
||||||
|
|
||||||
|
// Compressed Output Data
|
||||||
|
RWStructuredBuffer<uint2> g_OutBuff : register( u0 );
|
||||||
|
|
||||||
|
// Processing multiple blocks at a time
|
||||||
|
#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block
|
||||||
|
#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4
|
||||||
|
#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
|
||||||
|
#define BLOCK_SIZE_Y 4
|
||||||
|
#define BLOCK_SIZE_X 4
|
||||||
|
|
||||||
|
groupshared float4 shared_temp[THREAD_GROUP_SIZE];
|
||||||
|
|
||||||
|
// BC4 encode entry point.
// A thread group of THREAD_GROUP_SIZE (64) threads covers BLOCK_IN_GROUP (4)
// BC blocks, MAX_USED_THREAD (16) threads per block. Each thread loads one
// texel into shared_temp; after the group barrier the first thread of every
// block gathers the .x channel of that block's texels and stores the
// compressed block.
//   GI      - flattened thread index within the group (SV_GroupIndex)
//   groupID - thread-group ID (SV_GroupID); only .x is used
[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
{
    // Position of this thread: which of the group's blocks, and which texel of it.
    uint localBlock   = GI / MAX_USED_THREAD;
    uint firstTexel   = localBlock * MAX_USED_THREAD;   // first shared_temp slot of this block
    uint texelInBlock = GI - firstTexel;

    // Global block index, then the texel coordinate of the block's top-left corner.
    uint blockID  = g_start_block_id + groupID.x * BLOCK_IN_GROUP + localBlock;
    uint blockRow = blockID / g_num_block_x;
    uint blockCol = blockID - blockRow * g_num_block_x;
    uint originX  = blockCol * BLOCK_SIZE_X;
    uint originY  = blockRow * BLOCK_SIZE_Y;

    // Stage the source texel for this thread in group-shared memory.
    if (texelInBlock < 16)
    {
        uint3 texel = uint3( originX + texelInBlock % 4, originY + texelInBlock / 4, 0 );
        shared_temp[GI] = float4(g_Input.Load( texel ));
    }

    GroupMemoryBarrierWithGroupSync();

    // One thread per block collects the single source channel, compresses it
    // and writes the result.
    if (texelInBlock == 0)
    {
        float channel[16];
        for ( uint i = 0; i < 16; i ++ )
        {
            channel[i] = shared_temp[firstTexel + i].x;
        }
        g_OutBuff[blockID] = CompressBlockBC4_UNORM(channel, g_quality);
    }
}
|
@ -0,0 +1,98 @@
|
|||||||
|
//=====================================================================
|
||||||
|
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files(the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions :
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in
|
||||||
|
// all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
|
||||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
// THE SOFTWARE.
|
||||||
|
//
|
||||||
|
// File: BC1Encode.hlsl
|
||||||
|
//--------------------------------------------------------------------------------------
|
||||||
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
// Licensed under the MIT License.
|
||||||
|
//--------------------------------------------------------------------------------------
|
||||||
|
#ifndef ASPM_HLSL
|
||||||
|
#define ASPM_HLSL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Constants bound at constant-buffer slot b0.
// The order and types of the fields define the buffer layout; keep them as-is.
cbuffer cbCS : register(b0)
{
    uint  g_tex_width;         // not read by EncodeBlocks in this file
    uint  g_num_block_x;       // divisor used to split a linear block index into (block_x, block_y)
    uint  g_format;            // not read by EncodeBlocks in this file
    uint  g_mode_id;           // not read by EncodeBlocks in this file
    uint  g_start_block_id;    // offset added to the block index derived from the thread-group ID
    uint  g_num_total_blocks;  // not read by EncodeBlocks in this file
    float g_alpha_weight;      // not read by EncodeBlocks in this file
    float g_quality;           // passed by EncodeBlocks to the block compressor
    // NOTE(review): the fields marked "not read" may still be used by BCn_Common_Kernel.h - confirm.
};
|
||||||
|
|
||||||
|
#include "BCn_Common_Kernel.h"
|
||||||
|
|
||||||
|
// Source Data
|
||||||
|
Texture2D g_Input : register( t0 );
|
||||||
|
StructuredBuffer<uint4> g_InBuff : register( t1 );
|
||||||
|
|
||||||
|
// Compressed Output Data
|
||||||
|
RWStructuredBuffer<uint4> g_OutBuff : register( u0 );
|
||||||
|
|
||||||
|
// Processing multiple blocks at a time
|
||||||
|
#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block
|
||||||
|
#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4
|
||||||
|
#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
|
||||||
|
#define BLOCK_SIZE_Y 4
|
||||||
|
#define BLOCK_SIZE_X 4
|
||||||
|
|
||||||
|
groupshared float4 shared_temp[THREAD_GROUP_SIZE];
|
||||||
|
|
||||||
|
// BC5 encode entry point.
// A thread group of THREAD_GROUP_SIZE (64) threads covers BLOCK_IN_GROUP (4)
// BC blocks, MAX_USED_THREAD (16) threads per block. Each thread loads one
// texel into shared_temp; after the group barrier the first thread of every
// block gathers the .x and .y channels of that block's texels and stores the
// compressed block.
//   GI      - flattened thread index within the group (SV_GroupIndex)
//   groupID - thread-group ID (SV_GroupID); only .x is used
[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
{
    // Position of this thread: which of the group's blocks, and which texel of it.
    uint localBlock   = GI / MAX_USED_THREAD;
    uint firstTexel   = localBlock * MAX_USED_THREAD;   // first shared_temp slot of this block
    uint texelInBlock = GI - firstTexel;

    // Global block index, then the texel coordinate of the block's top-left corner.
    uint blockID  = g_start_block_id + groupID.x * BLOCK_IN_GROUP + localBlock;
    uint blockRow = blockID / g_num_block_x;
    uint blockCol = blockID - blockRow * g_num_block_x;
    uint originX  = blockCol * BLOCK_SIZE_X;
    uint originY  = blockRow * BLOCK_SIZE_Y;

    // Stage the source texel for this thread in group-shared memory.
    if (texelInBlock < 16)
    {
        uint3 texel = uint3( originX + texelInBlock % 4, originY + texelInBlock / 4, 0 );
        shared_temp[GI] = float4(g_Input.Load( texel ));
    }

    GroupMemoryBarrierWithGroupSync();

    // One thread per block separates the two source channels, compresses them
    // and writes the result.
    if (texelInBlock == 0)
    {
        float blockU[16];
        float blockV[16];
        for ( uint i = 0; i < 16; i ++ )
        {
            float4 texelValue = shared_temp[firstTexel + i];
            blockU[i] = texelValue.x;
            blockV[i] = texelValue.y;
        }
        g_OutBuff[blockID] = CompressBlockBC5_UNORM(blockU, blockV, g_quality);
    }
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,143 @@
|
|||||||
|
//=====================================================================
|
||||||
|
// Copyright 2020 (c), Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files(the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions :
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in
|
||||||
|
// all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
|
||||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
// THE SOFTWARE.
|
||||||
|
//
|
||||||
|
//=====================================================================
|
||||||
|
#ifndef CMP_MATH_FUNC_H
|
||||||
|
#define CMP_MATH_FUNC_H
|
||||||
|
|
||||||
|
|
||||||
|
#include "Common_Def.h"
|
||||||
|
|
||||||
|
#ifndef ASPM_GPU
|
||||||
|
|
||||||
|
//============================================================================
|
||||||
|
// Core API which have GPU equivalents, defined here for HPC_CPU usage
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Three-way comparator over two CGU_FLOAT values passed as untyped pointers
// (the signature expected by qsort-like routines).
// Returns -1 when (*Elem1 - *Elem2) is negative, 1 when it is positive and
// 0 otherwise, which includes the case where the difference is NaN.
static CGU_INT QSortFCmp(const void *Elem1, const void *Elem2) {
    const CGU_FLOAT *first  = (const CGU_FLOAT *)Elem1;
    const CGU_FLOAT *second = (const CGU_FLOAT *)Elem2;

    if (*first - *second < 0.)
        return -1;
    if (*first - *second > 0.)
        return 1;
    return 0;
}
|
||||||
|
|
||||||
|
static int QSortIntCmp(const void *Elem1, const void *Elem2)
|
||||||
|
{
|
||||||
|
return (*(CGU_INT32 *)Elem1 - *(CGU_INT32 *)Elem2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dot product of two 3-component vectors: the sum of the products of the
// x, y and z members, accumulated in that order.
static CGU_FLOAT dot(CMP_IN CGU_Vec3f Color,CMP_IN CGU_Vec3f Color2)
{
    return (Color.x * Color2.x) + (Color.y * Color2.y) + (Color.z * Color2.z);
}
|
||||||
|
|
||||||
|
// Dot product of two 2-component vectors: the sum of the products of the
// x and y members.
static CGU_FLOAT dot(CMP_IN CGU_Vec2f Color,CMP_IN CGU_Vec2f Color2)
{
    return Color.x * Color2.x + Color.y * Color2.y;
}
|
||||||
|
|
||||||
|
// Component-wise absolute value of a 2-component vector.
// Returns a new vector; the argument is taken by value.
static CGU_Vec2f abs(CMP_IN CGU_Vec2f Color)
{
    CGU_Vec2f magnitude;
    magnitude.x = std::abs(Color.x);
    magnitude.y = std::abs(Color.y);
    return magnitude;
}
|
||||||
|
|
||||||
|
// Component-wise absolute value of a 3-component vector.
// Returns a new vector; the argument is taken by value.
static CGU_Vec3f fabs(CMP_IN CGU_Vec3f Color)
{
    CGU_Vec3f magnitude;
    magnitude.x = std::abs(Color.x);
    magnitude.y = std::abs(Color.y);
    magnitude.z = std::abs(Color.z);
    return magnitude;
}
|
||||||
|
|
||||||
|
// Applies std::round to each of the x, y and z members of a 3-component
// vector and returns the results as a new vector.
static CGU_Vec3f round(CMP_IN CGU_Vec3f Color)
{
    CGU_Vec3f rounded;
    rounded.x = std::round(Color.x);
    rounded.y = std::round(Color.y);
    rounded.z = std::round(Color.z);
    return rounded;
}
|
||||||
|
|
||||||
|
// Applies std::round to each of the x and y members of a 2-component vector
// and returns the results as a new vector.
static CGU_Vec2f round(CMP_IN CGU_Vec2f Color)
{
    CGU_Vec2f rounded;
    rounded.x = std::round(Color.x);
    rounded.y = std::round(Color.y);
    return rounded;
}
|
||||||
|
|
||||||
|
// Applies std::ceil to each of the x, y and z members of a 3-component
// vector and returns the results as a new vector.
static CGU_Vec3f ceil(CMP_IN CGU_Vec3f Color)
{
    CGU_Vec3f raised;
    raised.x = std::ceil(Color.x);
    raised.y = std::ceil(Color.y);
    raised.z = std::ceil(Color.z);
    return raised;
}
|
||||||
|
|
||||||
|
// Applies std::floor to each of the x, y and z members of a 3-component
// vector and returns the results as a new vector.
static CGU_Vec3f floor(CMP_IN CGU_Vec3f Color)
{
    CGU_Vec3f lowered;
    lowered.x = std::floor(Color.x);
    lowered.y = std::floor(Color.y);
    lowered.z = std::floor(Color.z);
    return lowered;
}
|
||||||
|
|
||||||
|
// Clamps each of the x, y and z members of a 3-component vector to [0, 1].
// A member above 1.0f becomes 1.0f, a member below 0.0f becomes 0.0f, and any
// other value (including NaN, for which both comparisons are false) is
// returned unchanged. Works on the by-value copy of the argument.
static CGU_Vec3f saturate(CGU_Vec3f value)
{
    value.x = (value.x > 1.0f) ? 1.0f : ((value.x < 0.0f) ? 0.0f : value.x);
    value.y = (value.y > 1.0f) ? 1.0f : ((value.y < 0.0f) ? 0.0f : value.y);
    value.z = (value.z > 1.0f) ? 1.0f : ((value.z < 0.0f) ? 0.0f : value.z);
    return value;
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//============================================================================
|
||||||
|
// Core API which are shared between GPU & CPU
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#endif // Header Guard
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue