Upgrade CMP Core.

This commit is contained in:
Ignacio Castano 2020-07-05 23:05:07 -07:00
parent 1e06539012
commit 4ff7af50ca
30 changed files with 10082 additions and 3060 deletions

View File

@ -1,5 +1,5 @@
//===================================================================== //==============================================================================
// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
@ -19,397 +19,117 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE. // THE SOFTWARE.
// //
//===================================================================== //===============================================================================
#include "BC1_Encode_kernel.h"
//============================================== BC1 INTERFACES ======================================================= // Heat Mapping
void CompressBlockBC1_Fast( // This is code that compares quality of two similar or equal codecs with varying quality settings
CMP_Vec4uc srcBlockTemp[16], // A resulting compressed codec data block is colored according to three colors conditions
CMP_GLOBAL CGU_UINT32 compressedBlock[2]) // The base codec, lowest quality is colored green and the varying quality code is colored red.
{ // If the quality of the base matches that of the varying codec then the color is set to blue
int i, k; // Base codecs can be local to CMP_Core or imported using a external set of files, the base codec
CMP_Vec3f rgb; #ifndef TEST_HEATMAP
CMP_Vec3f average_rgb; // The centrepoint of the axis //#define TEST_HEATMAP // Enable this to run heat map tests on BC1 codec
CMP_Vec3f v_rgb; // The axis
CMP_Vec3f uniques[16]; // The list of unique colours
int unique_pixels; // The number of unique pixels
CGU_FLOAT unique_recip; // Reciprocal of the above for fast multiplication
int index_map[16]; // The map of source pixels to unique indices
CGU_FLOAT pos_on_axis[16]; // The distance each unique falls along the compression axis
CGU_FLOAT dist_from_axis[16]; // The distance each unique falls from the compression axis
CGU_FLOAT left = 0, right = 0, centre = 0; // The extremities and centre (average of left/right) of uniques along the compression axis
CGU_FLOAT axis_mapping_error = 0; // The total computed error in mapping pixels to the axis
int swap; // Indicator if the RGB values need swapping to generate an opaque result
// -------------------------------------------------------------------------------------
// (3) Find the array of unique pixel values and sum them to find their average position
// -------------------------------------------------------------------------------------
{
// Find the array of unique pixel values and sum them to find their average position
int current_pixel, firstdiff;
current_pixel = unique_pixels = 0;
average_rgb = 0.0f;
firstdiff = -1;
for (i = 0; i<16; i++)
{
for (k = 0; k<i; k++)
if ((((srcBlockTemp[k].x ^ srcBlockTemp[i].x) & 0xf8) == 0) && (((srcBlockTemp[k].y ^ srcBlockTemp[i].y) & 0xfc) == 0) && (((srcBlockTemp[k].z ^ srcBlockTemp[i].z) & 0xf8) == 0))
break;
index_map[i] = current_pixel++;
//pixel_count[i] = 1;
CMP_Vec3f trgb;
rgb.x = (CGU_FLOAT)((srcBlockTemp[i].x) & 0xff);
rgb.y = (CGU_FLOAT)((srcBlockTemp[i].y) & 0xff);
rgb.z = (CGU_FLOAT)((srcBlockTemp[i].z) & 0xff);
trgb.x = CS_RED(rgb.x, rgb.y, rgb.z);
trgb.y = CS_GREEN(rgb.x, rgb.y, rgb.z);
trgb.z = CS_BLUE(rgb.x, rgb.y, rgb.z);
uniques[i] = trgb;
if (k == i)
{
unique_pixels++;
if ((i != 0) && (firstdiff < 0)) firstdiff = i;
}
average_rgb = average_rgb + trgb;
}
unique_pixels = 16;
// Compute average of the uniques
unique_recip = 1.0f / (CGU_FLOAT)unique_pixels;
average_rgb = average_rgb * unique_recip;
}
// -------------------------------------------------------------------------------------
// (4) For each component, reflect points about the average so all lie on the same side
// of the average, and compute the new average - this gives a second point that defines the axis
// To compute the sign of the axis sum the positive differences of G for each of R and B (the
// G axis is always positive in this implementation
// -------------------------------------------------------------------------------------
// An interesting situation occurs if the G axis contains no information, in which case the RB
// axis is also compared. I am not entirely sure if this is the correct implementation - should
// the priority axis be determined by magnitude?
{
CGU_FLOAT rg_pos, bg_pos, rb_pos;
v_rgb = 0.0f;
rg_pos = bg_pos = rb_pos = 0;
for (i = 0; i < unique_pixels; i++)
{
rgb = uniques[i] - average_rgb;
#ifndef ASPM_GPU
v_rgb.x += (CGU_FLOAT)fabs(rgb.x);
v_rgb.y += (CGU_FLOAT)fabs(rgb.y);
v_rgb.z += (CGU_FLOAT)fabs(rgb.z);
#else
v_rgb = v_rgb + fabs(rgb);
#endif #endif
if (rgb.x > 0) { rg_pos += rgb.y; rb_pos += rgb.z; } #include "BC1_Encode_kernel.h"
if (rgb.z > 0) bg_pos += rgb.y;
}
v_rgb = v_rgb*unique_recip;
if (rg_pos < 0) v_rgb.x = -v_rgb.x;
if (bg_pos < 0) v_rgb.z = -v_rgb.z;
if ((rg_pos == bg_pos) && (rg_pos == 0))
if (rb_pos < 0) v_rgb.z = -v_rgb.z;
}
// ------------------------------------------------------------------------------------- #ifdef TEST_HEATMAP
// (5) Axis projection and remapping #include "ExternCodec.h" // Use external codec for testing
// ------------------------------------------------------------------------------------- #endif
{ //============================================== BC1 INTERFACES =======================================================
CGU_FLOAT v2_recip;
// Normalise the axis for simplicity of future calculation #ifndef ASPM_HLSL
v2_recip = (v_rgb.x*v_rgb.x + v_rgb.y*v_rgb.y + v_rgb.z*v_rgb.z);
if (v2_recip > 0)
v2_recip = 1.0f / (CGU_FLOAT)sqrt(v2_recip);
else
v2_recip = 1.0f;
v_rgb = v_rgb*v2_recip;
}
// ------------------------------------------------------------------------------------- void CompressBlockBC1_Internal(
// (6) Map the axis const CMP_Vec4uc srcBlockTemp[16],
// -------------------------------------------------------------------------------------
// the line joining (and extended on either side of) average and axis
// defines the axis onto which the points will be projected
// Project all the points onto the axis, calculate the distance along
// the axis from the centre of the axis (average)
// From Foley & Van Dam: Closest point of approach of a line (P + v) to a point (R) is
// P + ((R-P).v) / (v.v))v
// The distance along v is therefore (R-P).v / (v.v)
// (v.v) is 1 if v is a unit vector.
//
// Calculate the extremities at the same time - these need to be reasonably accurately
// represented in all cases
//
// In this first calculation, also find the error of mapping the points to the axis - this
// is our major indicator of whether or not the block has compressed well - if the points
// map well onto the axis then most of the noise introduced is high-frequency noise
{
left = 10000.0f;
right = -10000.0f;
axis_mapping_error = 0;
for (i = 0; i < unique_pixels; i++)
{
// Compute the distance along the axis of the point of closest approach
CMP_Vec3f temp = (uniques[i] - average_rgb);
pos_on_axis[i] = (temp.x * v_rgb.x) + (temp.y * v_rgb.y) + (temp.z * v_rgb.z);
// Compute the actual point and thence the mapping error
rgb = uniques[i] - (average_rgb + (v_rgb * pos_on_axis[i]));
dist_from_axis[i] = rgb.x*rgb.x + rgb.y*rgb.y + rgb.z*rgb.z;
axis_mapping_error += dist_from_axis[i];
// Work out the extremities
if (pos_on_axis[i] < left)
left = pos_on_axis[i];
if (pos_on_axis[i] > right)
right = pos_on_axis[i];
}
}
// -------------------------------------------------------------------------------------
// (7) Now we have a good axis and the basic information about how the points are mapped
// to it
// Our initial guess is to represent the endpoints accurately, by moving the average
// to the centre and recalculating the point positions along the line
// -------------------------------------------------------------------------------------
{
centre = (left + right) / 2;
average_rgb = average_rgb + (v_rgb*centre);
for (i = 0; i<unique_pixels; i++)
pos_on_axis[i] -= centre;
right -= centre;
left -= centre;
// Accumulate our final resultant error
axis_mapping_error *= unique_recip * (1 / 255.0f);
}
// -------------------------------------------------------------------------------------
// (8) Calculate the high and low output colour values
// Involved in this is a rounding procedure which is undoubtedly slightly twitchy. A
// straight rounded average is not correct, as the decompressor 'unrounds' by replicating
// the top bits to the bottom.
// In order to take account of this process, we don't just apply a straight rounding correction,
// but base our rounding on the input value (a straight rounding is actually pretty good in terms of
// error measure, but creates a visual colour and/or brightness shift relative to the original image)
// The method used here is to apply a centre-biased rounding dependent on the input value, which was
// (mostly by experiment) found to give minimum MSE while preserving the visual characteristics of
// the image.
// rgb = (average_rgb + (left|right)*v_rgb);
// -------------------------------------------------------------------------------------
{
CGU_UINT32 c0, c1, t;
int rd, gd, bd;
rgb = (average_rgb + (v_rgb * left));
rd = ( CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z);
gd = ( CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z);
bd = ( CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z);
ROUND_AND_CLAMP(rd, 5);
ROUND_AND_CLAMP(gd, 6);
ROUND_AND_CLAMP(bd, 5);
c0 = ((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3);
rgb = average_rgb + (v_rgb * right);
rd = ( CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z);
gd = ( CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z);
bd = ( CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z);
ROUND_AND_CLAMP(rd, 5);
ROUND_AND_CLAMP(gd, 6);
ROUND_AND_CLAMP(bd, 5);
c1 = (((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3));
// Force to be a 4-colour opaque block - in which case, c0 is greater than c1
// blocktype == 4
{
if (c0 < c1)
{
t = c0;
c0 = c1;
c1 = t;
swap = 1;
}
else if (c0 == c1)
{
// This block will always be encoded in 3-colour mode
// Need to ensure that only one of the two points gets used,
// avoiding accidentally setting some transparent pixels into the block
for (i = 0; i<unique_pixels; i++)
pos_on_axis[i] = left;
swap = 0;
}
else
swap = 0;
}
compressedBlock[0] = c0 | (c1 << 16);
}
// -------------------------------------------------------------------------------------
// (9) Final clustering, creating the 2-bit values that define the output
// -------------------------------------------------------------------------------------
{
CGU_UINT32 bit;
CGU_FLOAT division;
CGU_FLOAT cluster_x[4];
CGU_FLOAT cluster_y[4];
int cluster_count[4];
// (blocktype == 4)
{
compressedBlock[1] = 0;
division = right*2.0f / 3.0f;
centre = (left + right) / 2; // Actually, this code only works if centre is 0 or approximately so
for (i = 0; i<4; i++)
{
cluster_x[i] = cluster_y[i] = 0.0f;
cluster_count[i] = 0;
}
for (i = 0; i<16; i++)
{
rgb.z = pos_on_axis[index_map[i]];
// Endpoints (indicated by block > average) are 0 and 1, while
// interpolants are 2 and 3
if (fabs(rgb.z) >= division)
bit = 0;
else
bit = 2;
// Positive is in the latter half of the block
if (rgb.z >= centre)
bit += 1;
// Set the output, taking swapping into account
compressedBlock[1] |= ((bit^swap) << (2 * i));
// Average the X and Y locations for each cluster
cluster_x[bit] += (CGU_FLOAT)(i & 3);
cluster_y[bit] += (CGU_FLOAT)(i >> 2);
cluster_count[bit]++;
}
for (i = 0; i<4; i++)
{
CGU_FLOAT cr;
if (cluster_count[i])
{
cr = 1.0f / cluster_count[i];
cluster_x[i] *= cr;
cluster_y[i] *= cr;
}
else
{
cluster_x[i] = cluster_y[i] = -1;
}
}
// patterns in axis position detection
// (same algorithm as used in the SSE version)
if ((compressedBlock[0] & 0xffff) != (compressedBlock[0] >> 16))
{
CGU_UINT32 i1, k1;
CGU_UINT32 x = 0, y = 0;
int xstep = 0, ystep = 0;
// Find a corner to search from
for (k1 = 0; k1<4; k1++)
{
switch (k1)
{
case 0:
x = 0; y = 0; xstep = 1; ystep = 1;
break;
case 1:
x = 0; y = 3; xstep = 1; ystep = -1;
break;
case 2:
x = 3; y = 0; xstep = -1; ystep = 1;
break;
case 3:
x = 3; y = 3; xstep = -1; ystep = -1;
break;
}
for (i1 = 0; i1<4; i1++)
{
if ((POS(x, y + ystep*i1) < POS(x + xstep, y + ystep*i1)) ||
(POS(x + xstep, y + ystep*i1) < POS(x + 2 * xstep, y + ystep*i1)) ||
(POS(x + 2 * xstep, y + ystep*i1) < POS(x + 3 * xstep, y + ystep*i1))
)
break;
if ((POS(x + xstep*i1, y) < POS(x + xstep*i1, y + ystep)) ||
(POS(x + xstep*i1, y + ystep) < POS(x + xstep*i1, y + 2 * ystep)) ||
(POS(x + xstep*i1, y + 2 * ystep) < POS(x + xstep*i1, y + 3 * ystep))
)
break;
}
if (i1 == 4)
break;
}
}
}
}
// done
}
// Writes two 32-bit words out as eight bytes, least-significant byte of each
// word first: bytes 0..3 come from data[0], bytes 4..7 from data[1].
//   u_dstptr : destination, receives exactly 8 bytes
//   data     : the two source words
INLINE void store_uint8(CMP_GLOBAL CGU_UINT8 u_dstptr[8], CGU_UINT32 data[2])
{
    for (CGU_INT byteIdx = 0; byteIdx < 8; byteIdx++)
    {
        // Upper bit of the 0..7 index picks the word, low two bits pick the byte lane.
        int bitOffset = (byteIdx & 3) * 8;
        u_dstptr[byteIdx] = (data[byteIdx >> 2] >> bitOffset) & 0xFF;
    }
}
void CompressBlockBC1_Internal(
const CMP_Vec4uc srcBlockTemp[16],
CMP_GLOBAL CGU_UINT32 compressedBlock[2], CMP_GLOBAL CGU_UINT32 compressedBlock[2],
CMP_GLOBAL const CMP_BC15Options *BC15options) CMP_GLOBAL CMP_BC15Options *BC15options)
{ {
CGU_UINT8 blkindex = 0;
CGU_UINT8 srcindex = 0; CGU_UINT8 srcindex = 0;
CGU_UINT8 rgbBlock[64]; CGU_FLOAT BlockA[16];
CGU_Vec3f rgbBlockUV[16];
for ( CGU_INT32 j = 0; j < 4; j++) { for ( CGU_INT32 j = 0; j < 4; j++) {
for ( CGU_INT32 i = 0; i < 4; i++) { for ( CGU_INT32 i = 0; i < 4; i++) {
rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z; // B rgbBlockUV[srcindex].x = (CGU_FLOAT)(srcBlockTemp[srcindex].x & 0xFF)/ 255.0f; // R
rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // G rgbBlockUV[srcindex].y = (CGU_FLOAT)(srcBlockTemp[srcindex].y & 0xFF)/ 255.0f; // G
rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // R rgbBlockUV[srcindex].z = (CGU_FLOAT)(srcBlockTemp[srcindex].z & 0xFF)/ 255.0f; // B
rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w; // A
srcindex++; srcindex++;
} }
} }
CMP_BC15Options internalOptions = *BC15options; CMP_BC15Options internalOptions = *BC15options;
CalculateColourWeightings(rgbBlock, &internalOptions); internalOptions = CalculateColourWeightings3f(rgbBlockUV,internalOptions);
CGU_Vec3f channelWeights = {internalOptions.m_fChannelWeights[0],internalOptions.m_fChannelWeights[1],internalOptions.m_fChannelWeights[2]};
CGU_BOOL isSRGB = internalOptions.m_bIsSRGB; // feature not supported in this section of code until v4.1
CGU_Vec2ui cmpBlock = 0;
//#define CMP_PRINTRESULTS
#ifdef TEST_HEATMAP
#ifdef CMP_PRINTRESULTS
static int q1= 0,q2= 0,same = 0;
static int testnum = 0;
printf("%4d ",testnum);
#endif
{
// Heatmap test: See BCn_Common_Kernel for details
CGU_Vec2ui red = {0xf800f800,0};
CGU_Vec2ui green = {0x07e007e0,0};
CGU_Vec2ui blue = {0x001f001f,0};
CGU_Vec2ui comp1;
CGU_Vec2ui comp2;
float err ;
comp1 = (BC15options->m_fquality < 0.3)?CompressBC1Block_SRGB(rgbBlockUV):CompressBC1Block(rgbBlockUV);
comp2 = CompressBlockBC1_UNORM(rgbBlockUV, BC15options->m_fquality,BC15options->m_fquality < 0.3?true:false);
if ((comp1.x == comp2.x)&&(comp1.y == comp2.y)) err = 0.0f;
else {
float err1 = CMP_RGBBlockError(rgbBlockUV,comp1,(BC15options->m_fquality < 0.3)?true:false);
float err2 = CMP_RGBBlockError(rgbBlockUV,comp2,(BC15options->m_fquality < 0.3)?true:false);
err = err1-err2;
}
if (err > 0.0f)
{
cmpBlock = red;
}
else if (err < 0.0f) {
cmpBlock = green;
}
else {
cmpBlock = blue;
}
}
#ifdef CMP_PRINTRESULTS
printf("Q1 [%4X:%4X] %.3f, ",cmpBlockQ1.x,cmpBlockQ1.y,err1);
printf("Q2 [%4X:%4X] %.3f, ",cmpBlock.x,cmpBlock.y ,err2);
testnum++;
#endif
#else
// printf("q = %f\n",internalOptions.m_fquality);
cmpBlock = CompressBlockBC1_RGBA_Internal(
rgbBlockUV,
BlockA,
channelWeights,
0, //internalOptions.m_nAlphaThreshold, bug to investigate in debug is ok release has issue!
1,
internalOptions.m_fquality,
isSRGB
);
#endif
compressedBlock[0] = cmpBlock.x;
compressedBlock[1] = cmpBlock.y;
CompressRGBBlock(rgbBlock,
compressedBlock,
&internalOptions,
TRUE,
FALSE,
internalOptions.m_nAlphaThreshold);
} }
#endif
//============================================== USER INTERFACES ======================================================== //============================================== CPU USER INTERFACES ========================================================
#ifndef ASPM_GPU #ifndef ASPM_GPU
int CMP_CDECL CreateOptionsBC1(void **options) int CMP_CDECL CreateOptionsBC1(void **options)
{ {
@ -528,15 +248,27 @@ int CMP_CDECL DecompressBlockBC1(const unsigned char cmpBlock[8],
BC15options = &BC15optionsDefault; BC15options = &BC15optionsDefault;
SetDefaultBC15Options(BC15options); SetDefaultBC15Options(BC15options);
} }
DecompressDXTRGB_Internal(srcBlock, ( CGU_UINT32 *)cmpBlock, BC15options);
CGU_Vec2ui compBlock;
compBlock.x = (CGU_UINT32)cmpBlock[3] << 24 |
(CGU_UINT32)cmpBlock[2] << 16 |
(CGU_UINT32)cmpBlock[1] << 8 |
(CGU_UINT32)cmpBlock[0];
compBlock.y = (CGU_UINT32)cmpBlock[7] << 24 |
(CGU_UINT32)cmpBlock[6] << 16 |
(CGU_UINT32)cmpBlock[5] << 8 |
(CGU_UINT32)cmpBlock[4];
cmp_decompressDXTRGBA_Internal(srcBlock, compBlock, BC15options->m_mapDecodeRGBA);
return CGU_CORE_OK; return CGU_CORE_OK;
} }
#endif #endif
//============================================== OpenCL USER INTERFACE ======================================================== //============================================== OpenCL USER INTERFACE ========================================================
#ifdef ASPM_GPU #ifdef ASPM_OPENCL
CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
CMP_GLOBAL const CMP_Vec4uc* ImageSource, CMP_GLOBAL const CMP_Vec4uc* ImageSource,
CMP_GLOBAL CGU_UINT8* ImageDestination, CMP_GLOBAL CGU_UINT8* ImageDestination,
@ -547,14 +279,10 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
CGU_UINT32 xID; CGU_UINT32 xID;
CGU_UINT32 yID; CGU_UINT32 yID;
//printf("SourceInfo: (H:%d,W:%d) Quality %1.2f \n", SourceInfo->m_src_height, SourceInfo->m_src_width, SourceInfo->m_fquality); //printf("SourceInfo: (H:%d,W:%d) Quality %1.2f \n", SourceInfo->m_src_height, SourceInfo->m_src_width, SourceInfo->m_fquality);
#ifdef ASPM_GPU
xID = get_global_id(0); xID = get_global_id(0);
yID = get_global_id(1); yID = get_global_id(1);
#else
xID = 0;
yID = 0;
#endif
if (xID >= (SourceInfo->m_src_width / BlockX)) return; if (xID >= (SourceInfo->m_src_width / BlockX)) return;
if (yID >= (SourceInfo->m_src_height / BlockX)) return; if (yID >= (SourceInfo->m_src_height / BlockX)) return;
@ -572,11 +300,6 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
} }
srcindex += srcWidth; srcindex += srcWidth;
} }
CompressBlockBC1_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
// fast low quality mode that matches v3.1 code
if (SourceInfo->m_fquality <= 0.04f)
CompressBlockBC1_Fast(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI]);
else
CompressBlockBC1_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
} }
#endif #endif

View File

@ -1,5 +1,5 @@
//===================================================================== //=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
@ -26,23 +26,5 @@
#include "Common_Def.h" #include "Common_Def.h"
#include "BCn_Common_Kernel.h" #include "BCn_Common_Kernel.h"
// Forward colour transform used by the fast BC1 path: red and green pass
// through unchanged, the third channel becomes the mean of blue and green.
// Every argument is parenthesized so expression arguments keep their grouping.
#define CS_RED(r, g, b)     (r)
#define CS_GREEN(r, g, b)   (g)
#define CS_BLUE(r, g, b)    (((b) + (g)) * 0.5f)
// Inverse of the transform above: DCS_BLUE(r, g, CS_BLUE(r, g, b)) == b.
#define DCS_RED(r, g, b)    (r)
#define DCS_GREEN(r, g, b)  (g)
#define DCS_BLUE(r, g, b)   ((2.0f * (b)) - (g))
#define BYTEPP 4
#define BC1CompBlockSize 8
// Clamps the integer lvalue v to 0..255 in place; values already inside that
// range are instead nudged by (0x80 >> shift) - (v >> shift).
// Wrapped in do { } while (0) so that `ROUND_AND_CLAMP(x, n);` is a single
// statement and can sit in an unbraced if/else. v is evaluated more than once.
#define ROUND_AND_CLAMP(v, shift) \
do {\
    if ((v) < 0) (v) = 0;\
    else if ((v) > 255) (v) = 255;\
    else (v) += (0x80 >> (shift)) - ((v) >> (shift));\
} while (0)
// Reads the local array `pos_on_axis` as a 4-wide grid; an array with that
// name must be in scope wherever the macro is expanded.
#define POS(x,y) (pos_on_axis[(x)+(y)*4])
#endif #endif

View File

@ -0,0 +1,99 @@
//=====================================================================
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// File: BC1_Encode_kernel.hlsl
//--------------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//--------------------------------------------------------------------------------------
#ifndef ASPM_HLSL
#define ASPM_HLSL
#endif
// Shader constants bound to constant-buffer register b0.
// The member order defines the buffer layout, so it must not be rearranged.
// Only g_num_block_x, g_start_block_id and g_quality are read by EncodeBlocks
// in this file. NOTE(review): the remaining fields are not referenced by the
// shader code visible here - confirm whether BCn_Common_Kernel.h uses them.
cbuffer cbCS : register( b0 )
{
uint g_tex_width;
// Divisor EncodeBlocks uses to split a linear block index into (block_x, block_y).
uint g_num_block_x;
uint g_format;
uint g_mode_id;
// Offset added to the block index derived from the thread-group ID.
uint g_start_block_id;
uint g_num_total_blocks;
float g_alpha_weight;
// Passed unchanged as the quality argument of CompressBlockBC1_UNORM.
float g_quality;
};
#include "BCn_Common_Kernel.h"
// Source Data
// g_Input is the texture EncodeBlocks reads texels from via Load().
Texture2D g_Input : register( t0 );
// NOTE(review): g_InBuff is declared but not referenced by EncodeBlocks in this file.
StructuredBuffer<uint4> g_InBuff : register( t1 );
// Compressed Output Data
// One uint2 per block, written by EncodeBlocks at index blockID.
RWStructuredBuffer<uint2> g_OutBuff : register( u0 );
// Processing multiple blocks at a time
// EncodeBlocks derives the block within a group as GI / MAX_USED_THREAD, so
// THREAD_GROUP_SIZE must stay equal to BLOCK_IN_GROUP * MAX_USED_THREAD.
#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block
#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4
#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
#define BLOCK_SIZE_Y 4
#define BLOCK_SIZE_X 4
// One float4 slot per thread in the group; filled by EncodeBlocks before its group barrier.
groupshared float4 shared_temp[THREAD_GROUP_SIZE];
// Compute entry point. Each run of MAX_USED_THREAD consecutive threads in a
// group handles one BLOCK_SIZE_X x BLOCK_SIZE_Y block: every thread loads one
// texel into group-shared memory, then the first thread of the run gathers the
// 16 texels and writes the result of CompressBlockBC1_UNORM to g_OutBuff.
//   GI      : flattened thread index within the group
//   groupID : thread-group ID; only .x is used
[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
{
    // Which block of the group this thread belongs to, and its texel slot within it.
    const uint localBlock  = GI / MAX_USED_THREAD;
    const uint firstSlot   = localBlock * MAX_USED_THREAD;
    const uint texel       = GI - firstSlot;

    // Linear block index across the dispatch, then its row/column in the block grid.
    const uint globalBlock = g_start_block_id + groupID.x * BLOCK_IN_GROUP + localBlock;
    const uint blockRow    = globalBlock / g_num_block_x;
    const uint blockCol    = globalBlock - blockRow * g_num_block_x;

    // Top-left texel of the block in the source texture.
    const uint originX = blockCol * BLOCK_SIZE_X;
    const uint originY = blockRow * BLOCK_SIZE_Y;

    // Stage this thread's texel in shared memory.
    if (texel < 16)
    {
        const uint3 coord = uint3( originX + texel % 4, originY + texel / 4, 0 );
        shared_temp[GI] = float4(g_Input.Load( coord ));
    }

    // All 16 texels of the block must be staged before any of them is read back.
    GroupMemoryBarrierWithGroupSync();

    // The first thread of each block encodes it and stores the result.
    if (texel == 0)
    {
        float3 rgb[16];
        for (int i = 0; i < 16; i++ )
        {
            rgb[i] = shared_temp[firstSlot + i].xyz;
        }
        g_OutBuff[globalBlock] = CompressBlockBC1_UNORM(rgb,g_quality,false);
    }
}

View File

@ -1,5 +1,5 @@
//===================================================================== //=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
@ -20,70 +20,50 @@
// THE SOFTWARE. // THE SOFTWARE.
// //
//===================================================================== //=====================================================================
#include "BC2_Encode_kernel.h" #include "BC2_Encode_kernel.h"
//============================================== BC2 INTERFACES ======================================================= //============================================== BC2 INTERFACES =======================================================
void DXTCV11CompressExplicitAlphaBlock(const CGU_UINT8 block_8[16], CMP_GLOBAL CGU_UINT32 block_dxtc[2])
{
CGU_UINT8 i;
block_dxtc[0] = block_dxtc[1] = 0;
for (i = 0; i < 16; i++)
{
int v = block_8[i];
v = (v + 7 - (v >> 4));
v >>= 4;
if (v < 0)
v = 0;
if (v > 0xf)
v = 0xf;
if (i < 8)
block_dxtc[0] |= v << (4 * i);
else
block_dxtc[1] |= v << (4 * (i - 8));
}
}
#define EXPLICIT_ALPHA_PIXEL_MASK 0xf
#define EXPLICIT_ALPHA_PIXEL_BPP 4
// Packs a 4x4 alpha block into two 32-bit words by delegating to
// DXTCV11CompressExplicitAlphaBlock. There is no failure path, so the
// return value is always CGU_CORE_OK.
//   alphaBlock      : the source alpha values
//   compressedBlock : receives the two packed words
CGU_INT CompressExplicitAlphaBlock(const CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4],
                                   CMP_GLOBAL CGU_UINT32 compressedBlock[2])
{
    CGU_INT status = CGU_CORE_OK;
    DXTCV11CompressExplicitAlphaBlock(alphaBlock, compressedBlock);
    return status;
}
void CompressBlockBC2_Internal(const CMP_Vec4uc srcBlockTemp[16], void CompressBlockBC2_Internal(const CMP_Vec4uc srcBlockTemp[16],
CMP_GLOBAL CGU_UINT32 compressedBlock[4], CMP_GLOBAL CGU_UINT32 compressedBlock[4],
CMP_GLOBAL const CMP_BC15Options *BC15options) CMP_GLOBAL const CMP_BC15Options *BC15options)
{ {
CGU_UINT8 blkindex = 0;
CGU_UINT8 srcindex = 0; CGU_Vec2ui cmpBlock;
CGU_UINT8 rgbaBlock[64]; CGU_Vec3f rgbBlock[16];
for (CGU_INT32 j = 0; j < 4; j++) { CGU_FLOAT BlockA[16];
for (CGU_INT32 i = 0; i < 4; i++) {
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z; // B for (CGU_INT32 i = 0; i < 16; i++) {
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // G rgbBlock[i].x = (CGU_FLOAT)(srcBlockTemp[i].x & 0xFF)/255.0f; // R
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // R rgbBlock[i].y = (CGU_FLOAT)(srcBlockTemp[i].y & 0xFF)/255.0f; // G
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w; // A rgbBlock[i].z = (CGU_FLOAT)(srcBlockTemp[i].z & 0xFF)/255.0f; // B
srcindex++; BlockA[i] = (CGU_FLOAT)(srcBlockTemp[i].w & 0xFF)/255.0f; // A
}
} }
CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; cmpBlock = cmp_compressExplicitAlphaBlock(BlockA);
for (CGU_INT32 i = 0; i < 16; i++) compressedBlock[DXTC_OFFSET_ALPHA ] = cmpBlock.x;
alphaBlock[i] = (CGU_UINT8)(((CGU_INT32*)rgbaBlock)[i] >> RGBA8888_OFFSET_A); compressedBlock[DXTC_OFFSET_ALPHA+1] = cmpBlock.y;
// Need a copy, as CalculateColourWeightings sets variables in the BC15options // Need a copy, as CalculateColourWeightings sets variables in the BC15options
CMP_BC15Options internalOptions = *BC15options; CMP_BC15Options internalOptions = *BC15options;
CalculateColourWeightings(rgbaBlock, &internalOptions); internalOptions = CalculateColourWeightings3f(rgbBlock, internalOptions);
internalOptions.m_bUseAlpha = false;
CGU_Vec3f channelWeights = {internalOptions.m_fChannelWeights[0],internalOptions.m_fChannelWeights[1],internalOptions.m_fChannelWeights[2]};
CGU_Vec3f MinColor = {0,0,0}, MaxColor={0,0,0};
CGU_INT err = CompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]); cmpBlock = CompressBlockBC1_RGBA_Internal(
if (err != 0) rgbBlock,
return; BlockA,
channelWeights,
0,//internalOptions.m_nAlphaThreshold,
1, //internalOptions.m_nRefinementSteps
internalOptions.m_fquality,
FALSE);
compressedBlock[DXTC_OFFSET_RGB] = cmpBlock.x;
compressedBlock[DXTC_OFFSET_RGB+1] = cmpBlock.y;
CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], &internalOptions,FALSE,FALSE,0);
} }
//============================================== USER INTERFACES ======================================================== //============================================== USER INTERFACES ========================================================
@ -141,6 +121,9 @@ int CMP_CDECL SetChannelWeightsBC2(void *options,
return CGU_CORE_OK; return CGU_CORE_OK;
} }
#define EXPLICIT_ALPHA_PIXEL_MASK 0xf
#define EXPLICIT_ALPHA_PIXEL_BPP 4
// Decompresses an explicit alpha block (DXT3) // Decompresses an explicit alpha block (DXT3)
void DecompressExplicitAlphaBlock(CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4], void DecompressExplicitAlphaBlock(CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4],
const CGU_UINT32 compressedBlock[2]) const CGU_UINT32 compressedBlock[2])
@ -160,7 +143,13 @@ void DecompressBC2_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[BLOCK_SIZE_4X4X4],
CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
DecompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]); DecompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],BC15options);
CGU_Vec2ui compBlock;
compBlock.x = compressedBlock[DXTC_OFFSET_RGB];
compBlock.y = compressedBlock[DXTC_OFFSET_RGB+1];
cmp_decompressDXTRGBA_Internal(rgbaBlock, compBlock,BC15options->m_mapDecodeRGBA);
for (CGU_UINT32 i = 0; i < 16; i++) for (CGU_UINT32 i = 0; i < 16; i++)
((CMP_GLOBAL CGU_UINT32*)rgbaBlock)[i] = (alphaBlock[i] << RGBA8888_OFFSET_A) | (((CMP_GLOBAL CGU_UINT32*)rgbaBlock)[i] & ~(BYTE_MASK << RGBA8888_OFFSET_A)); ((CMP_GLOBAL CGU_UINT32*)rgbaBlock)[i] = (alphaBlock[i] << RGBA8888_OFFSET_A) | (((CMP_GLOBAL CGU_UINT32*)rgbaBlock)[i] & ~(BYTE_MASK << RGBA8888_OFFSET_A));
@ -219,7 +208,7 @@ int CMP_CDECL DecompressBlockBC2(const unsigned char cmpBlock[16],
#endif #endif
//============================================== OpenCL USER INTERFACE ======================================================== //============================================== OpenCL USER INTERFACE ========================================================
#ifdef ASPM_GPU #ifdef ASPM_OPENCL
CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
CMP_GLOBAL const CMP_Vec4uc* ImageSource, CMP_GLOBAL const CMP_Vec4uc* ImageSource,
CMP_GLOBAL CGU_UINT8* ImageDestination, CMP_GLOBAL CGU_UINT8* ImageDestination,

View File

@ -1,5 +1,5 @@
//===================================================================== //=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal

View File

@ -0,0 +1,101 @@
//=====================================================================
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// File: BC1Encode.hlsl
//--------------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//--------------------------------------------------------------------------------------
#ifndef ASPM_HLSL
#define ASPM_HLSL
#endif
// Constant buffer in register b0 holding the encode parameters for this shader.
// Only g_num_block_x, g_start_block_id and g_quality are read by EncodeBlocks in
// this file; the other fields may be consumed by the included common kernel header
// or exist to keep the layout in step with the host side (not verifiable from here).
cbuffer cbCS : register( b0 )
{
uint g_tex_width;           // not read by EncodeBlocks
uint g_num_block_x;         // divisor that splits a linear block id into block row / block column
uint g_format;              // not read by EncodeBlocks
uint g_mode_id;             // not read by EncodeBlocks
uint g_start_block_id;      // offset added to the group-derived index to form the global block id
uint g_num_total_blocks;    // not read by EncodeBlocks (no upper bound check is done on the block id)
float g_alpha_weight;       // not read by EncodeBlocks
float g_quality;            // passed straight through to the block compressor
};
#include "BCn_Common_Kernel.h"
// Source Data
Texture2D g_Input : register( t0 );
StructuredBuffer<uint4> g_InBuff : register( t1 );
// Compressed Output Data
RWStructuredBuffer<uint4> g_OutBuff : register( u0 );
// Processing multiple blocks at a time
#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block
#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4
#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
#define BLOCK_SIZE_Y 4
#define BLOCK_SIZE_X 4
groupshared float4 shared_temp[THREAD_GROUP_SIZE];
// Compute-shader entry point for BC2 encoding.
// A thread group of THREAD_GROUP_SIZE threads handles BLOCK_IN_GROUP blocks:
// every run of MAX_USED_THREAD consecutive threads fetches the 16 texels of one
// 4x4 block into group-shared memory, then the first thread of each run
// compresses the block and stores the result in g_OutBuff.
//   GI      - flattened thread index inside the group
//   groupID - group index; only .x is used
[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
{
    // Which of the group's blocks this thread serves, and its texel slot in that block.
    const uint localBlock = GI / MAX_USED_THREAD;
    const uint firstSlot  = localBlock * MAX_USED_THREAD;   // first shared_temp entry of this block
    const uint texel      = GI - firstSlot;                 // 0..15 within the block

    // Global linear block id, then its row/column in the block grid.
    const uint blockID  = g_start_block_id + groupID.x * BLOCK_IN_GROUP + localBlock;
    const uint blockRow = blockID / g_num_block_x;
    const uint blockCol = blockID - blockRow * g_num_block_x;

    // Top-left texel of the block in the source texture.
    const uint originX = blockCol * BLOCK_SIZE_X;
    const uint originY = blockRow * BLOCK_SIZE_Y;

    // One texel per thread, laid out row-major inside the 4x4 block.
    if (texel < 16)
    {
        const uint3 coord = uint3( originX + texel % 4, originY + texel / 4, 0 );
        shared_temp[GI] = float4(g_Input.Load( coord ));
    }

    // All texels of the group must be in shared memory before any block is encoded.
    GroupMemoryBarrierWithGroupSync();

    // The first thread of each block gathers the texels and emits the compressed block.
    if (texel == 0)
    {
        float3 blockRGB[16];
        float  blockA[16];

        for (uint k = 0; k < 16; k++)
        {
            const float4 px = shared_temp[firstSlot + k];
            blockRGB[k] = px.xyz;
            blockA[k]   = px.w;
        }

        g_OutBuff[blockID] = CompressBlockBC2_UNORM(blockRGB,blockA,g_quality,false);
    }
}

View File

@ -1,5 +1,5 @@
//===================================================================== //=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
@ -23,37 +23,50 @@
#include "BC3_Encode_kernel.h" #include "BC3_Encode_kernel.h"
//============================================== BC3 INTERFACES ======================================================= //============================================== BC3 INTERFACES =======================================================
#ifndef ASPM_HLSL
void CompressBlockBC3_Internal(const CMP_Vec4uc srcBlockTemp[16], void CompressBlockBC3_Internal(const CMP_Vec4uc srcBlockTemp[16],
CMP_GLOBAL CGU_UINT32 compressedBlock[4], CMP_GLOBAL CGU_UINT32 compressedBlock[4],
CMP_GLOBAL const CMP_BC15Options *BC15options) { CMP_GLOBAL CMP_BC15Options *BC15options) {
CGU_UINT8 blkindex = 0; CGU_Vec3f rgbBlock[16];
CGU_UINT8 srcindex = 0; CGU_FLOAT alphaBlock[BLOCK_SIZE_4X4];
CGU_UINT8 rgbaBlock[64];
for (CGU_INT32 j = 0; j < 4; j++) { for (CGU_INT32 i = 0; i < 16; i++) {
for (CGU_INT32 i = 0; i < 4; i++) { rgbBlock[i].x = (CGU_FLOAT)(srcBlockTemp[i].x & 0xFF)/255; // R
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z; // B rgbBlock[i].y = (CGU_FLOAT)(srcBlockTemp[i].y & 0xFF)/255; // G
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // G rgbBlock[i].z = (CGU_FLOAT)(srcBlockTemp[i].z & 0xFF)/255; // B
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // R alphaBlock[i] = (CGU_FLOAT)(srcBlockTemp[i].w) / 255.0f;
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w; // A
srcindex++;
}
} }
CMP_BC15Options internalOptions = *BC15options; CMP_BC15Options internalOptions = *BC15options;
CalculateColourWeightings(rgbaBlock, &internalOptions);
CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; CGU_Vec2ui cmpBlock;
for (CGU_INT32 i = 0; i < 16; i++)
alphaBlock[i] =
(CGU_UINT8)(((CGU_INT32 *)rgbaBlock)[i] >> RGBA8888_OFFSET_A);
CGU_INT err = CompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]); cmpBlock = cmp_compressAlphaBlock(alphaBlock,internalOptions.m_fquality);
if (err != 0) return; compressedBlock[0] = cmpBlock.x;
compressedBlock[1] = cmpBlock.y;
CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], &internalOptions, for (CGU_INT32 i = 0; i < 16; i++) {
FALSE, FALSE, 0); alphaBlock[i] = (CGU_FLOAT)(srcBlockTemp[i].w);
}
internalOptions = CalculateColourWeightings3f(rgbBlock, internalOptions);
CGU_Vec3f channelWeights = {internalOptions.m_fChannelWeights[0],internalOptions.m_fChannelWeights[1],internalOptions.m_fChannelWeights[2]};
cmpBlock = CompressBlockBC1_RGBA_Internal(
rgbBlock,
alphaBlock,
channelWeights,
0, // internalOptions.m_nAlphaThreshold,
1, // internalOptions.m_nRefinementSteps
internalOptions.m_fquality,
FALSE);
compressedBlock[2] = cmpBlock.x;
compressedBlock[3] = cmpBlock.y;
} }
#endif
//============================================== USER INTERFACES ======================================================== //============================================== USER INTERFACES ========================================================
#ifndef ASPM_GPU #ifndef ASPM_GPU
@ -117,8 +130,12 @@ void DecompressBC3_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
const CMP_BC15Options *BC15options) { const CMP_BC15Options *BC15options) {
CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
DecompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]); cmp_decompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],BC15options);
CGU_Vec2ui compBlock;
compBlock.x = compressedBlock[DXTC_OFFSET_RGB];
compBlock.y = compressedBlock[DXTC_OFFSET_RGB+1];
cmp_decompressDXTRGBA_Internal(rgbaBlock, compBlock,BC15options->m_mapDecodeRGBA);
for (CGU_UINT32 i = 0; i < 16; i++) for (CGU_UINT32 i = 0; i < 16; i++)
((CMP_GLOBAL CGU_UINT32 *)rgbaBlock)[i] = ((CMP_GLOBAL CGU_UINT32 *)rgbaBlock)[i] =
@ -178,7 +195,7 @@ int CMP_CDECL DecompressBlockBC3(const unsigned char cmpBlock[16],
#endif #endif
//============================================== OpenCL USER INTERFACE ==================================================== //============================================== OpenCL USER INTERFACE ====================================================
#ifdef ASPM_GPU #ifdef ASPM_OPENCL
CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
CMP_GLOBAL const CMP_Vec4uc *ImageSource, CMP_GLOBAL const CMP_Vec4uc *ImageSource,
CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo, CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo,

View File

@ -1,5 +1,5 @@
//===================================================================== //=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal

View File

@ -0,0 +1,101 @@
//=====================================================================
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// File: BC1Encode.hlsl
//--------------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//--------------------------------------------------------------------------------------
#ifndef ASPM_HLSL
#define ASPM_HLSL
#endif
// Constant buffer in register b0 holding the encode parameters for this shader.
// Only g_num_block_x, g_start_block_id and g_quality are read by EncodeBlocks in
// this file; the other fields may be consumed by the included common kernel header
// or exist to keep the layout in step with the host side (not verifiable from here).
cbuffer cbCS : register( b0 )
{
uint g_tex_width;           // not read by EncodeBlocks
uint g_num_block_x;         // divisor that splits a linear block id into block row / block column
uint g_format;              // not read by EncodeBlocks
uint g_mode_id;             // not read by EncodeBlocks
uint g_start_block_id;      // offset added to the group-derived index to form the global block id
uint g_num_total_blocks;    // not read by EncodeBlocks (no upper bound check is done on the block id)
float g_alpha_weight;       // not read by EncodeBlocks
float g_quality;            // passed straight through to the block compressor
};
#include "BCn_Common_Kernel.h"
// Source Data
Texture2D g_Input : register( t0 );
StructuredBuffer<uint4> g_InBuff : register( t1 );
// Compressed Output Data
RWStructuredBuffer<uint4> g_OutBuff : register( u0 );
// Processing multiple blocks at a time
#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block
#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4
#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
#define BLOCK_SIZE_Y 4
#define BLOCK_SIZE_X 4
groupshared float4 shared_temp[THREAD_GROUP_SIZE];
// Compute-shader entry point for BC3 encoding.
// The group is split into BLOCK_IN_GROUP runs of MAX_USED_THREAD threads. Each
// run stages one 4x4 block of texels in group-shared memory (one texel per
// thread); after a group-wide barrier, the lead thread of every run splits the
// texels into colour and alpha arrays and writes the compressed block.
//   GI      - flattened thread index inside the group
//   groupID - group index; only .x is used
[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
{
    // Position of this thread: block within the group, and texel within that block.
    uint groupBlock  = GI / MAX_USED_THREAD;
    uint sharedBase  = groupBlock * MAX_USED_THREAD;
    uint texelIndex  = GI - sharedBase;

    // Linear id of the block being encoded across the whole dispatch.
    uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + groupBlock;

    // Convert the linear id to block-grid coordinates, then to texel coordinates.
    uint gridY = blockID / g_num_block_x;
    uint gridX = blockID - gridY * g_num_block_x;
    uint texelX = gridX * BLOCK_SIZE_X + texelIndex % 4;
    uint texelY = gridY * BLOCK_SIZE_Y + texelIndex / 4;

    // Stage this thread's texel in shared memory.
    if (texelIndex < 16)
    {
        shared_temp[GI] = float4(g_Input.Load( uint3( texelX, texelY, 0 ) ));
    }

    // Wait until every thread in the group has stored its texel.
    GroupMemoryBarrierWithGroupSync();

    // Only the lead thread of each block performs the compression.
    if (texelIndex == 0)
    {
        float3 blockRGB[16];
        float  blockA[16];

        for (uint n = 0; n < 16; n++)
        {
            float4 src = shared_temp[sharedBase + n];
            blockRGB[n] = float3(src.x, src.y, src.z);
            blockA[n]   = src.w;
        }

        g_OutBuff[blockID] = CompressBlockBC3_UNORM(blockRGB,blockA, g_quality,false);
    }
}

View File

@ -1,5 +1,5 @@
//===================================================================== //=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
@ -32,15 +32,19 @@ void CompressBlockBC4_Internal(const CMP_Vec4uc srcBlockTemp[16],
} }
CGU_UINT8 blkindex = 0; CGU_UINT8 blkindex = 0;
CGU_UINT8 srcindex = 0; CGU_UINT8 srcindex = 0;
CGU_UINT8 alphaBlock[16]; CGU_FLOAT alphaBlock[16];
for (CGU_INT32 j = 0; j < 4; j++) { for (CGU_INT32 j = 0; j < 4; j++) {
for (CGU_INT32 i = 0; i < 4; i++) { for (CGU_INT32 i = 0; i < 4; i++) {
alphaBlock[blkindex++] = alphaBlock[blkindex++] = srcBlockTemp[srcindex].x / 255.0f; // Red channel
(CGU_UINT8)srcBlockTemp[srcindex].x; // Red channel
srcindex++; srcindex++;
} }
} }
CompressAlphaBlock(alphaBlock, (CMP_GLOBAL CGU_UINT32 *)compressedBlock);
CGU_Vec2ui cmpBlock;
cmpBlock = cmp_compressAlphaBlock(alphaBlock,BC15options->m_fquality);
compressedBlock[0] = cmpBlock.x;
compressedBlock[1] = cmpBlock.y;
} }
void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
@ -48,7 +52,7 @@ void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
const CMP_BC15Options *BC15options) { const CMP_BC15Options *BC15options) {
if (BC15options) {} if (BC15options) {}
CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
DecompressAlphaBlock(alphaBlock, compressedBlock); cmp_decompressAlphaBlock(alphaBlock, compressedBlock);
CGU_UINT8 blkindex = 0; CGU_UINT8 blkindex = 0;
CGU_UINT8 srcindex = 0; CGU_UINT8 srcindex = 0;
@ -63,18 +67,27 @@ void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
} }
} }
void CompressBlockBC4_SingleChannel(const CGU_UINT8 srcBlockTemp[16], void CompressBlockBC4_SingleChannel(const CGU_UINT8 srcBlockTemp[BLOCK_SIZE_4X4],
CMP_GLOBAL CGU_UINT32 compressedBlock[2], CMP_GLOBAL CGU_UINT32 compressedBlock[2],
CMP_GLOBAL const CMP_BC15Options *BC15options) { CMP_GLOBAL const CMP_BC15Options *BC15options) {
if (BC15options) {} if (BC15options) {}
CompressAlphaBlock(srcBlockTemp, (CMP_GLOBAL CGU_UINT32 *)compressedBlock); CGU_FLOAT alphaBlock[BLOCK_SIZE_4X4];
for (CGU_INT32 i = 0; i < BLOCK_SIZE_4X4; i++) alphaBlock[i] = (srcBlockTemp[i] / 255.0f);
CGU_Vec2ui cmpBlock;
cmpBlock = cmp_compressAlphaBlock(alphaBlock,BC15options->m_fquality);
compressedBlock[0] = cmpBlock.x;
compressedBlock[1] = cmpBlock.y;
} }
void DecompressBlockBC4_SingleChannel(CGU_UINT8 srcBlockTemp[16], void DecompressBlockBC4_SingleChannel(CGU_UINT8 srcBlockTemp[16],
const CGU_UINT32 compressedBlock[2], const CGU_UINT32 compressedBlock[2],
const CMP_BC15Options *BC15options) { const CMP_BC15Options *BC15options) {
if (BC15options) {} if (BC15options) {}
DecompressAlphaBlock(srcBlockTemp, compressedBlock); cmp_decompressAlphaBlock(srcBlockTemp, compressedBlock);
} }
//============================================== USER INTERFACES ======================================================== //============================================== USER INTERFACES ========================================================
@ -161,7 +174,7 @@ int CMP_CDECL DecompressBlockBC4(const unsigned char cmpBlock[8],
#endif #endif
//============================================== OpenCL USER INTERFACE ==================================================== //============================================== OpenCL USER INTERFACE ====================================================
#ifdef ASPM_GPU #ifdef ASPM_OPENCL
CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
CMP_GLOBAL const CMP_Vec4uc *ImageSource, CMP_GLOBAL const CMP_Vec4uc *ImageSource,
CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo, CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo,

View File

@ -1,5 +1,5 @@
//===================================================================== //=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal

View File

@ -0,0 +1,97 @@
//=====================================================================
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// File: BC4Encode.hlsl
//--------------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//--------------------------------------------------------------------------------------
#ifndef ASPM_HLSL
#define ASPM_HLSL
#endif
// Constant buffer in register b0 holding the encode parameters for this shader.
// Only g_num_block_x, g_start_block_id and g_quality are read by EncodeBlocks in
// this file; the other fields may be consumed by the included common kernel header
// or exist to keep the layout in step with the host side (not verifiable from here).
cbuffer cbCS : register( b0 )
{
uint g_tex_width;           // not read by EncodeBlocks
uint g_num_block_x;         // divisor that splits a linear block id into block row / block column
uint g_format;              // not read by EncodeBlocks
uint g_mode_id;             // not read by EncodeBlocks
uint g_start_block_id;      // offset added to the group-derived index to form the global block id
uint g_num_total_blocks;    // not read by EncodeBlocks (no upper bound check is done on the block id)
float g_alpha_weight;       // not read by EncodeBlocks
float g_quality;            // passed straight through to the block compressor
};
#include "BCn_Common_Kernel.h"
// Source Data
Texture2D g_Input : register( t0 );
StructuredBuffer<uint4> g_InBuff : register( t1 );
// Compressed Output Data
RWStructuredBuffer<uint2> g_OutBuff : register( u0 );
// Processing multiple blocks at a time
#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block
#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4
#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
#define BLOCK_SIZE_Y 4
#define BLOCK_SIZE_X 4
groupshared float4 shared_temp[THREAD_GROUP_SIZE];
// Compute-shader entry point for BC4 (single channel) encoding.
// A thread group of THREAD_GROUP_SIZE threads handles BLOCK_IN_GROUP blocks:
// every run of MAX_USED_THREAD consecutive threads loads the 16 texels of one
// 4x4 block into group-shared memory, then the first thread of each run
// compresses the block's .x channel and stores the result in g_OutBuff.
//   GI      - flattened thread index inside the group
//   groupID - group index; only .x is used
[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
{
    // we process 4 BC blocks per thread group
    uint blockInGroup = GI / MAX_USED_THREAD;                                       // what BC block this thread is on within this thread group
    uint blockID      = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on
    uint pixelBase    = blockInGroup * MAX_USED_THREAD;                             // the first id of the pixel in this BC block in this thread group
    uint pixelInBlock = GI - pixelBase;                                             // id of the pixel in this BC block

    // Block-grid coordinates of the block, then its top-left texel.
    uint block_y = blockID / g_num_block_x;
    uint block_x = blockID - block_y * g_num_block_x;
    uint base_x  = block_x * BLOCK_SIZE_X;
    uint base_y  = block_y * BLOCK_SIZE_Y;

    // Load up the pixels: one texel per thread, row-major inside the 4x4 block.
    if (pixelInBlock < 16)
    {
        shared_temp[GI] = float4(g_Input.Load( uint3( base_x + pixelInBlock % 4, base_y + pixelInBlock / 4, 0 ) ));
    }

    // Every texel of the group must be staged before any block is encoded.
    GroupMemoryBarrierWithGroupSync();

    // Process and save: the first thread of each block does the compression.
    if (pixelInBlock == 0)
    {
        float block[16];

        // Gather the single source channel (.x of each staged texel).
        // block[] holds scalars, so the element is assigned directly.
        for ( uint i = 0; i < 16; i ++ )
        {
            block[i] = shared_temp[pixelBase + i].x;
        }

        g_OutBuff[blockID] = CompressBlockBC4_UNORM(block, g_quality);
    }
}

View File

@ -1,5 +1,5 @@
//===================================================================== //=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
@ -24,36 +24,45 @@
//============================================== BC5 INTERFACES ======================================================= //============================================== BC5 INTERFACES =======================================================
void CompressBlockBC5_Internal(CMP_Vec4uc srcBlockTemp[16], CGU_Vec4ui CompressBC5Block_Internal(CMP_IN CGU_FLOAT aBlockU[16], CMP_IN CGU_FLOAT aBlockV[16], CMP_IN CGU_FLOAT fquality)
{
CGU_Vec4ui compBlock;
CGU_Vec2ui cmpBlock;
cmpBlock = cmp_compressAlphaBlock(aBlockU,fquality);
compBlock.x = cmpBlock.x;
compBlock.y = cmpBlock.y;
cmpBlock = cmp_compressAlphaBlock(aBlockV,fquality);
compBlock.z = cmpBlock.x;
compBlock.w = cmpBlock.y;
return compBlock;
}
#ifndef ASPM_HLSL
void CompressBlockBC5_Internal(CMP_Vec4uc srcBlockTemp[16], // range 0 to 255
CMP_GLOBAL CGU_UINT32 compressedBlock[4], CMP_GLOBAL CGU_UINT32 compressedBlock[4],
CMP_GLOBAL CMP_BC15Options *BC15options) CMP_GLOBAL CMP_BC15Options *BC15options)
{ {
if (BC15options->m_fquality) { CGU_Vec4ui cmpBlock;
// Resreved CGU_FLOAT alphaBlockU[16];
} CGU_FLOAT alphaBlockV[16];
CGU_UINT8 blkindex = 0; CGU_UINT32 i;
CGU_UINT8 srcindex = 0;
CGU_UINT8 alphaBlock[16];
for (CGU_INT32 j = 0; j < 4; j++) {
for (CGU_INT32 i = 0; i < 4; i++) {
alphaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // Red channel
srcindex++;
}
}
CompressAlphaBlock(alphaBlock,&compressedBlock[0]);
blkindex = 0; for (i = 0; i < 16; i++) {
srcindex = 0; alphaBlockU[i] = srcBlockTemp[i].x / 255.0f;
for (CGU_INT32 j = 0; j < 4; j++) { alphaBlockV[i] = srcBlockTemp[i].y / 255.0f;
for (CGU_INT32 i = 0; i < 4; i++) {
alphaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // Green channel
srcindex++;
}
} }
CompressAlphaBlock(alphaBlock,&compressedBlock[2]);
cmpBlock = CompressBC5Block_Internal(alphaBlockU, alphaBlockV,BC15options->m_fquality);
compressedBlock[0] = cmpBlock.x;
compressedBlock[1] = cmpBlock.y;
compressedBlock[2] = cmpBlock.z;
compressedBlock[3] = cmpBlock.w;
} }
#endif
#ifndef ASPM_GPU
void DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], void DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
CGU_UINT32 compressedBlock[4], CGU_UINT32 compressedBlock[4],
CMP_BC15Options *BC15options) CMP_BC15Options *BC15options)
@ -61,8 +70,8 @@ void DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
CGU_UINT8 alphaBlockR[BLOCK_SIZE_4X4]; CGU_UINT8 alphaBlockR[BLOCK_SIZE_4X4];
CGU_UINT8 alphaBlockG[BLOCK_SIZE_4X4]; CGU_UINT8 alphaBlockG[BLOCK_SIZE_4X4];
DecompressAlphaBlock(alphaBlockR, &compressedBlock[0]); cmp_decompressAlphaBlock(alphaBlockR, &compressedBlock[0]);
DecompressAlphaBlock(alphaBlockG, &compressedBlock[2]); cmp_decompressAlphaBlock(alphaBlockG, &compressedBlock[2]);
CGU_UINT8 blkindex = 0; CGU_UINT8 blkindex = 0;
CGU_UINT8 srcindex = 0; CGU_UINT8 srcindex = 0;
@ -94,15 +103,29 @@ void DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
} }
void CompressBlockBC5_DualChannel_Internal(const CGU_UINT8 srcBlockR[16], void CompressBlockBC5_DualChannel_Internal(const CGU_UINT8 srcBlockR[16],
const CGU_UINT8 srcBlockG[16], const CGU_UINT8 srcBlockG[16],
CMP_GLOBAL CGU_UINT32 compressedBlock[4], CMP_GLOBAL CGU_UINT32 compressedBlock[4],
CMP_GLOBAL const CMP_BC15Options *BC15options) CMP_GLOBAL const CMP_BC15Options *BC15options)
{ {
if (BC15options) {} if (BC15options) {}
CompressAlphaBlock(srcBlockR,&compressedBlock[0]); CGU_Vec2ui cmpBlock;
CompressAlphaBlock(srcBlockG,&compressedBlock[2]); CGU_FLOAT srcAlphaRF[16];
CGU_FLOAT srcAlphaGF[16];
for (CGU_INT i =0; i< 16; i++)
{
srcAlphaRF[i] = srcBlockR[i];
srcAlphaGF[i] = srcBlockG[i];
}
cmpBlock = cmp_compressAlphaBlock(srcAlphaRF,BC15options->m_fquality);
compressedBlock[0] = cmpBlock.x;
compressedBlock[1] = cmpBlock.y;
cmpBlock = cmp_compressAlphaBlock(srcAlphaGF,BC15options->m_fquality);
compressedBlock[2] = cmpBlock.x;
compressedBlock[3] = cmpBlock.y;
} }
void DecompressBC5_DualChannel_Internal(CMP_GLOBAL CGU_UINT8 srcBlockR[16], void DecompressBC5_DualChannel_Internal(CMP_GLOBAL CGU_UINT8 srcBlockR[16],
@ -111,10 +134,10 @@ void DecompressBC5_DualChannel_Internal(CMP_GLOBAL CGU_UINT8 srcBlockR[16],
const CMP_BC15Options *BC15options) const CMP_BC15Options *BC15options)
{ {
if (BC15options) {} if (BC15options) {}
DecompressAlphaBlock(srcBlockR, &compressedBlock[0]); cmp_decompressAlphaBlock(srcBlockR, &compressedBlock[0]);
DecompressAlphaBlock(srcBlockG, &compressedBlock[2]); cmp_decompressAlphaBlock(srcBlockG, &compressedBlock[2]);
} }
#endif
//============================================== USER INTERFACES ======================================================== //============================================== USER INTERFACES ========================================================
#ifndef ASPM_GPU #ifndef ASPM_GPU
@ -224,7 +247,7 @@ int CMP_CDECL DecompressBlockBC5(const CGU_UINT8 cmpBlock[16],
#endif #endif
//============================================== OpenCL USER INTERFACE ==================================================== //============================================== OpenCL USER INTERFACE ====================================================
#ifdef ASPM_GPU #ifdef ASPM_OPENCL
CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(CMP_GLOBAL const CMP_Vec4uc* ImageSource, CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(CMP_GLOBAL const CMP_Vec4uc* ImageSource,
CMP_GLOBAL CGU_UINT8* ImageDestination, CMP_GLOBAL CGU_UINT8* ImageDestination,
CMP_GLOBAL Source_Info* SourceInfo, CMP_GLOBAL Source_Info* SourceInfo,

View File

@ -1,5 +1,5 @@
//===================================================================== //=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal

View File

@ -0,0 +1,98 @@
//=====================================================================
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// File: BC1Encode.hlsl
//--------------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//--------------------------------------------------------------------------------------
#ifndef ASPM_HLSL
#define ASPM_HLSL
#endif
// Constant buffer in register b0 holding the encode parameters for this shader.
// Only g_num_block_x, g_start_block_id and g_quality are read by EncodeBlocks in
// this file; the other fields may be consumed by the included common kernel header
// or exist to keep the layout in step with the host side (not verifiable from here).
cbuffer cbCS : register( b0 )
{
uint g_tex_width;           // not read by EncodeBlocks
uint g_num_block_x;         // divisor that splits a linear block id into block row / block column
uint g_format;              // not read by EncodeBlocks
uint g_mode_id;             // not read by EncodeBlocks
uint g_start_block_id;      // offset added to the group-derived index to form the global block id
uint g_num_total_blocks;    // not read by EncodeBlocks (no upper bound check is done on the block id)
float g_alpha_weight;       // not read by EncodeBlocks
float g_quality;            // passed straight through to the block compressor
};
#include "BCn_Common_Kernel.h"
// Source Data
Texture2D g_Input : register( t0 );
StructuredBuffer<uint4> g_InBuff : register( t1 );
// Compressed Output Data
RWStructuredBuffer<uint4> g_OutBuff : register( u0 );
// Processing multiple blocks at a time
#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block
#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4
#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
#define BLOCK_SIZE_Y 4
#define BLOCK_SIZE_X 4
groupshared float4 shared_temp[THREAD_GROUP_SIZE];
// Compute-shader entry point for BC5 (two channel) encoding.
// Threads are grouped in runs of MAX_USED_THREAD; each run loads one 4x4 block
// of texels into group-shared memory. After the group barrier, the first thread
// of each run copies the .x and .y channels into two scalar arrays and writes
// the compressed block to g_OutBuff.
//   GI      - flattened thread index inside the group
//   groupID - group index; only .x is used
[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
{
    // Split the thread index into (block within group, texel within block).
    const uint blockSlot = GI / MAX_USED_THREAD;
    const uint slotBase  = blockSlot * MAX_USED_THREAD;
    const uint texel     = GI - slotBase;

    // Global block id and its coordinates in the block grid.
    const uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockSlot;
    const uint row     = blockID / g_num_block_x;
    const uint col     = blockID - row * g_num_block_x;

    // Source texel fetched by this thread (row-major within the 4x4 block).
    const uint srcX = col * BLOCK_SIZE_X + texel % 4;
    const uint srcY = row * BLOCK_SIZE_Y + texel / 4;

    if (texel < 16)
    {
        shared_temp[GI] = float4(g_Input.Load( uint3( srcX, srcY, 0 ) ));
    }

    // Make every staged texel visible to the whole group before encoding.
    GroupMemoryBarrierWithGroupSync();

    // One thread per block gathers the two channels and encodes them.
    if (texel == 0)
    {
        float blockU[16];
        float blockV[16];

        for ( uint k = 0; k < 16; k++ )
        {
            const float2 uv = shared_temp[slotBase + k].xy;
            blockU[k] = uv.x;
            blockV[k] = uv.y;
        }

        g_OutBuff[blockID] = CompressBlockBC5_UNORM(blockU,blockV,g_quality);
    }
}

View File

@ -1,5 +1,5 @@
//===================================================================== //=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
@ -29,11 +29,11 @@ void memset(CGU_UINT8 *srcdata, CGU_UINT8 value, CGU_INT size)
*srcdata++ = value; *srcdata++ = value;
} }
void memcpy(CGU_UINT8 *srcdata, CGU_UINT8 *dstdata, CGU_INT size) void memcpy(CGU_UINT8 *dstdata, CGU_UINT8 *srcdata, CGU_INT size)
{ {
for (CGU_INT i = 0; i < size; i++) for (CGU_INT i = 0; i < size; i++)
{ {
*srcdata = *dstdata; *dstdata = *srcdata;
srcdata++; srcdata++;
dstdata++; dstdata++;
} }
@ -509,7 +509,7 @@ CGU_FLOAT totalError_d(CGU_FLOAT data[MAX_ENTRIES][MAX_DIMENSION_BIG], CGU_FLOAT
// index, uncentered, in the range 0..k-1 // index, uncentered, in the range 0..k-1
// //
void quant_AnD_Shell(CGU_FLOAT* v_, CGU_INT k, CGU_INT n, CGU_INT *idx) void quant_AnD_Shell(CGU_FLOAT* v_, CGU_INT k, CGU_INT n, CGU_INT idx[MAX_ENTRIES])
{ {
#define MAX_BLOCK MAX_ENTRIES #define MAX_BLOCK MAX_ENTRIES
CGU_INT i, j; CGU_INT i, j;
@ -2530,7 +2530,7 @@ CGU_INT Unquantize(CGU_INT comp, unsigned char uBitsPerComp, CGU_BOOL bSigned)
return unq; return unq;
} }
CGU_INT finish_unquantizeF16(CGU_INT q, CGU_BOOL isSigned) CGU_INT finish_unquantizef16(CGU_INT q, CGU_BOOL isSigned)
{ {
// Is it F16 Signed else F16 Unsigned // Is it F16 Signed else F16 Unsigned
if (isSigned) if (isSigned)
@ -2565,8 +2565,8 @@ void decompress_endpoints1(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false); out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
// F16 format // F16 format
outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
} }
} }
else else
@ -2581,8 +2581,8 @@ void decompress_endpoints1(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false); out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
// F16 format // F16 format
outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
} }
} }
@ -2602,8 +2602,8 @@ void decompress_endpoints1(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false); out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
// F16 format // F16 format
outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
} }
} }
else else
@ -2618,8 +2618,8 @@ void decompress_endpoints1(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false); out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
// F16 format // F16 format
outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
} }
} }
} }
@ -2659,10 +2659,10 @@ void decompress_endpoints2(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, true); out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, true);
// F16 format // F16 format
outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], true); outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], true);
outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], true); outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], true);
outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], true); outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], true);
outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], true); outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], true);
} }
} }
@ -2682,10 +2682,10 @@ void decompress_endpoints2(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false); out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false);
// nbits to F16 format // nbits to F16 format
outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], false); outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], false);
outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], false); outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], false);
} }
} }
@ -2713,10 +2713,10 @@ void decompress_endpoints2(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false); out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false);
// nbits to F16 format // nbits to F16 format
outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], false); outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], false);
outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], false); outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], false);
} }
} }
@ -2736,10 +2736,10 @@ void decompress_endpoints2(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false); out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false);
// nbits to F16 format // nbits to F16 format
outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], false); outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], false);
outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], false); outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], false);
} }
} }
} }
@ -3906,7 +3906,7 @@ int CMP_CDECL DecompressBlockBC6(const unsigned char cmpBlock[16],
#endif // !ASPM_GPU #endif // !ASPM_GPU
//============================================== OpenCL USER INTERFACE ==================================================== //============================================== OpenCL USER INTERFACE ====================================================
#ifdef ASPM_GPU #ifdef ASPM_OPENCL
CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
CMP_GLOBAL CGU_UINT8* p_source_pixels, CMP_GLOBAL CGU_UINT8* p_source_pixels,
CMP_GLOBAL CGU_UINT8* p_encoded_blocks, CMP_GLOBAL CGU_UINT8* p_encoded_blocks,

View File

@ -1,5 +1,5 @@
//===================================================================== //=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
@ -23,6 +23,8 @@
#ifndef BC6_ENCODE_KERNEL_H #ifndef BC6_ENCODE_KERNEL_H
#define BC6_ENCODE_KERNEL_H #define BC6_ENCODE_KERNEL_H
#pragma warning(disable:4505) // disable warnings on unreferenced local function has been removed
#include "Common_Def.h" #include "Common_Def.h"
#define MAX_TRACE 10 #define MAX_TRACE 10
@ -127,25 +129,25 @@ typedef struct
__constant ModePartitions ModePartition[MAX_BC6H_MODES + 1] = __constant ModePartitions ModePartition[MAX_BC6H_MODES + 1] =
{ {
0, 0,0,0, 0, 0, 0, 0, 0, // Mode = Invaild {0, {0,0,0}, 0, 0, 0, 0, 0}, // Mode = Invaild
// Two region Partition // Two region Partition
10, 5,5,5, 1, 2, 3, 0x00, 31, // Mode = 1 { 10, {5,5,5}, 1, 2, 3, 0x00, 31 }, // Mode = 1
7, 6,6,6, 1, 2, 3, 0x01, 248, // Mode = 2 { 7, {6,6,6}, 1, 2, 3, 0x01, 248}, // Mode = 2
11, 5,4,4, 1, 5, 3, 0x02, 15, // Mode = 3 { 11, {5,4,4}, 1, 5, 3, 0x02, 15 }, // Mode = 3
11, 4,5,4, 1, 5, 3, 0x06, 15, // Mode = 4 { 11, {4,5,4}, 1, 5, 3, 0x06, 15 }, // Mode = 4
11, 4,4,5, 1, 5, 3, 0x0a, 15, // Mode = 5 { 11, {4,4,5}, 1, 5, 3, 0x0a, 15 }, // Mode = 5
9, 5,5,5, 1, 5, 3, 0x0e, 62, // Mode = 6 { 9, {5,5,5}, 1, 5, 3, 0x0e, 62 }, // Mode = 6
8, 6,5,5, 1, 5, 3, 0x12, 124, // Mode = 7 { 8, {6,5,5}, 1, 5, 3, 0x12, 124}, // Mode = 7
8, 5,6,5, 1, 5, 3, 0x16, 124, // Mode = 8 { 8, {5,6,5}, 1, 5, 3, 0x16, 124}, // Mode = 8
8, 5,5,6, 1, 5, 3, 0x1a, 124, // Mode = 9 { 8, {5,5,6}, 1, 5, 3, 0x1a, 124}, // Mode = 9
6, 6,6,6, 0, 5, 3, 0x1e, 496, // Mode = 10 { 6, {6,6,6}, 0, 5, 3, 0x1e, 496}, // Mode = 10
// One region Partition // One region Partition
10, 10,10,10, 0, 5, 4, 0x03, 31, // Mode = 11 {10, {10,10,10}, 0, 5, 4, 0x03, 31}, // Mode = 11
11, 9,9,9, 1, 5, 4, 0x07, 15, // Mode = 12 {11, {9,9,9 }, 1, 5, 4, 0x07, 15}, // Mode = 12
12, 8,8,8, 1, 5, 4, 0x0b, 7, // Mode = 13 {12, {8,8,8 }, 1, 5, 4, 0x0b, 7 }, // Mode = 13
16, 4,4,4, 1, 5, 4, 0x0f, 1, // Mode = 14 {16, {4,4,4 }, 1, 5, 4, 0x0f, 1 } // Mode = 14
}; };
//================================================ //================================================

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
//===================================================================== //=====================================================================
// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights // in the Software without restriction, including without limitation the rights
@ -106,6 +106,7 @@ CGU_INT expandbits(CGU_INT bits, CGU_INT v)
} }
CMP_EXPORT CGU_INT bc7_isa() { CMP_EXPORT CGU_INT bc7_isa() {
#ifndef ASPM_GPU
#if defined(ISPC_TARGET_SSE2) #if defined(ISPC_TARGET_SSE2)
ASPM_PRINT(("SSE2")); ASPM_PRINT(("SSE2"));
return 0; return 0;
@ -120,8 +121,9 @@ CMP_EXPORT CGU_INT bc7_isa() {
return 3; return 3;
#else #else
ASPM_PRINT(("CPU")); ASPM_PRINT(("CPU"));
return -1;
#endif #endif
#endif
return -1;
} }
CMP_EXPORT void init_BC7ramps() CMP_EXPORT void init_BC7ramps()
@ -528,139 +530,6 @@ void GetProjecedImage(
INLINE CGV_UINT8 get_partition_subset(CGV_INT part_id, CGU_INT maxSubsets, CGV_INT index) INLINE CGV_UINT8 get_partition_subset(CGV_INT part_id, CGU_INT maxSubsets, CGV_INT index)
{ {
CMP_STATIC uniform CMP_CONSTANT CGU_UINT32 subset_mask_table[] = {
// 2 subset region patterns
0x0000CCCCu, // 0 1100 1100 1100 1100 (MSB..LSB)
0x00008888u, // 1 1000 1000 1000 1000
0x0000EEEEu, // 2 1110 1110 1110 1110
0x0000ECC8u, // 3 1110 1100 1100 1000
0x0000C880u, // 4 1100 1000 1000 0000
0x0000FEECu, // 5 1111 1110 1110 1100
0x0000FEC8u, // 6 1111 1110 1100 1000
0x0000EC80u, // 7 1110 1100 1000 0000
0x0000C800u, // 8 1100 1000 0000 0000
0x0000FFECu, // 9 1111 1111 1110 1100
0x0000FE80u, // 10 1111 1110 1000 0000
0x0000E800u, // 11 1110 1000 0000 0000
0x0000FFE8u, // 12 1111 1111 1110 1000
0x0000FF00u, // 13 1111 1111 0000 0000
0x0000FFF0u, // 14 1111 1111 1111 0000
0x0000F000u, // 15 1111 0000 0000 0000
0x0000F710u, // 16 1111 0111 0001 0000
0x0000008Eu, // 17 0000 0000 1000 1110
0x00007100u, // 18 0111 0001 0000 0000
0x000008CEu, // 19 0000 1000 1100 1110
0x0000008Cu, // 20 0000 0000 1000 1100
0x00007310u, // 21 0111 0011 0001 0000
0x00003100u, // 22 0011 0001 0000 0000
0x00008CCEu, // 23 1000 1100 1100 1110
0x0000088Cu, // 24 0000 1000 1000 1100
0x00003110u, // 25 0011 0001 0001 0000
0x00006666u, // 26 0110 0110 0110 0110
0x0000366Cu, // 27 0011 0110 0110 1100
0x000017E8u, // 28 0001 0111 1110 1000
0x00000FF0u, // 29 0000 1111 1111 0000
0x0000718Eu, // 30 0111 0001 1000 1110
0x0000399Cu, // 31 0011 1001 1001 1100
0x0000AAAAu, // 32 1010 1010 1010 1010
0x0000F0F0u, // 33 1111 0000 1111 0000
0x00005A5Au, // 34 0101 1010 0101 1010
0x000033CCu, // 35 0011 0011 1100 1100
0x00003C3Cu, // 36 0011 1100 0011 1100
0x000055AAu, // 37 0101 0101 1010 1010
0x00009696u, // 38 1001 0110 1001 0110
0x0000A55Au, // 39 1010 0101 0101 1010
0x000073CEu, // 40 0111 0011 1100 1110
0x000013C8u, // 41 0001 0011 1100 1000
0x0000324Cu, // 42 0011 0010 0100 1100
0x00003BDCu, // 43 0011 1011 1101 1100
0x00006996u, // 44 0110 1001 1001 0110
0x0000C33Cu, // 45 1100 0011 0011 1100
0x00009966u, // 46 1001 1001 0110 0110
0x00000660u, // 47 0000 0110 0110 0000
0x00000272u, // 48 0000 0010 0111 0010
0x000004E4u, // 49 0000 0100 1110 0100
0x00004E40u, // 50 0100 1110 0100 0000
0x00002720u, // 51 0010 0111 0010 0000
0x0000C936u, // 52 1100 1001 0011 0110
0x0000936Cu, // 53 1001 0011 0110 1100
0x000039C6u, // 54 0011 1001 1100 0110
0x0000639Cu, // 55 0110 0011 1001 1100
0x00009336u, // 56 1001 0011 0011 0110
0x00009CC6u, // 57 1001 1100 1100 0110
0x0000817Eu, // 58 1000 0001 0111 1110
0x0000E718u, // 59 1110 0111 0001 1000
0x0000CCF0u, // 60 1100 1100 1111 0000
0x00000FCCu, // 61 0000 1111 1100 1100
0x00007744u, // 62 0111 0111 0100 0100
0x0000EE22u, // 63 1110 1110 0010 0010
// 3 Subset region patterns
0xF60008CCu,// 0 1111 0110 0000 0000 : 0000 1000 1100 1100 = 2222122011001100 (MSB...LSB)
0x73008CC8u,// 1 0111 0011 0000 0000 : 1000 1100 1100 1000 = 1222112211001000
0x3310CC80u,// 2 0011 0011 0001 0000 : 1100 1100 1000 0000 = 1122112210020000
0x00CEEC00u,// 3 0000 0000 1100 1110 : 1110 1100 0000 0000 = 1110110022002220
0xCC003300u,// 4 1100 1100 0000 0000 : 0011 0011 0000 0000 = 2211221100000000
0xCC0000CCu,// 5 1100 1100 0000 0000 : 0000 0000 1100 1100 = 2200220011001100
0x00CCFF00u,// 6 0000 0000 1100 1100 : 1111 1111 0000 0000 = 1111111122002200
0x3300CCCCu,// 7 0011 0011 0000 0000 : 1100 1100 1100 1100 = 1122112211001100
0xF0000F00u,// 8 1111 0000 0000 0000 : 0000 1111 0000 0000 = 2222111100000000
0xF0000FF0u,// 9 1111 0000 0000 0000 : 0000 1111 1111 0000 = 2222111111110000
0xFF0000F0u,// 10 1111 1111 0000 0000 : 0000 0000 1111 0000 = 2222222211110000
0x88884444u,// 11 1000 1000 1000 1000 : 0100 0100 0100 0100 = 2100210021002100
0x88886666u,// 12 1000 1000 1000 1000 : 0110 0110 0110 0110 = 2110211021102110
0xCCCC2222u,// 13 1100 1100 1100 1100 : 0010 0010 0010 0010 = 2210221022102210
0xEC80136Cu,// 14 1110 1100 1000 0000 : 0001 0011 0110 1100 = 2221221121101100
0x7310008Cu,// 15 0111 0011 0001 0000 : 0000 0000 1000 1100 = 0222002210021100
0xC80036C8u,// 16 1100 1000 0000 0000 : 0011 0110 1100 1000 = 2211211011001000
0x310008CEu,// 17 0011 0001 0000 0000 : 0000 1000 1100 1110 = 0022100211001110
0xCCC03330u,// 18 1100 1100 1100 0000 : 0011 0011 0011 0000 = 2211221122110000
0x0CCCF000u,// 19 0000 1100 1100 1100 : 1111 0000 0000 0000 = 1111220022002200
0xEE0000EEu,// 20 1110 1110 0000 0000 : 0000 0000 1110 1110 = 2220222011101110
0x77008888u,// 21 0111 0111 0000 0000 : 1000 1000 1000 1000 = 1222122210001000
0xCC0022C0u,// 22 1100 1100 0000 0000 : 0010 0010 1100 0000 = 2210221011000000
0x33004430u,// 23 0011 0011 0000 0000 : 0100 0100 0011 0000 = 0122012200110000
0x00CC0C22u,// 24 0000 0000 1100 1100 : 0000 1100 0010 0010 = 0000110022102210
0xFC880344u,// 25 1111 1100 1000 1000 : 0000 0011 0100 0100 = 2222221121002100
0x06606996u,// 26 0000 0110 0110 0000 : 0110 1001 1001 0110 = 0110122112210110
0x66009960u,// 27 0110 0110 0000 0000 : 1001 1001 0110 0000 = 1221122101100000
0xC88C0330u,// 28 1100 1000 1000 1100 : 0000 0011 0011 0000 = 2200201120112200
0xF9000066u,// 29 1111 1001 0000 0000 : 0000 0000 0110 0110 = 2222200201100110
0x0CC0C22Cu,// 30 0000 1100 1100 0000 : 1100 0010 0010 1100 = 1100221022101100
0x73108C00u,// 31 0111 0011 0001 0000 : 1000 1100 0000 0000 = 1222112200020000
0xEC801300u,// 32 1110 1100 1000 0000 : 0001 0011 0000 0000 = 2221221120000000
0x08CEC400u,// 33 0000 1000 1100 1110 : 1100 0100 0000 0000 = 1100210022002220
0xEC80004Cu,// 34 1110 1100 1000 0000 : 0000 0000 0100 1100 = 2220220021001100
0x44442222u,// 35 0100 0100 0100 0100 : 0010 0010 0010 0010 = 0210021002100210
0x0F0000F0u,// 36 0000 1111 0000 0000 : 0000 0000 1111 0000 = 0000222211110000
0x49242492u,// 37 0100 1001 0010 0100 : 0010 0100 1001 0010 = 0210210210210210
0x42942942u,// 38 0100 0010 1001 0100 : 0010 1001 0100 0010 = 0210102121020210
0x0C30C30Cu,// 39 0000 1100 0011 0000 : 1100 0011 0000 1100 = 1100221100221100
0x03C0C03Cu,// 40 0000 0011 1100 0000 : 1100 0000 0011 1100 = 1100002222111100
0xFF0000AAu,// 41 1111 1111 0000 0000 : 0000 0000 1010 1010 = 2222222210101010
0x5500AA00u,// 42 0101 0101 0000 0000 : 1010 1010 0000 0000 = 1212121200000000
0xCCCC3030u,// 43 1100 1100 1100 1100 : 0011 0000 0011 0000 = 2211220022112200
0x0C0CC0C0u,// 44 0000 1100 0000 1100 : 1100 0000 1100 0000 = 1100220011002200
0x66669090u,// 45 0110 0110 0110 0110 : 1001 0000 1001 0000 = 1221022012210220
0x0FF0A00Au,// 46 0000 1111 1111 0000 : 1010 0000 0000 1010 = 1010222222221010
0x5550AAA0u,// 47 0101 0101 0101 0000 : 1010 1010 1010 0000 = 1212121212120000
0xF0000AAAu,// 48 1111 0000 0000 0000 : 0000 1010 1010 1010 = 2222101010101010
0x0E0EE0E0u,// 49 0000 1110 0000 1110 : 1110 0000 1110 0000 = 1110222011102220
0x88887070u,// 50 1000 1000 1000 1000 : 0111 0000 0111 0000 = 2111200021112000
0x99906660u,// 51 1001 1001 1001 0000 : 0110 0110 0110 0000 = 2112211221120000
0xE00E0EE0u,// 52 1110 0000 0000 1110 : 0000 1110 1110 0000 = 2220111011102220
0x88880770u,// 53 1000 1000 1000 1000 : 0000 0111 0111 0000 = 2000211121112000
0xF0000666u,// 54 1111 0000 0000 0000 : 0000 0110 0110 0110 = 2222011001100110
0x99006600u,// 55 1001 1001 0000 0000 : 0110 0110 0000 0000 = 2112211200000000
0xFF000066u,// 56 1111 1111 0000 0000 : 0000 0000 0110 0110 = 2222222201100110
0xC00C0CC0u,// 57 1100 0000 0000 1100 : 0000 1100 1100 0000 = 2200110011002200
0xCCCC0330u,// 58 1100 1100 1100 1100 : 0000 0011 0011 0000 = 2200221122112200
0x90006000u,// 59 1001 0000 0000 0000 : 0110 0000 0000 0000 = 2112000000000000
0x08088080u,// 60 0000 1000 0000 1000 : 1000 0000 1000 0000 = 1000200010002000
0xEEEE1010u,// 61 1110 1110 1110 1110 : 0001 0000 0001 0000 = 2221222022212220
0xFFF0000Au,// 62 1111 1111 1111 0000 : 0000 0000 0000 1010 = 2222222222221010
0x731008CEu,// 63 0111 0011 0001 0000 : 0000 1000 1100 1110 = 0222102211021110
};
if (maxSubsets == 2) if (maxSubsets == 2)
{ {
CGV_UINT32 mask_packed = subset_mask_table[part_id]; CGV_UINT32 mask_packed = subset_mask_table[part_id];
@ -1029,14 +898,6 @@ INLINE CGV_EPOCODE ep_find_floor(
{ {
#ifdef ASPM_GPU // GPU Code #ifdef ASPM_GPU // GPU Code
CGV_FLOAT rampf = 0.0F; CGV_FLOAT rampf = 0.0F;
CMP_CONSTANT CGV_EPOCODE rampI[5*SOURCE_BLOCK_SIZE] = {
0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 0 bit index
0 ,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 1 bit index
0 ,21,43,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 2 bit index
0 ,9 ,18,27,37,46,55,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 3 bit index
0 ,4 ,9 ,13,17,21,26,30,34,38,43,47,51,55,60,64 // 4 bit index
};
CGV_EPOCODE e1 = expand_epocode(p1, bits); CGV_EPOCODE e1 = expand_epocode(p1, bits);
CGV_EPOCODE e2 = expand_epocode(p2,bits); CGV_EPOCODE e2 = expand_epocode(p2,bits);
CGV_FLOAT ramp = gather_epocode(rampI,clogBC7*16+index)/64.0F; CGV_FLOAT ramp = gather_epocode(rampI,clogBC7*16+index)/64.0F;
@ -1077,21 +938,6 @@ INLINE CGV_EPOCODE ep_find_floor(
INLINE void get_fixuptable(CGV_FIXUPINDEX fixup[3], CGV_PARTID part_id) INLINE void get_fixuptable(CGV_FIXUPINDEX fixup[3], CGV_PARTID part_id)
{ {
// same as CMP SDK v3.1 BC7_FIXUPINDEX1 & BC7_FIXUPINDEX2 for each partition range 0..63
// The data is saved as a packed INT = (BC7_FIXUPINDEX1 << 4 + BC7_FIXUPINDEX2)
CMP_STATIC uniform __constant CGV_FIXUPINDEX FIXUPINDEX[] = {
// 2 subset partitions 0..63
0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u,
0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0x20u, 0x20u,
0xf0u, 0xf0u, 0x60u, 0x80u, 0x20u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0x60u,
0x60u, 0x20u, 0x60u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u,
// 3 subset partitions 64..128
0x3fu, 0x38u, 0xf8u, 0xf3u, 0x8fu, 0x3fu, 0xf3u, 0xf8u, 0x8fu, 0x8fu, 0x6fu, 0x6fu, 0x6fu, 0x5fu, 0x3fu, 0x38u,
0x3fu, 0x38u, 0x8fu, 0xf3u, 0x3fu, 0x38u, 0x6fu, 0xa8u, 0x53u, 0x8fu, 0x86u, 0x6au, 0x8fu, 0x5fu, 0xfau, 0xf8u,
0x8fu, 0xf3u, 0x3fu, 0x5au, 0x6au, 0xa8u, 0x89u, 0xfau, 0xf6u, 0x3fu, 0xf8u, 0x5fu, 0xf3u, 0xf6u, 0xf6u, 0xf8u,
0x3fu, 0xf3u, 0x5fu, 0x5fu, 0x5fu, 0x8fu, 0x5fu, 0xafu, 0x5fu, 0xafu, 0x8fu, 0xdfu, 0xf3u, 0xcfu, 0x3fu, 0x38u
};
CGV_FIXUPINDEX skip_packed = FIXUPINDEX[part_id];// gather_int2(FIXUPINDEX, part_id); CGV_FIXUPINDEX skip_packed = FIXUPINDEX[part_id];// gather_int2(FIXUPINDEX, part_id);
fixup[0] = 0; fixup[0] = 0;
fixup[1] = skip_packed>>4; fixup[1] = skip_packed>>4;
@ -1472,27 +1318,29 @@ CGV_ERROR quant_solid_color(
if (error_t < error_0) if (error_t < error_0)
{ {
// We have a solid color: Use image src if on GPU
image_log = iclogBC7; image_log = iclogBC7;
image_idx = image_log; image_idx = image_log;
CGU_BOOL srcIsWhite = FALSE;
if ((image_src[0] == 255.0f)&&(image_src[1] == 255.0f)&&(image_src[2] == 255.0f)) srcIsWhite = TRUE;
#ifdef ASPM_GPU // This needs improving
CGV_IMAGE MinC[4] = {255,255,255,255};
CGV_IMAGE MaxC[4] = {0,0,0,0};
// get min max colors
for (CGU_CHANNEL ch=0;ch<channels3or4; ch++)
for (CGV_ENTRIES k=0;k<numEntries;k++)
{
if (image_src[k+ch*SOURCE_BLOCK_SIZE] < MinC[ch] ) MinC[ch] = image_src[k+ch*SOURCE_BLOCK_SIZE];
if (image_src[k+ch*SOURCE_BLOCK_SIZE] > MaxC[ch] ) MaxC[ch] = image_src[k+ch*SOURCE_BLOCK_SIZE];
}
for (CGU_CHANNEL ch = 0; ch<channels3or4; ch++)
{
epo_0[ch] = MinC[ch];
epo_0[4 + ch] = MaxC[ch];
}
#else // This is good on CPU
for (CGU_CHANNEL ch = 0; ch<channels3or4; ch++) for (CGU_CHANNEL ch = 0; ch<channels3or4; ch++)
{ {
#ifdef ASPM_GPU
if (srcIsWhite == TRUE)
{
// Default White block!
epo_0[ ch] = 0x7F;
epo_0[4+ch] = 0x7F;
}
else
{
// Default black block!
epo_0[ ch] = 0;
epo_0[4+ch] = 0;
}
#else
#ifdef USE_BC7_SP_ERR_IDX #ifdef USE_BC7_SP_ERR_IDX
if (BC7EncodeRamps.ramp_init) { if (BC7EncodeRamps.ramp_init) {
CGV_EPOCODE index = (CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits[ch])*256*2*2*16*2)+(epo_dr_0[ch]*2*2*16*2)+(t1o[ch]*2*16*2)+(t2o[ch]*16*2)+(iclogBC7*2); CGV_EPOCODE index = (CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits[ch])*256*2*2*16*2)+(epo_dr_0[ch]*2*2*16*2)+(t1o[ch]*2*16*2)+(t2o[ch]*16*2)+(iclogBC7*2);
@ -1500,15 +1348,15 @@ CGV_ERROR quant_solid_color(
epo_0[4+ch] = BC7EncodeRamps.sp_idx[index+1]&0xFF;// gather_epocode(u_BC7Encode->sp_idx,index+1)&0xFF; epo_0[4+ch] = BC7EncodeRamps.sp_idx[index+1]&0xFF;// gather_epocode(u_BC7Encode->sp_idx,index+1)&0xFF;
} }
else { else {
epo_0[ch] = 0; epo_0[ch] = 0;
epo_0[4 + ch] = 0; epo_0[4 + ch] = 0;
} }
#else #else
epo_0[ ch] = 0; epo_0[ ch] = 0;
epo_0[4+ch] = 0; epo_0[4+ch] = 0;
#endif
#endif #endif
} }
#endif
error_0 = error_t; error_0 = error_t;
} }
//if (error_0 == 0) //if (error_0 == 0)
@ -1980,7 +1828,11 @@ INLINE void cmp_encode_swap(CGV_EPOCODE endpoint[], CGU_INT channels, CGV_INDEX
{ {
cmp_swap_epo(&endpoint[0], &endpoint[channels], channels); cmp_swap_epo(&endpoint[0], &endpoint[channels], channels);
for (CGU_INT k=0; k<SOURCE_BLOCK_SIZE; k++) for (CGU_INT k=0; k<SOURCE_BLOCK_SIZE; k++)
#ifdef ASPM_GPU
block_index[k] = (levels-1) - block_index[k];
#else
block_index[k] = CGV_INDEX(levels-1) - block_index[k]; block_index[k] = CGV_INDEX(levels-1) - block_index[k];
#endif
} }
} }
@ -1994,6 +1846,7 @@ void cmp_encode_index(CGV_CMPOUT data[16], CGU_INT* uniform pPos, CGV_INDEX bloc
} }
} }
void encode_endpoint(CGV_CMPOUT data[16], CGU_INT* uniform pPos, CGV_BYTE block_index[16], CGU_INT bits, CGV_SHIFT32 flips) void encode_endpoint(CGV_CMPOUT data[16], CGU_INT* uniform pPos, CGV_BYTE block_index[16], CGU_INT bits, CGV_SHIFT32 flips)
{ {
CGU_INT levels = 1 << bits; CGU_INT levels = 1 << bits;
@ -2006,8 +1859,8 @@ void encode_endpoint(CGV_CMPOUT data[16], CGU_INT* uniform pPos, CGV_BYTE block_
CGV_TYPEINT q = qbits_shifted&15; CGV_TYPEINT q = qbits_shifted&15;
if ((flips_shifted&1)>0) q = (levels-1)-q; if ((flips_shifted&1)>0) q = (levels-1)-q;
if (k1==0 && k2==0) cmp_Write8Bit(data, pPos, bits - 1, static_cast <CGV_BYTE>(q)); if (k1==0 && k2==0) cmp_Write8Bit(data, pPos, bits-1, CMP_STATIC_CAST(CGV_BYTE,q));
else cmp_Write8Bit(data, pPos, bits, static_cast<CGV_BYTE>(q)); else cmp_Write8Bit(data, pPos, bits , CMP_STATIC_CAST(CGV_BYTE,q));
qbits_shifted >>= 4; qbits_shifted >>= 4;
flips_shifted >>= 1; flips_shifted >>= 1;
} }
@ -2236,10 +2089,10 @@ void Encode_mode4( CGV_CMPOUT cmp_out[COMPRESSED_BLOCK_SIZE],
cmp_Write8Bit(cmp_out,&bitPosition,1,1); cmp_Write8Bit(cmp_out,&bitPosition,1,1);
// rotation 2 bits // rotation 2 bits
cmp_Write8Bit(cmp_out, &bitPosition, 2, static_cast <CGV_BYTE> (params->rotated_channel)); cmp_Write8Bit(cmp_out,&bitPosition,2, CMP_STATIC_CAST(CGV_BYTE,params->rotated_channel));
// idxMode 1 bit // idxMode 1 bit
cmp_Write8Bit(cmp_out, &bitPosition, 1, static_cast <CGV_BYTE> (params->idxMode)); cmp_Write8Bit(cmp_out, &bitPosition, 1,CMP_STATIC_CAST(CGV_BYTE,params->idxMode));
CGU_INT idxBits[2] = {2,3}; CGU_INT idxBits[2] = {2,3};
@ -2264,14 +2117,14 @@ void Encode_mode4( CGV_CMPOUT cmp_out[COMPRESSED_BLOCK_SIZE],
// B0 : B1 // B0 : B1
for (CGU_INT component=0; component < 3; component++) for (CGU_INT component=0; component < 3; component++)
{ {
cmp_Write8Bit(cmp_out, &bitPosition, 5, static_cast<CGV_BYTE> (params->color_qendpoint[component])); cmp_Write8Bit(cmp_out,&bitPosition,5,CMP_STATIC_CAST(CGV_BYTE,params->color_qendpoint[component]));
cmp_Write8Bit(cmp_out, &bitPosition, 5, static_cast <CGV_BYTE> (params->color_qendpoint[4 + component])); cmp_Write8Bit(cmp_out,&bitPosition,5,CMP_STATIC_CAST(CGV_BYTE,params->color_qendpoint[4 + component]));
} }
// alpha endpoints (6 bits each) // alpha endpoints (6 bits each)
// A0 : A1 // A0 : A1
cmp_Write8Bit(cmp_out, &bitPosition, 6, static_cast<CGV_BYTE> (params->alpha_qendpoint[0])); cmp_Write8Bit(cmp_out,&bitPosition,6,CMP_STATIC_CAST(CGV_BYTE,params->alpha_qendpoint[0]));
cmp_Write8Bit(cmp_out, &bitPosition, 6, static_cast<CGV_BYTE> (params->alpha_qendpoint[4])); cmp_Write8Bit(cmp_out,&bitPosition,6,CMP_STATIC_CAST(CGV_BYTE,params->alpha_qendpoint[4]));
// index 2 bits each (31 bits total) // index 2 bits each (31 bits total)
cmp_encode_index(cmp_out, &bitPosition, params->color_index, 2); cmp_encode_index(cmp_out, &bitPosition, params->color_index, 2);
@ -2289,7 +2142,7 @@ void Encode_mode5( CGV_CMPOUT cmp_out[COMPRESSED_BLOCK_SIZE],
cmp_Write8Bit(cmp_out,&bitPosition,1,1); cmp_Write8Bit(cmp_out,&bitPosition,1,1);
// Write 2 bit rotation // Write 2 bit rotation
cmp_Write8Bit(cmp_out, &bitPosition, 2, static_cast<CGV_BYTE> (params->rotated_channel)); cmp_Write8Bit(cmp_out,&bitPosition,2, CMP_STATIC_CAST(CGV_BYTE,params->rotated_channel));
cmp_encode_swap(params->color_qendpoint, 4, params->color_index,2); cmp_encode_swap(params->color_qendpoint, 4, params->color_index,2);
cmp_encode_swap(params->alpha_qendpoint, 4, params->alpha_index,2); cmp_encode_swap(params->alpha_qendpoint, 4, params->alpha_index,2);
@ -2300,14 +2153,14 @@ void Encode_mode5( CGV_CMPOUT cmp_out[COMPRESSED_BLOCK_SIZE],
// B0 : B1 // B0 : B1
for (CGU_INT component=0; component < 3; component++) for (CGU_INT component=0; component < 3; component++)
{ {
cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast<CGV_BYTE> (params->color_qendpoint[component])); cmp_Write8Bit(cmp_out,&bitPosition,7,CMP_STATIC_CAST(CGV_BYTE,params->color_qendpoint[component]));
cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast <CGV_BYTE> (params->color_qendpoint[4 + component])); cmp_Write8Bit(cmp_out,&bitPosition,7,CMP_STATIC_CAST(CGV_BYTE,params->color_qendpoint[4 + component]));
} }
// alpha endpoints (8 bits each) // alpha endpoints (8 bits each)
// A0 : A1 // A0 : A1
cmp_Write8Bit(cmp_out, &bitPosition, 8, static_cast<CGV_BYTE> (params->alpha_qendpoint[0])); cmp_Write8Bit(cmp_out,&bitPosition,8,CMP_STATIC_CAST(CGV_BYTE,params->alpha_qendpoint[0]));
cmp_Write8Bit(cmp_out, &bitPosition, 8, static_cast<CGV_BYTE> (params->alpha_qendpoint[4])); cmp_Write8Bit(cmp_out,&bitPosition,8,CMP_STATIC_CAST(CGV_BYTE,params->alpha_qendpoint[4]));
// color index 2 bits each (31 bits total) // color index 2 bits each (31 bits total)
@ -2332,8 +2185,8 @@ void Encode_mode6(
// endpoints // endpoints
for (CGU_INT p=0; p<4; p++) for (CGU_INT p=0; p<4; p++)
{ {
cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast<CGV_BYTE> (epo_code[0 + p] >> 1)); cmp_Write8Bit(cmp_out, &bitPosition, 7, CMP_STATIC_CAST(CGV_BYTE,epo_code[0 + p] >> 1));
cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast<CGV_BYTE> (epo_code[4 + p] >> 1)); cmp_Write8Bit(cmp_out, &bitPosition, 7, CMP_STATIC_CAST(CGV_BYTE,epo_code[4 + p] >> 1));
} }
// p bits // p bits
@ -2348,7 +2201,7 @@ void Encode_mode6(
void Compress_mode01237( void Compress_mode01237(
CGU_INT blockMode, CGU_INT blockMode,
BC7_EncodeState EncodeState[], BC7_EncodeState EncodeState[],
uniform CMP_GLOBAL BC7_Encode u_BC7Encode[]) uniform CMP_GLOBAL BC7_Encode u_BC7Encode[])
{ {
CGV_INDEX storedBestindex[MAX_PARTITIONS][MAX_SUBSETS][MAX_SUBSET_SIZE]; CGV_INDEX storedBestindex[MAX_PARTITIONS][MAX_SUBSETS][MAX_SUBSET_SIZE];
CGV_ERROR storedError[MAX_PARTITIONS]; CGV_ERROR storedError[MAX_PARTITIONS];
@ -2417,7 +2270,7 @@ uniform CMP_GLOBAL BC7_Encode u_BC7Encode[])
GetPartitionSubSet_mode01237( GetPartitionSubSet_mode01237(
image_subsets, image_subsets,
subset_entryCount, subset_entryCount,
static_cast<CGV_UINT8>(mode_blockPartition), CMP_STATIC_CAST(CGV_UINT8,mode_blockPartition),
EncodeState->image_src, EncodeState->image_src,
blockMode, blockMode,
EncodeState->channels3or4); EncodeState->channels3or4);
@ -2526,7 +2379,7 @@ uniform CMP_GLOBAL BC7_Encode u_BC7Encode[])
tmp_epo_code, tmp_epo_code,
src_image_block, src_image_block,
numEntries, numEntries,
static_cast<CGU_INT8>(EncodeState->clusters), // Mi_ CMP_STATIC_CAST(CGU_INT8,EncodeState->clusters), // Mi_
EncodeState->bits, EncodeState->bits,
EncodeState->channels3or4, EncodeState->channels3or4,
u_BC7Encode); u_BC7Encode);
@ -2735,7 +2588,7 @@ uniform CMP_GLOBAL BC7_Encode u_BC7Encode[])
src_color_Block, src_color_Block,
SOURCE_BLOCK_SIZE, SOURCE_BLOCK_SIZE,
EncodeState->numClusters0[idxMode], EncodeState->numClusters0[idxMode],
static_cast<CGU_INT8>(EncodeState->modeBits[0]), CMP_STATIC_CAST(CGU_UINT8,EncodeState->modeBits[0]),
3, 3,
u_BC7Encode); u_BC7Encode);
@ -2746,7 +2599,7 @@ uniform CMP_GLOBAL BC7_Encode u_BC7Encode[])
src_alpha_Block, src_alpha_Block,
SOURCE_BLOCK_SIZE, SOURCE_BLOCK_SIZE,
EncodeState->numClusters1[idxMode], EncodeState->numClusters1[idxMode],
static_cast<CGU_UINT8>(EncodeState->modeBits[1]), CMP_STATIC_CAST(CGU_UINT8,EncodeState->modeBits[1]),
3, 3,
u_BC7Encode) / 3.0f; u_BC7Encode) / 3.0f;
@ -4574,6 +4427,7 @@ uniform CMP_GLOBAL BC7_Encode u_BC7Encode[])
CGU_INT Mode = 0x0001 << blockMode; CGU_INT Mode = 0x0001 << blockMode;
if (!(u_BC7Encode->validModeMask & Mode)) if (!(u_BC7Encode->validModeMask & Mode))
continue; continue;
switch (blockMode) switch (blockMode)
{ {
// image processing with no alpha // image processing with no alpha
@ -4802,8 +4656,8 @@ void GetBC7Ramp(CGU_UINT32 endpoint[][MAX_DIMENSION_BIG],
ep[0][i] += (CGU_UINT32)(ep[0][i] >> componentBits[i]); ep[0][i] += (CGU_UINT32)(ep[0][i] >> componentBits[i]);
ep[1][i] += (CGU_UINT32)(ep[1][i] >> componentBits[i]); ep[1][i] += (CGU_UINT32)(ep[1][i] >> componentBits[i]);
ep[0][i] = min8(255, max8(0, static_cast<CGU_UINT8>(ep[0][i]))); ep[0][i] = min8(255, max8(0,CMP_STATIC_CAST(CGU_UINT8,ep[0][i])));
ep[1][i] = min8(255, max8(0, static_cast<CGU_UINT8>(ep[1][i]))); ep[1][i] = min8(255, max8(0,CMP_STATIC_CAST(CGU_UINT8,ep[1][i])));
} }
} }
@ -4926,7 +4780,7 @@ void DecompressDualIndexBlock(
// If this is a fixup index then clear the implicit bit // If this is a fixup index then clear the implicit bit
if(j==0) if(j==0)
{ {
blockIndices[i][j] &= ~(1 << (bti[m_blockMode].indexBits[i]-1)); blockIndices[i][j] &= ~(1 << (bti[m_blockMode].indexBits[i]-1U));
for(k=0;k<static_cast <CGU_UINT32>(bti[m_blockMode].indexBits[i] - 1); k++) for(k=0;k<static_cast <CGU_UINT32>(bti[m_blockMode].indexBits[i] - 1); k++)
{ {
blockIndices[i][j] |= (CGU_UINT32)ReadBit(in,m_bitPosition) << k; blockIndices[i][j] |= (CGU_UINT32)ReadBit(in,m_bitPosition) << k;
@ -5377,7 +5231,7 @@ int CMP_CDECL CompressBlockBC7( const unsigned char *srcBlock,
EncodeState.best_err = CMP_FLOAT_MAX; EncodeState.best_err = CMP_FLOAT_MAX;
EncodeState.validModeMask = u_BC7Encode->validModeMask; EncodeState.validModeMask = u_BC7Encode->validModeMask;
EncodeState.part_count = u_BC7Encode->part_count; EncodeState.part_count = u_BC7Encode->part_count;
EncodeState.channels = static_cast<CGU_CHANNEL>(u_BC7Encode->channels); EncodeState.channels = CMP_STATIC_CAST(CGU_CHANNEL,u_BC7Encode->channels);
CGU_UINT8 offsetR = 0; CGU_UINT8 offsetR = 0;
CGU_UINT8 offsetG = 16; CGU_UINT8 offsetG = 16;
@ -5410,6 +5264,7 @@ int CMP_CDECL CompressBlockBC7( const unsigned char *srcBlock,
return CGU_CORE_OK; return CGU_CORE_OK;
} }
int CMP_CDECL DecompressBlockBC7(const unsigned char cmpBlock[16], int CMP_CDECL DecompressBlockBC7(const unsigned char cmpBlock[16],
unsigned char srcBlock[64], unsigned char srcBlock[64],
const void *options = NULL) { const void *options = NULL) {
@ -5429,7 +5284,7 @@ int CMP_CDECL DecompressBlockBC7(const unsigned char cmpBlock[16],
#endif #endif
//============================================== OpenCL USER INTERFACE ==================================================== //============================================== OpenCL USER INTERFACE ====================================================
#ifdef ASPM_GPU #ifdef ASPM_OPENCL
CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const CGU_Vec4uc ImageSource[], CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const CGU_Vec4uc ImageSource[],
CMP_GLOBAL CGV_CMPOUT ImageDestination[], CMP_GLOBAL CGV_CMPOUT ImageDestination[],
uniform CMP_GLOBAL Source_Info SourceInfo[], uniform CMP_GLOBAL Source_Info SourceInfo[],
@ -5438,21 +5293,21 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const CGU_Vec4uc
CGU_INT xID=0; CGU_INT xID=0;
CGU_INT yID=0; CGU_INT yID=0;
xID = get_global_id(0); // ToDo: Define a size_t 32 bit and 64 bit basd on clGetDeviceInfo xID = get_global_id(0); // ToDo: Define a size_t 32 bit and 64 bit based on clGetDeviceInfo
yID = get_global_id(1); yID = get_global_id(1);
CGU_INT srcWidth = SourceInfo->m_src_width; CGU_INT srcWidth = SourceInfo->m_src_width;
CGU_INT srcHeight = SourceInfo->m_src_height; CGU_INT srcHeight = SourceInfo->m_src_height;
if (xID >= (srcWidth / BlockX)) return; if (xID >= (srcWidth / BlockX)) return;
if (yID >= (srcHeight / BlockY)) return; if (yID >= (srcHeight / BlockY)) return;
//ASPM_PRINT(("[ASPM_OCL] %d %d size %d\n",xID,yID,sizeof(BC7_Encode)));
CGU_INT destI = (xID*COMPRESSED_BLOCK_SIZE) + (yID*(srcWidth / BlockX)*COMPRESSED_BLOCK_SIZE); CGU_INT destI = (xID*COMPRESSED_BLOCK_SIZE) + (yID*(srcWidth / BlockX)*COMPRESSED_BLOCK_SIZE);
CGU_INT srcindex = 4 * (yID * srcWidth + xID); CGU_INT srcindex = 4 * (yID * srcWidth + xID);
CGU_INT blkindex = 0; CGU_INT blkindex = 0;
BC7_EncodeState EncodeState; BC7_EncodeState EncodeState;
varying BC7_EncodeState* uniform state = &EncodeState; cmp_memsetBC7(&EncodeState,0,sizeof(EncodeState));
copy_BC7_Encode_settings(&EncodeState, BC7Encode);
copy_BC7_Encode_settings(state, BC7Encode);
//Check if it is a complete 4X4 block //Check if it is a complete 4X4 block
if (((xID + 1)*BlockX <= srcWidth) && ((yID + 1)*BlockY <= srcHeight)) if (((xID + 1)*BlockX <= srcWidth) && ((yID + 1)*BlockY <= srcHeight))
@ -5460,10 +5315,10 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const CGU_Vec4uc
srcWidth = srcWidth - 4; srcWidth = srcWidth - 4;
for (CGU_INT j = 0; j < 4; j++) { for (CGU_INT j = 0; j < 4; j++) {
for (CGU_INT i = 0; i < 4; i++) { for (CGU_INT i = 0; i < 4; i++) {
state->image_src[blkindex+0*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].x; EncodeState.image_src[blkindex+0*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].x;
state->image_src[blkindex+1*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].y; EncodeState.image_src[blkindex+1*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].y;
state->image_src[blkindex+2*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].z; EncodeState.image_src[blkindex+2*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].z;
state->image_src[blkindex+3*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].w; EncodeState.image_src[blkindex+3*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].w;
blkindex++; blkindex++;
srcindex++; srcindex++;
} }
@ -5471,13 +5326,21 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const CGU_Vec4uc
srcindex += srcWidth; srcindex += srcWidth;
} }
copy_BC7_Encode_settings(state, BC7Encode);
BC7_CompressBlock(&EncodeState, BC7Encode); BC7_CompressBlock(&EncodeState, BC7Encode);
// printf("CMP %x %x %x %x %x %x %x",
// state->cmp_out[0],
// state->cmp_out[1],
// state->cmp_out[2],
// state->cmp_out[3],
// state->cmp_out[4],
// state->cmp_out[5],
// state->cmp_out[6]
// );
for (CGU_INT i=0; i<COMPRESSED_BLOCK_SIZE; i++) for (CGU_INT i=0; i<COMPRESSED_BLOCK_SIZE; i++)
{ {
ImageDestination[destI+i] = state->cmp_out[i]; ImageDestination[destI+i] = EncodeState.cmp_out[i];
} }
} }

View File

@ -1,5 +1,5 @@
//===================================================================== //=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
@ -23,6 +23,8 @@
#ifndef BC7_ENCODE_KERNEL_H #ifndef BC7_ENCODE_KERNEL_H
#define BC7_ENCODE_KERNEL_H #define BC7_ENCODE_KERNEL_H
#pragma warning(disable:4505) // disable warnings on unreferenced local function has been removed
#if defined(ISPC)||defined(ASPM) #if defined(ISPC)||defined(ASPM)
//#include "..\..\Common\Common_Def.h" //#include "..\..\Common\Common_Def.h"
#include "Common_Def.h" #include "Common_Def.h"
@ -252,7 +254,6 @@ BC7_Encode
CGU_INT refineIterations; CGU_INT refineIterations;
CGU_INT part_count; CGU_INT part_count;
CGU_INT channels; CGU_INT channels;
} }
#ifndef ASPM #ifndef ASPM
BC7_Encode BC7_Encode
@ -569,6 +570,163 @@ CMP_CONSTANT CGU_UINT8 par_vectors_nd[2][8][64][2][4] = {
}, },
}; };
// Partition (subset membership) bit masks: 64 two-subset patterns followed by
// 64 three-subset patterns, so a three-subset pattern p lives at offset 64 + p.
//
// Layout, as shown by the per-entry comments (binary is written MSB..LSB):
//  - 2 subsets: only the low 16 bits are used, one bit per block position.
//  - 3 subsets: the low 16 bits flag the positions marked "1" and the high 16 bits
//    the positions marked "2" in the combined map to the right of the '=' sign;
//    a position set in neither half is marked "0".
// NOTE(review): presumably bit i maps to pixel i of the 4x4 block and the digit is
// the subset number - confirm against the code that consumes this table.
CMP_CONSTANT CGU_UINT32 subset_mask_table[] = {
// 2 subset region patterns
0x0000CCCCu, // 0 1100 1100 1100 1100 (MSB..LSB)
0x00008888u, // 1 1000 1000 1000 1000
0x0000EEEEu, // 2 1110 1110 1110 1110
0x0000ECC8u, // 3 1110 1100 1100 1000
0x0000C880u, // 4 1100 1000 1000 0000
0x0000FEECu, // 5 1111 1110 1110 1100
0x0000FEC8u, // 6 1111 1110 1100 1000
0x0000EC80u, // 7 1110 1100 1000 0000
0x0000C800u, // 8 1100 1000 0000 0000
0x0000FFECu, // 9 1111 1111 1110 1100
0x0000FE80u, // 10 1111 1110 1000 0000
0x0000E800u, // 11 1110 1000 0000 0000
0x0000FFE8u, // 12 1111 1111 1110 1000
0x0000FF00u, // 13 1111 1111 0000 0000
0x0000FFF0u, // 14 1111 1111 1111 0000
0x0000F000u, // 15 1111 0000 0000 0000
0x0000F710u, // 16 1111 0111 0001 0000
0x0000008Eu, // 17 0000 0000 1000 1110
0x00007100u, // 18 0111 0001 0000 0000
0x000008CEu, // 19 0000 1000 1100 1110
0x0000008Cu, // 20 0000 0000 1000 1100
0x00007310u, // 21 0111 0011 0001 0000
0x00003100u, // 22 0011 0001 0000 0000
0x00008CCEu, // 23 1000 1100 1100 1110
0x0000088Cu, // 24 0000 1000 1000 1100
0x00003110u, // 25 0011 0001 0001 0000
0x00006666u, // 26 0110 0110 0110 0110
0x0000366Cu, // 27 0011 0110 0110 1100
0x000017E8u, // 28 0001 0111 1110 1000
0x00000FF0u, // 29 0000 1111 1111 0000
0x0000718Eu, // 30 0111 0001 1000 1110
0x0000399Cu, // 31 0011 1001 1001 1100
0x0000AAAAu, // 32 1010 1010 1010 1010
0x0000F0F0u, // 33 1111 0000 1111 0000
0x00005A5Au, // 34 0101 1010 0101 1010
0x000033CCu, // 35 0011 0011 1100 1100
0x00003C3Cu, // 36 0011 1100 0011 1100
0x000055AAu, // 37 0101 0101 1010 1010
0x00009696u, // 38 1001 0110 1001 0110
0x0000A55Au, // 39 1010 0101 0101 1010
0x000073CEu, // 40 0111 0011 1100 1110
0x000013C8u, // 41 0001 0011 1100 1000
0x0000324Cu, // 42 0011 0010 0100 1100
0x00003BDCu, // 43 0011 1011 1101 1100
0x00006996u, // 44 0110 1001 1001 0110
0x0000C33Cu, // 45 1100 0011 0011 1100
0x00009966u, // 46 1001 1001 0110 0110
0x00000660u, // 47 0000 0110 0110 0000
0x00000272u, // 48 0000 0010 0111 0010
0x000004E4u, // 49 0000 0100 1110 0100
0x00004E40u, // 50 0100 1110 0100 0000
0x00002720u, // 51 0010 0111 0010 0000
0x0000C936u, // 52 1100 1001 0011 0110
0x0000936Cu, // 53 1001 0011 0110 1100
0x000039C6u, // 54 0011 1001 1100 0110
0x0000639Cu, // 55 0110 0011 1001 1100
0x00009336u, // 56 1001 0011 0011 0110
0x00009CC6u, // 57 1001 1100 1100 0110
0x0000817Eu, // 58 1000 0001 0111 1110
0x0000E718u, // 59 1110 0111 0001 1000
0x0000CCF0u, // 60 1100 1100 1111 0000
0x00000FCCu, // 61 0000 1111 1100 1100
0x00007744u, // 62 0111 0111 0100 0100
0x0000EE22u, // 63 1110 1110 0010 0010
// 3 Subset region patterns
0xF60008CCu,// 0 1111 0110 0000 0000 : 0000 1000 1100 1100 = 2222122011001100 (MSB...LSB)
0x73008CC8u,// 1 0111 0011 0000 0000 : 1000 1100 1100 1000 = 1222112211001000
0x3310CC80u,// 2 0011 0011 0001 0000 : 1100 1100 1000 0000 = 1122112210020000
0x00CEEC00u,// 3 0000 0000 1100 1110 : 1110 1100 0000 0000 = 1110110022002220
0xCC003300u,// 4 1100 1100 0000 0000 : 0011 0011 0000 0000 = 2211221100000000
0xCC0000CCu,// 5 1100 1100 0000 0000 : 0000 0000 1100 1100 = 2200220011001100
0x00CCFF00u,// 6 0000 0000 1100 1100 : 1111 1111 0000 0000 = 1111111122002200
0x3300CCCCu,// 7 0011 0011 0000 0000 : 1100 1100 1100 1100 = 1122112211001100
0xF0000F00u,// 8 1111 0000 0000 0000 : 0000 1111 0000 0000 = 2222111100000000
0xF0000FF0u,// 9 1111 0000 0000 0000 : 0000 1111 1111 0000 = 2222111111110000
0xFF0000F0u,// 10 1111 1111 0000 0000 : 0000 0000 1111 0000 = 2222222211110000
0x88884444u,// 11 1000 1000 1000 1000 : 0100 0100 0100 0100 = 2100210021002100
0x88886666u,// 12 1000 1000 1000 1000 : 0110 0110 0110 0110 = 2110211021102110
0xCCCC2222u,// 13 1100 1100 1100 1100 : 0010 0010 0010 0010 = 2210221022102210
0xEC80136Cu,// 14 1110 1100 1000 0000 : 0001 0011 0110 1100 = 2221221121101100
0x7310008Cu,// 15 0111 0011 0001 0000 : 0000 0000 1000 1100 = 0222002210021100
0xC80036C8u,// 16 1100 1000 0000 0000 : 0011 0110 1100 1000 = 2211211011001000
0x310008CEu,// 17 0011 0001 0000 0000 : 0000 1000 1100 1110 = 0022100211001110
0xCCC03330u,// 18 1100 1100 1100 0000 : 0011 0011 0011 0000 = 2211221122110000
0x0CCCF000u,// 19 0000 1100 1100 1100 : 1111 0000 0000 0000 = 1111220022002200
0xEE0000EEu,// 20 1110 1110 0000 0000 : 0000 0000 1110 1110 = 2220222011101110
0x77008888u,// 21 0111 0111 0000 0000 : 1000 1000 1000 1000 = 1222122210001000
0xCC0022C0u,// 22 1100 1100 0000 0000 : 0010 0010 1100 0000 = 2210221011000000
0x33004430u,// 23 0011 0011 0000 0000 : 0100 0100 0011 0000 = 0122012200110000
0x00CC0C22u,// 24 0000 0000 1100 1100 : 0000 1100 0010 0010 = 0000110022102210
0xFC880344u,// 25 1111 1100 1000 1000 : 0000 0011 0100 0100 = 2222221121002100
0x06606996u,// 26 0000 0110 0110 0000 : 0110 1001 1001 0110 = 0110122112210110
0x66009960u,// 27 0110 0110 0000 0000 : 1001 1001 0110 0000 = 1221122101100000
0xC88C0330u,// 28 1100 1000 1000 1100 : 0000 0011 0011 0000 = 2200201120112200
0xF9000066u,// 29 1111 1001 0000 0000 : 0000 0000 0110 0110 = 2222200201100110
0x0CC0C22Cu,// 30 0000 1100 1100 0000 : 1100 0010 0010 1100 = 1100221022101100
0x73108C00u,// 31 0111 0011 0001 0000 : 1000 1100 0000 0000 = 1222112200020000
0xEC801300u,// 32 1110 1100 1000 0000 : 0001 0011 0000 0000 = 2221221120000000
0x08CEC400u,// 33 0000 1000 1100 1110 : 1100 0100 0000 0000 = 1100210022002220
0xEC80004Cu,// 34 1110 1100 1000 0000 : 0000 0000 0100 1100 = 2220220021001100
0x44442222u,// 35 0100 0100 0100 0100 : 0010 0010 0010 0010 = 0210021002100210
0x0F0000F0u,// 36 0000 1111 0000 0000 : 0000 0000 1111 0000 = 0000222211110000
0x49242492u,// 37 0100 1001 0010 0100 : 0010 0100 1001 0010 = 0210210210210210
0x42942942u,// 38 0100 0010 1001 0100 : 0010 1001 0100 0010 = 0210102121020210
0x0C30C30Cu,// 39 0000 1100 0011 0000 : 1100 0011 0000 1100 = 1100221100221100
0x03C0C03Cu,// 40 0000 0011 1100 0000 : 1100 0000 0011 1100 = 1100002222111100
0xFF0000AAu,// 41 1111 1111 0000 0000 : 0000 0000 1010 1010 = 2222222210101010
0x5500AA00u,// 42 0101 0101 0000 0000 : 1010 1010 0000 0000 = 1212121200000000
0xCCCC3030u,// 43 1100 1100 1100 1100 : 0011 0000 0011 0000 = 2211220022112200
0x0C0CC0C0u,// 44 0000 1100 0000 1100 : 1100 0000 1100 0000 = 1100220011002200
0x66669090u,// 45 0110 0110 0110 0110 : 1001 0000 1001 0000 = 1221022012210220
0x0FF0A00Au,// 46 0000 1111 1111 0000 : 1010 0000 0000 1010 = 1010222222221010
0x5550AAA0u,// 47 0101 0101 0101 0000 : 1010 1010 1010 0000 = 1212121212120000
0xF0000AAAu,// 48 1111 0000 0000 0000 : 0000 1010 1010 1010 = 2222101010101010
0x0E0EE0E0u,// 49 0000 1110 0000 1110 : 1110 0000 1110 0000 = 1110222011102220
0x88887070u,// 50 1000 1000 1000 1000 : 0111 0000 0111 0000 = 2111200021112000
0x99906660u,// 51 1001 1001 1001 0000 : 0110 0110 0110 0000 = 2112211221120000
0xE00E0EE0u,// 52 1110 0000 0000 1110 : 0000 1110 1110 0000 = 2220111011102220
0x88880770u,// 53 1000 1000 1000 1000 : 0000 0111 0111 0000 = 2000211121112000
0xF0000666u,// 54 1111 0000 0000 0000 : 0000 0110 0110 0110 = 2222011001100110
0x99006600u,// 55 1001 1001 0000 0000 : 0110 0110 0000 0000 = 2112211200000000
0xFF000066u,// 56 1111 1111 0000 0000 : 0000 0000 0110 0110 = 2222222201100110
0xC00C0CC0u,// 57 1100 0000 0000 1100 : 0000 1100 1100 0000 = 2200110011002200
0xCCCC0330u,// 58 1100 1100 1100 1100 : 0000 0011 0011 0000 = 2200221122112200
0x90006000u,// 59 1001 0000 0000 0000 : 0110 0000 0000 0000 = 2112000000000000
0x08088080u,// 60 0000 1000 0000 1000 : 1000 0000 1000 0000 = 1000200010002000
0xEEEE1010u,// 61 1110 1110 1110 1110 : 0001 0000 0001 0000 = 2221222022212220
0xFFF0000Au,// 62 1111 1111 1111 0000 : 0000 0000 0000 1010 = 2222222222221010
0x731008CEu,// 63 0111 0011 0001 0000 : 0000 1000 1100 1110 = 0222102211021110
};
// Integer interpolation ramps, one row per index precision (0 to 4 index bits).
// In row r the first 2^r entries step from 0 up to 64 and the rest of the row is
// zero padding, so entry [r * SOURCE_BLOCK_SIZE + i] is the ramp value of index i
// at r bits. Each row below lists 16 values.
// NOTE(review): assumes SOURCE_BLOCK_SIZE == 16 so that rows line up - confirm.
CMP_CONSTANT CGV_EPOCODE rampI[5*SOURCE_BLOCK_SIZE] = {
0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 0 bit index
0 ,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 1 bit index
0 ,21,43,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 2 bit index
0 ,9 ,18,27,37,46,55,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 3 bit index
0 ,4 ,9 ,13,17,21,26,30,34,38,43,47,51,55,60,64 // 4 bit index
};
// Fix-up indices per partition: same data as CMP SDK v3.1 BC7_FIXUPINDEX1 & BC7_FIXUPINDEX2
// for each partition in the range 0..63.
// Both indices are packed into one value: ((BC7_FIXUPINDEX1 << 4) + BC7_FIXUPINDEX2),
// i.e. high nibble = BC7_FIXUPINDEX1, low nibble = BC7_FIXUPINDEX2
// (the low nibble is 0 in every 2-subset entry below).
// The table holds 128 entries: 2-subset partitions at offsets 0..63, 3-subset at 64..127.
CMP_CONSTANT CGV_FIXUPINDEX FIXUPINDEX[] = {
// 2 subset partitions 0..63
0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u,
0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0x20u, 0x20u,
0xf0u, 0xf0u, 0x60u, 0x80u, 0x20u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0x60u,
0x60u, 0x20u, 0x60u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u,
// 3 subset partitions 64..127
0x3fu, 0x38u, 0xf8u, 0xf3u, 0x8fu, 0x3fu, 0xf3u, 0xf8u, 0x8fu, 0x8fu, 0x6fu, 0x6fu, 0x6fu, 0x5fu, 0x3fu, 0x38u,
0x3fu, 0x38u, 0x8fu, 0xf3u, 0x3fu, 0x38u, 0x6fu, 0xa8u, 0x53u, 0x8fu, 0x86u, 0x6au, 0x8fu, 0x5fu, 0xfau, 0xf8u,
0x8fu, 0xf3u, 0x3fu, 0x5au, 0x6au, 0xa8u, 0x89u, 0xfau, 0xf6u, 0x3fu, 0xf8u, 0x5fu, 0xf3u, 0xf6u, 0xf6u, 0xf8u,
0x3fu, 0xf3u, 0x5fu, 0x5fu, 0x5fu, 0x8fu, 0x5fu, 0xafu, 0x5fu, 0xafu, 0x8fu, 0xdfu, 0xf3u, 0xcfu, 0x3fu, 0x38u
};
#ifndef ASPM_GPU #ifndef ASPM_GPU
// =============================== USED BY DECODER THIS CODE NEEDS TO BE UPDATED ========================================= // =============================== USED BY DECODER THIS CODE NEEDS TO BE UPDATED =========================================
CMP_CONSTANT CGU_UINT32 BC7_FIXUPINDICES_LOCAL[MAX_SUBSETS][MAX_PARTITIONS][3] = CMP_CONSTANT CGU_UINT32 BC7_FIXUPINDICES_LOCAL[MAX_SUBSETS][MAX_PARTITIONS][3] =

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,5 @@
#ifndef _COMMON_DEFINITIONS_H
#define _COMMON_DEFINITIONS_H
//=============================================================================== //===============================================================================
// Copyright (c) 2007-2019 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2007-2020 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2004-2006 ATI Technologies Inc. // Copyright (c) 2004-2006 ATI Technologies Inc.
//=============================================================================== //===============================================================================
// //
@ -25,11 +22,26 @@
// THE SOFTWARE. // THE SOFTWARE.
// //
// //
// File Name: Common_Def.h // File Name: Common_Def
// Description: common definitions used for CPU/HPC/GPU // Description: common definitions used for CPU/HPC/GPU
// //
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
#ifndef _COMMON_DEFINITIONS_H
#define _COMMON_DEFINITIONS_H
// UE4 shaders must be included with a .ush extension instead of the standard .h.
// The INC_* macros below select the matching file name, so users do not have to
// edit every include directive when building for UE4 (ASPM_HLSL_UE4).
#ifdef ASPM_HLSL_UE4
#pragma once
#define INC_cmp_math_vec4 "cmp_math_vec4.ush"
#define INC_cmp_math_func "cmp_math_func.ush"
#else
#define INC_cmp_math_vec4 "cmp_math_vec4.h"
#define INC_cmp_math_func "cmp_math_func.h"
#endif
// Features // Features
#ifdef _WIN32 #ifdef _WIN32
@ -44,15 +56,24 @@
// Using OpenCL Compiler // Using OpenCL Compiler
#ifdef __OPENCL_VERSION__ #ifdef __OPENCL_VERSION__
#define ASPM_GPU #define ASPM_GPU
#define ASPM_OPENCL
#endif #endif
// Using DirectX fxc Compiler
// Note use the /DASPM_HLSL command line to define this
#ifdef ASPM_HLSL
#define ASPM_GPU
#endif
#ifdef _LINUX #ifdef _LINUX
#undef ASPM_GPU #undef ASPM_GPU
#undef ASPM_OPENCL
#ifndef ASPM_HLSL
#include <cstring> #include <cstring>
#include <cmath> #include <cmath>
#include <stdio.h> #include <stdio.h>
#include "cmp_math_vec4.h" #include INC_cmp_math_vec4
#endif
#endif #endif
#ifndef CMP_MAX #ifndef CMP_MAX
@ -63,6 +84,13 @@
#define CMP_MIN(x, y) (((x) < (y)) ? (x) : (y)) #define CMP_MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif #endif
// CMP_STATIC_CAST(type, value): cast helper shared by the CPU and GPU builds.
// Without ASPM_GPU it expands to a C++ static_cast; with ASPM_GPU (defined in this
// header for the OpenCL and HLSL compilers) it expands to a C-style cast instead.
// NOTE(review): presumably because those shader/kernel compilers do not accept
// static_cast - confirm.
#ifndef ASPM_GPU
#define CMP_STATIC_CAST(x,y) static_cast<x>(y)
#else
#define CMP_STATIC_CAST(x,y) (x)(y)
#endif
#define CMP_SET_BC13_DECODER_RGBA // Sets mapping BC1, BC2 & BC3 to decode Red,Green,Blue and Alpha #define CMP_SET_BC13_DECODER_RGBA // Sets mapping BC1, BC2 & BC3 to decode Red,Green,Blue and Alpha
// RGBA to channels [0,1,2,3] else BGRA maps to [0,1,2,3] // RGBA to channels [0,1,2,3] else BGRA maps to [0,1,2,3]
// BC4 alpha always maps as AAAA to channels [0,1,2,3] // BC4 alpha always maps as AAAA to channels [0,1,2,3]
@ -70,8 +98,8 @@
//#define USE_BLOCK_LINEAR //#define USE_BLOCK_LINEAR
#define CMP_FLOAT_MAX 3.402823466e+38F // max value used to detect an Error in processing #define CMP_FLOAT_MAX 3.402823466e+38F // max value used to detect an Error in processing
#define CMP_FLOAT_MAX_EXP 38 #define CMP_FLOAT_MAX_EXP 38
#define USE_PROCESS_SEPERATE_ALPHA // Enable this to use higher quality code using CompressDualIndexBlock #define USE_PROCESS_SEPERATE_ALPHA // Enable this to use higher quality code using CompressDualIndexBlock
#define COMPRESSED_BLOCK_SIZE 16 // Size of a compressed block in bytes #define COMPRESSED_BLOCK_SIZE 16 // Size of a compressed block in bytes
#define MAX_DIMENSION_BIG 4 // Max number of channels (RGBA) #define MAX_DIMENSION_BIG 4 // Max number of channels (RGBA)
@ -84,6 +112,75 @@
//#define USE_BLOCK_LINEAR // Source Data is organized in linear form for each block : Experimental Code not fully developed //#define USE_BLOCK_LINEAR // Source Data is organized in linear form for each block : Experimental Code not fully developed
//#define USE_DOUBLE // Default is to use float, enable to use double data types only for float definitions //#define USE_DOUBLE // Default is to use float, enable to use double data types only for float definitions
//---------------------------------------------
// Predefinitions for GPU and CPU compiled code
//---------------------------------------------
#ifdef ASPM_HLSL
// ==== Vectors ====
typedef float2 CGU_Vec2f;
typedef float2 CGV_Vec2f;
typedef float3 CGU_Vec3f;
typedef float3 CGV_Vec3f;
typedef float4 CGU_Vec4f;
typedef float4 CGV_Vec4f;
typedef int2 CGU_Vec2i;
typedef int2 CGV_Vec2i;
typedef uint2 CGU_Vec2ui;
typedef uint2 CGV_Vec2ui;
typedef int3 CGU_Vec3i;
typedef int3 CGV_Vec3i;
typedef uint3 CGU_Vec3ui;
typedef uint3 CGV_Vec3ui;
typedef uint4 CGU_Vec4ui;
typedef uint4 CGV_Vec4ui;
// ==== Scalar Types ==== to remove from code
typedef int CGU_INT8;
typedef uint CGU_INT;
typedef int CGV_INT;
typedef uint CGU_UINT8;
typedef uint CGU_UINT;
// ==== Scalar Types ====
typedef int CGU_BOOL;
typedef int CGV_BOOL;
typedef int CGU_INT32;
typedef int CGV_INT32;
typedef uint CGU_UINT32;
typedef uint CGV_UINT32;
typedef float CGV_FLOAT;
typedef float CGU_FLOAT;
typedef min16float CGU_MIN16_FLOAT; // FP16 GPU support defaults to 32 bit if no HW support
#define TRUE 1
#define FALSE 0
#define CMP_CDECL
#define BC7_ENCODECLASS
#define CMP_EXPORT
#define INLINE
#define uniform
#define varying
#define CMP_GLOBAL
#define CMP_KERNEL
#define CMP_CONSTANT
#define CMP_STATIC
#define CMP_REFINOUT
#define CMP_PTRINOUT
#define CMP_INOUT inout
#define CMP_OUT out
#define CMP_IN in
#define CMP_UNUSED(x) (x);
#define CMP_UNROLL [unroll]
#else
typedef enum { typedef enum {
CGU_CORE_OK = 0, // No errors, call was successful CGU_CORE_OK = 0, // No errors, call was successful
CGU_CORE_ERR_UNKOWN, // An unknown error occurred CGU_CORE_ERR_UNKOWN, // An unknown error occurred
@ -95,26 +192,41 @@ typedef enum {
} CGU_ERROR_CODES; } CGU_ERROR_CODES;
//--------------------------------------------- #ifdef ASPM_OPENCL // GPU Based code using OpenCL
// Predefinitions for GPU and CPU compiled code
//---------------------------------------------
#ifdef ASPM_GPU // GPU Based code
// ==== Vectors ==== // ==== Vectors ====
typedef float2 CGU_Vec2f; typedef float2 CGU_Vec2f;
typedef float2 CGV_Vec2f; typedef float2 CGV_Vec2f;
typedef float3 CMP_Vec3f; typedef float3 CMP_Vec3f;
typedef float3 CGU_Vec3f; typedef float3 CGU_Vec3f;
typedef float3 CGV_Vec3f; typedef float3 CGV_Vec3f;
typedef float4 CGU_Vec4f;
typedef float4 CGV_Vec4f;
typedef uchar3 CGU_Vec3uc; typedef uchar3 CGU_Vec3uc;
typedef uchar3 CGV_Vec3uc; typedef uchar3 CGV_Vec3uc;
typedef uchar4 CMP_Vec4uc; typedef uchar4 CMP_Vec4uc;
typedef uchar4 CGU_Vec4uc; typedef uchar4 CGU_Vec4uc;
typedef uchar4 CGV_Vec4uc; typedef uchar4 CGV_Vec4uc;
typedef int2 CGU_Vec2i;
typedef int2 CGV_Vec2i;
typedef int3 CGU_Vec3i;
typedef int3 CGV_Vec3i;
typedef int4 CGU_Vec4i;
typedef int4 CGV_Vec4i;
typedef uint2 CGU_Vec2ui;
typedef uint2 CGV_Vec2ui;
typedef uint3 CGU_Vec3ui;
typedef uint3 CGV_Vec3ui;
typedef uint4 CGU_Vec4ui;
typedef uint4 CGV_Vec4ui;
#define USE_BC7_SP_ERR_IDX #define USE_BC7_SP_ERR_IDX
#define ASPM_PRINT(args) printf args
#define BC7_ENCODECLASS #define BC7_ENCODECLASS
#define ASPM_PRINT(args) printf args
#define CMP_EXPORT #define CMP_EXPORT
#define INLINE #define INLINE
@ -124,13 +236,20 @@ typedef enum {
#define CMP_KERNEL __kernel #define CMP_KERNEL __kernel
#define CMP_CONSTANT __constant #define CMP_CONSTANT __constant
#define CMP_STATIC #define CMP_STATIC
#define CMP_REFINOUT &
#define CMP_PTRINOUT *
#define CMP_INOUT
#define CMP_OUT
#define CMP_IN
#define CMP_UNUSED(x)
#define CMP_UNROLL
typedef unsigned int CGU_DWORD; //32bits typedef unsigned int CGU_DWORD; //32bits
typedef int CGU_INT; //32bits typedef int CGU_INT; //32bits
typedef int CGU_BOOL; typedef bool CGU_BOOL;
typedef unsigned short CGU_SHORT; //16bits typedef unsigned short CGU_SHORT; //16bits
typedef float CGU_FLOAT; typedef float CGU_FLOAT;
typedef half CGU_MIN16_FLOAT; // FP16 GPU support defaults to 32 bit if no HW support
typedef unsigned int uint32; // need to remove this def typedef unsigned int uint32; // need to remove this def
typedef int CGV_INT; typedef int CGV_INT;
@ -163,6 +282,15 @@ typedef enum {
#else #else
// CPU & ASPM definitions // CPU & ASPM definitions
#define CMP_REFINOUT &
#define CMP_PTRINOUT *
#define CMP_INOUT
#define CMP_OUT
#define CMP_IN
#define CMP_UNUSED(x) (void)(x);
#define CMP_UNROLL
#ifdef ASPM // SPMD ,SIMD CPU code #ifdef ASPM // SPMD ,SIMD CPU code
// using hybrid (CPU/GPU) aspm compiler // using hybrid (CPU/GPU) aspm compiler
#define ASPM_PRINT(args) print args #define ASPM_PRINT(args) print args
@ -185,6 +313,8 @@ typedef enum {
typedef unsigned int64 uint64; typedef unsigned int64 uint64;
typedef uniform float CGU_FLOAT; typedef uniform float CGU_FLOAT;
typedef varying float CGV_FLOAT; typedef varying float CGV_FLOAT;
typedef uniform float CGU_MIN16_FLOAT;
typedef uniform uint8 CGU_UINT8; typedef uniform uint8 CGU_UINT8;
typedef varying uint8 CGV_UINT8; typedef varying uint8 CGV_UINT8;
@ -192,18 +322,24 @@ typedef enum {
typedef CGV_UINT8<4> CGV_Vec4uc; typedef CGV_UINT8<4> CGV_Vec4uc;
typedef CGU_UINT8<4> CGU_Vec4uc; typedef CGU_UINT8<4> CGU_Vec4uc;
typedef CGU_FLOAT<3> CGU_Vec3f;
typedef CGV_FLOAT<3> CGV_Vec3f;
typedef CGU_FLOAT<2> CGU_Vec2f; typedef CGU_FLOAT<2> CGU_Vec2f;
typedef CGV_FLOAT<2> CGV_Vec2f; typedef CGV_FLOAT<2> CGV_Vec2f;
typedef CGU_FLOAT<3> CGU_Vec3f;
typedef CGV_FLOAT<3> CGV_Vec3f;
typedef CGU_FLOAT<4> CGU_Vec4f;
typedef CGV_FLOAT<4> CGV_Vec4f;
typedef CGU_UINT32<3> CGU_Vec3ui;
typedef CGV_UINT32<3> CGV_Vec3ui;
typedef CGU_UINT32<4> CGU_Vec4ui;
typedef CGV_UINT32<4> CGV_Vec4ui;
#define CMP_CDECL #define CMP_CDECL
#else // standard CPU code #else // standard CPU code
#include <stdio.h> #include <stdio.h>
#include <string> #include <string>
#include "cmp_math_vec4.h" #include INC_cmp_math_vec4
// using CPU compiler // using CPU compiler
#define ASPM_PRINT(args) printf args #define ASPM_PRINT(args) printf args
@ -227,7 +363,7 @@ typedef enum {
typedef unsigned long uint64; typedef unsigned long uint64;
typedef int8 CGV_BOOL; typedef int8 CGV_BOOL;
typedef int8 CGU_BOOL; typedef bool CGU_BOOL;
typedef int16 CGU_WORD; typedef int16 CGU_WORD;
typedef uint8 CGU_SHORT; typedef uint8 CGU_SHORT;
typedef int64 CGU_LONG; typedef int64 CGU_LONG;
@ -235,8 +371,19 @@ typedef enum {
typedef uniform float CGU_FLOAT; typedef uniform float CGU_FLOAT;
typedef varying float CGV_FLOAT; typedef varying float CGV_FLOAT;
typedef uniform float CGU_MIN16_FLOAT;
typedef uniform uint8 CGU_UINT8; typedef uniform uint8 CGU_UINT8;
typedef varying uint8 CGV_UINT8; typedef varying uint8 CGV_UINT8;
typedef CMP_Vec3ui CGU_Vec3ui;
typedef CMP_Vec3ui CGV_Vec3ui;
typedef CMP_Vec4ui CGU_Vec4ui;
typedef CMP_Vec4ui CGV_Vec4ui;
typedef CMP_Vec4f CGU_Vec4f;
typedef CMP_Vec4f CGV_Vec4f;
#if defined(WIN32) || defined(_WIN64) #if defined(WIN32) || defined(_WIN64)
#define CMP_CDECL __cdecl #define CMP_CDECL __cdecl
#else #else
@ -275,9 +422,10 @@ typedef enum {
typedef uint16 CGV_UINT16; typedef uint16 CGV_UINT16;
typedef uint32 CGV_UINT32; typedef uint32 CGV_UINT32;
typedef uint64 CGV_UINT64; typedef uint64 CGV_UINT64;
#endif // ASPM_GPU
#endif // else ASPM_GPU
typedef struct typedef struct
{ {
CGU_UINT32 m_src_width; CGU_UINT32 m_src_width;
@ -287,14 +435,20 @@ typedef struct
CGU_FLOAT m_fquality; CGU_FLOAT m_fquality;
} Source_Info; } Source_Info;
typedef unsigned char* CGU_PTR;
// Ref Compute_CPU_HPC // Ref Compute_CPU_HPC
struct texture_surface struct texture_surface
{ {
CGU_UINT8* ptr; CGU_PTR ptr;
CGU_INT width, CGU_INT width,
height, height,
stride; stride;
CGU_INT channels; CGU_INT channels;
}; };
#endif
#endif // else ASPM_HLSL
#endif // Common_Def.h

View File

@ -12,36 +12,55 @@ echo %mypath:~0,-1%
IF NOT EXIST "%outpath%"\Plugins mkdir %BUILD_OUTDIR%Plugins IF NOT EXIST "%outpath%"\Plugins mkdir %BUILD_OUTDIR%Plugins
IF NOT EXIST "%outpath%"\Plugins\Compute mkdir %BUILD_OUTDIR%Plugins\Compute IF NOT EXIST "%outpath%"\Plugins\Compute mkdir %BUILD_OUTDIR%Plugins\Compute
REM Build Vulkan Shader Binary REM ToDo: Build Vulkan based shaders
REM "%VULKAN_SDK%"\bin\glslangvalidator -V %mypath:~0,-1%\BC1.comp -o %BUILD_OUTDIR%\Plugins\Compute\BC1.spv REM "%VULKAN_SDK%"\bin\glslangvalidator -V %mypath:~0,-1%\BC1... -o %BUILD_OUTDIR%\Plugins\Compute\BC1....spv
REM IF %ERRORLEVEL% GTR 0 exit 123 REM IF %ERRORLEVEL% GTR 0 exit 123
REM Enabled in v4.0 REM Remove any OpenCL compiled Binaries
REM REM
REM del %BUILD_OUTDIR%Plugins\Compute\BC1_Encode_Kernel.cpp.cmp del %BUILD_OUTDIR%Plugins\Compute\BC1_Encode_kernel.cpp.cmp
REM del %BUILD_OUTDIR%Plugins\Compute\BC2_Encode_Kernel.cpp.cmp del %BUILD_OUTDIR%Plugins\Compute\BC1_Encode_kernel.hlsl.cmp
REM del %BUILD_OUTDIR%Plugins\Compute\BC3_Encode_Kernel.cpp.cmp del %BUILD_OUTDIR%Plugins\Compute\BC2_Encode_kernel.cpp.cmp
REM del %BUILD_OUTDIR%Plugins\Compute\BC4_Encode_Kernel.cpp.cmp del %BUILD_OUTDIR%Plugins\Compute\BC2_Encode_kernel.hlsl.cmp
REM del %BUILD_OUTDIR%Plugins\Compute\BC5_Encode_Kernel.cpp.cmp del %BUILD_OUTDIR%Plugins\Compute\BC3_Encode_kernel.cpp.cmp
REM del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_Kernel.cpp.cmp del %BUILD_OUTDIR%Plugins\Compute\BC3_Encode_kernel.hlsl.cmp
REM del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.cpp.cmp del %BUILD_OUTDIR%Plugins\Compute\BC4_Encode_kernel.cpp.cmp
del %BUILD_OUTDIR%Plugins\Compute\BC4_Encode_kernel.hlsl.cmp
del %BUILD_OUTDIR%Plugins\Compute\BC5_Encode_kernel.cpp.cmp
del %BUILD_OUTDIR%Plugins\Compute\BC5_Encode_kernel.hlsl.cmp
del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_kernel.cpp.cmp
del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_kernel.hlsl.cmp
del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_kernel.hlsl.0.cmp
del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_kernel.hlsl.1.cmp
del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.cpp.cmp
del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.hlsl.cmp
del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.hlsl.0.cmp
del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.hlsl.1.cmp
del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.hlsl.2.cmp
XCopy /r /d /y "%mypath:~0,-1%\Common_Def.h" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\Common_Def.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BCn_Common_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BCn_Common_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_kernel.hlsl" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_kernel.hlsl" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_kernel.hlsl" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_kernel.hlsl" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_kernel.hlsl" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_kernel.hlsl" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.hlsl" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
echo "Dependencies copied done" echo "Dependencies copied done"

View File

@ -1,5 +1,5 @@
//===================================================================== //=====================================================================
// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
@ -19,7 +19,7 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE. // THE SOFTWARE.
// //
/// \file CMP_Core.h /// \file CMP_Core.h CPU User Interface
// //
//===================================================================== //=====================================================================

143
extern/CMP_Core/source/cmp_math_func.h vendored Normal file
View File

@ -0,0 +1,143 @@
//=====================================================================
// Copyright 2020 (c), Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//=====================================================================
#ifndef CMP_MATH_FUNC_H
#define CMP_MATH_FUNC_H
#include "Common_Def.h"
#ifndef ASPM_GPU
//============================================================================
// Core API functions which have GPU equivalents, defined here for HPC_CPU usage
//============================================================================
#include <algorithm>
using namespace std;
// Three-way comparison of two CGU_FLOAT values reached through untyped
// pointers (the signature is shaped like a qsort()-style callback).
//   Elem1, Elem2 : pointers to the two CGU_FLOAT elements to compare.
//   Returns -1 when *Elem1 is smaller, 1 when it is larger, 0 otherwise.
static CGU_INT QSortFCmp(const void *Elem1, const void *Elem2) {
    // The sign of the difference decides the ordering.
    const CGU_FLOAT delta = *static_cast<const CGU_FLOAT *>(Elem1) - *static_cast<const CGU_FLOAT *>(Elem2);
    if (delta < 0.)
        return -1;
    if (delta > 0.)
        return 1;
    return 0;
}
static int QSortIntCmp(const void *Elem1, const void *Elem2)
{
return (*(CGU_INT32 *)Elem1 - *(CGU_INT32 *)Elem2);
}
// Dot product of two 3-component vectors: the sum of the x, y and z
// component products of Color and Color2.
static CGU_FLOAT dot(CMP_IN CGU_Vec3f Color,CMP_IN CGU_Vec3f Color2)
{
    return (Color.x * Color2.x) + (Color.y * Color2.y) + (Color.z * Color2.z);
}
// Dot product of two 2-component vectors: the sum of the x and y
// component products of Color and Color2.
static CGU_FLOAT dot(CMP_IN CGU_Vec2f Color,CMP_IN CGU_Vec2f Color2)
{
    return Color.x * Color2.x + Color.y * Color2.y;
}
// Component-wise absolute value of a 2-component vector.
// Returns a new vector; the argument is not modified.
static CGU_Vec2f abs(CMP_IN CGU_Vec2f Color)
{
    CGU_Vec2f result = Color;  // every component is overwritten below
    result.x = std::abs(Color.x);
    result.y = std::abs(Color.y);
    return result;
}
// Component-wise absolute value of a 3-component vector.
// Returns a new vector; the argument is not modified.
static CGU_Vec3f fabs(CMP_IN CGU_Vec3f Color)
{
    CGU_Vec3f result = Color;  // every component is overwritten below
    result.x = std::abs(Color.x);
    result.y = std::abs(Color.y);
    result.z = std::abs(Color.z);
    return result;
}
// Component-wise std::round of a 3-component vector.
// Returns a new vector; the argument is not modified.
static CGU_Vec3f round(CMP_IN CGU_Vec3f Color)
{
    CGU_Vec3f result = Color;  // every component is overwritten below
    result.x = std::round(Color.x);
    result.y = std::round(Color.y);
    result.z = std::round(Color.z);
    return result;
}
// Component-wise std::round of a 2-component vector.
// Returns a new vector; the argument is not modified.
static CGU_Vec2f round(CMP_IN CGU_Vec2f Color)
{
    CGU_Vec2f result = Color;  // every component is overwritten below
    result.x = std::round(Color.x);
    result.y = std::round(Color.y);
    return result;
}
// Component-wise std::ceil of a 3-component vector.
// Returns a new vector; the argument is not modified.
static CGU_Vec3f ceil(CMP_IN CGU_Vec3f Color)
{
    CGU_Vec3f result = Color;  // every component is overwritten below
    result.x = std::ceil(Color.x);
    result.y = std::ceil(Color.y);
    result.z = std::ceil(Color.z);
    return result;
}
// Component-wise std::floor of a 3-component vector.
// Returns a new vector; the argument is not modified.
static CGU_Vec3f floor(CMP_IN CGU_Vec3f Color)
{
    CGU_Vec3f result = Color;  // every component is overwritten below
    result.x = std::floor(Color.x);
    result.y = std::floor(Color.y);
    result.z = std::floor(Color.z);
    return result;
}
// Clamps each component of a 3-component vector to the range [0.0, 1.0].
// Components above 1.0 become 1.0, components below 0.0 become 0.0, and
// anything else (including a value that fails both comparisons) is kept.
// Works on the by-value copy of the argument and returns it.
static CGU_Vec3f saturate(CGU_Vec3f value)
{
    value.x = (value.x > 1.0f) ? 1.0f : ((value.x < 0.0f) ? 0.0f : value.x);
    value.y = (value.y > 1.0f) ? 1.0f : ((value.y < 0.0f) ? 0.0f : value.y);
    value.z = (value.z > 1.0f) ? 1.0f : ((value.z < 0.0f) ? 0.0f : value.z);
    return value;
}
#endif
//============================================================================
// Core API which are shared between GPU & CPU
//============================================================================
#endif // Header Guard

View File

@ -30,14 +30,16 @@
#if defined (_LINUX) || defined (_WIN32) #if defined (_LINUX) || defined (_WIN32)
//============================================= VEC2 ================================================== //============================================= VEC2 ==================================================
template <class T> class vec3;
template<class T> template<class T>
class Vec2 class Vec2
{ {
public: public:
T x; T x;
T y; T y;
// ***************************************** // *****************************************
// Constructors // Constructors
// ***************************************** // *****************************************
@ -54,7 +56,6 @@ public:
/// Single value constructor. Sets all components to the given value /// Single value constructor. Sets all components to the given value
Vec2(const T& v) : x(v), y(v) {}; Vec2(const T& v) : x(v), y(v) {};
// ***************************************** // *****************************************
// Conversions/Assignment/Indexing // Conversions/Assignment/Indexing
// ***************************************** // *****************************************
@ -92,6 +93,13 @@ public:
/// Subtraction /// Subtraction
const Vec2<T> operator-(const Vec2<T>& rhs) const { return Vec2<T>(x - rhs.x, y - rhs.y); }; const Vec2<T> operator-(const Vec2<T>& rhs) const { return Vec2<T>(x - rhs.x, y - rhs.y); };
/// Multiply
const Vec2<T> operator*(const Vec2<T>& rhs) const { return Vec2<T>(x * rhs.x, y * rhs.y); };
/// Divide
const Vec2<T> operator/(const Vec2<T>& rhs) const { return Vec2<T>(x / rhs.x, y / rhs.y); };
/// Multiply by scalar /// Multiply by scalar
const Vec2<T> operator*(const T& v) const { return Vec2<T>(x * v, y * v); }; const Vec2<T> operator*(const T& v) const { return Vec2<T>(x * v, y * v); };
@ -113,11 +121,12 @@ public:
}; };
typedef Vec2<float> CMP_Vec2f; typedef Vec2<float> CMP_Vec2f;
typedef Vec2<float> CGU_Vec2f; typedef Vec2<float> CGU_Vec2f;
typedef Vec2<float> CGV_Vec2f; typedef Vec2<float> CGV_Vec2f;
typedef Vec2<double> CMP_Vec2d; typedef Vec2<double> CMP_Vec2d;
typedef Vec2<int> CMP_Vec2i; typedef Vec2<int> CMP_Vec2i;
typedef Vec2<unsigned int> CGU_Vec2ui;
//} //}
@ -134,6 +143,7 @@ public:
T y; T y;
T z; T z;
// ***************************************** // *****************************************
// Constructors // Constructors
// ***************************************** // *****************************************
@ -180,21 +190,24 @@ public:
// Arithmetic // Arithmetic
// ***************************************** // *****************************************
/// Addition /// Addition by vector
const Vec3<T> operator+(const Vec3<T>& rhs) const { return Vec3<T>(x + rhs.x, y + rhs.y, z + rhs.z); }; const Vec3<T> operator+(const Vec3<T>& rhs) const { return Vec3<T>(x + rhs.x, y + rhs.y, z + rhs.z); };
/// Subtraction /// Subtraction by vector
const Vec3<T> operator-(const Vec3<T>& rhs) const { return Vec3<T>(x - rhs.x, y - rhs.y, z - rhs.z); }; const Vec3<T> operator-(const Vec3<T>& rhs) const { return Vec3<T>(x - rhs.x, y - rhs.y, z - rhs.z); };
/// Multiply by vector
const Vec3<T> operator*(const Vec3<T>& rhs) const { return Vec3<T>(x * rhs.x, y * rhs.y, z * rhs.z); };
/// Divide by vector
const Vec3<T> operator/(const Vec3<T>& rhs) const { return Vec3<T>(x / rhs.x, y / rhs.y, z / rhs.z); };
/// Multiply by scalar /// Multiply by scalar
const Vec3<T> operator*(const T& v) const { return Vec3<T>(x * v, y * v, z * v); }; const Vec3<T> operator*(const T& v) const { return Vec3<T>(x * v, y * v, z * v); };
/// Divide by scalar /// Divide by scalar
const Vec3<T> operator/(const T& v) const { return Vec3<T>(x / v, y / v, z / v); }; const Vec3<T> operator/(const T& v) const { return Vec3<T>(x / v, y / v, z / v); };
/// Divide by vector
const Vec3<T> operator/(const Vec3<T>& rhs) const { return Vec3<T>(x / rhs.x, y / rhs.y, z / rhs.z); };
/// Addition in-place /// Addition in-place
Vec3<T>& operator+= (const Vec3<T>& rhs) { x += rhs.x; y += rhs.y; z += rhs.z; return *this; }; Vec3<T>& operator+= (const Vec3<T>& rhs) { x += rhs.x; y += rhs.y; z += rhs.z; return *this; };
@ -208,6 +221,7 @@ public:
Vec3<T>& operator/= (const T& v) { x /= v; y /= v; z /= v; return *this; }; Vec3<T>& operator/= (const T& v) { x /= v; y /= v; z /= v; return *this; };
}; };
typedef Vec3<bool> CGU_Vec3bool;
typedef Vec3<float> CGU_Vec3f; typedef Vec3<float> CGU_Vec3f;
typedef Vec3<float> CGV_Vec3f; typedef Vec3<float> CGV_Vec3f;
typedef Vec3<unsigned char> CGU_Vec3uc; typedef Vec3<unsigned char> CGU_Vec3uc;
@ -217,6 +231,7 @@ typedef Vec3<float> CMP_Vec3f;
typedef Vec3<double> CMP_Vec3d; typedef Vec3<double> CMP_Vec3d;
typedef Vec3<int> CMP_Vec3i; typedef Vec3<int> CMP_Vec3i;
typedef Vec3<unsigned char> CMP_Vec3uc; typedef Vec3<unsigned char> CMP_Vec3uc;
typedef Vec3<unsigned int> CMP_Vec3ui;
//============================================= VEC4 ================================================== //============================================= VEC4 ==================================================
template<class T> template<class T>
@ -275,21 +290,24 @@ public:
// Arithmetic // Arithmetic
// ***************************************** // *****************************************
/// Addition /// Addition by vector
const Vec4<T> operator+(const Vec4<T>& rhs) const { return Vec4<T>(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w); }; const Vec4<T> operator+(const Vec4<T>& rhs) const { return Vec4<T>(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w); };
/// Subtraction /// Subtraction by vector
const Vec4<T> operator-(const Vec4<T>& rhs) const { return Vec4<T>(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w); }; const Vec4<T> operator-(const Vec4<T>& rhs) const { return Vec4<T>(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w); };
/// Multiply by vector
const Vec4<T> operator*(const Vec4<T>& rhs) const { return Vec4<T>(x * rhs.x, y * rhs.y, z * rhs.z, w * rhs.w); };
/// Divide by vector
const Vec4<T> operator/(const Vec4<T>& rhs) const { return Vec4<T>(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w); };
/// Multiply by scalar /// Multiply by scalar
const Vec4<T> operator*(const T& v) const { return Vec4<T>(x * v, y * v, z * v, w * v); }; const Vec4<T> operator*(const T& v) const { return Vec4<T>(x * v, y * v, z * v, w * v); };
/// Divide by scalar /// Divide by scalar
const Vec4<T> operator/(const T& v) const { return Vec4<T>(x / v, y / v, z / v, w / v); }; const Vec4<T> operator/(const T& v) const { return Vec4<T>(x / v, y / v, z / v, w / v); };
/// Divide by vector
const Vec4<T> operator/(const Vec4<T>& rhs) const { return Vec4<T>(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w); };
/// Addition in-place /// Addition in-place
Vec4<T>& operator+= (const Vec4<T>& rhs) { x += rhs.x; y += rhs.y; z += rhs.z; w += rhs.w; return *this; }; Vec4<T>& operator+= (const Vec4<T>& rhs) { x += rhs.x; y += rhs.y; z += rhs.z; w += rhs.w; return *this; };

View File

@ -3,6 +3,7 @@
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
struct Block { const unsigned char* data; const unsigned char* color; }; struct Block { const unsigned char* data; const unsigned char* color; };
struct BlockBC6 { const unsigned char* data; const float* color; };
static const unsigned char BC1_Red_Ignore_Alpha [] {0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 }; static const unsigned char BC1_Red_Ignore_Alpha [] {0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC1_Blue_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; static const unsigned char BC1_Blue_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
@ -76,6 +77,102 @@ static const unsigned char BC3_Red_Green_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0
static const unsigned char BC3_Green_Blue_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 }; static const unsigned char BC3_Green_Blue_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Red_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 }; static const unsigned char BC3_Red_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Green_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 }; static const unsigned char BC3_Green_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC4_Red_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC4_Blue_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC4_White_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC4_Black_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC4_Red_Blue_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC4_Red_Green_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC4_Green_Blue_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC4_Red_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC4_Green_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC4_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC4_White_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC4_Green_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC4_Black_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC4_Red_Blue_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC4_Red_Green_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC4_Green_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC4_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC4_White_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC4_Black_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC4_Red_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC4_Red_Green_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC4_Green_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC4_Red_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC4_Green_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC5_Red_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC5_Blue_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC5_White_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC5_Black_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC5_Red_Blue_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC5_Red_Green_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC5_Green_Blue_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC5_Red_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC5_Green_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC5_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC5_White_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC5_Green_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC5_Black_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC5_Red_Blue_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC5_Red_Green_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC5_Green_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC5_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC5_White_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC5_Black_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC5_Red_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC5_Red_Green_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC5_Green_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC5_Red_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
static const unsigned char BC5_Green_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Red_Ignore_Alpha [] {0xe3, 0x3d, 0x0 , 0x0 , 0x78, 0xf , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Blue_Half_Alpha [] {0x3 , 0x0 , 0x0 , 0xde, 0x3 , 0x0 , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_White_Half_Alpha [] {0xe3, 0xbd, 0xf7, 0xde, 0x7b, 0xef, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Black_Half_Alpha [] {0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Red_Blue_Half_Alpha [] {0xe3, 0x3d, 0x0 , 0xde, 0x7b, 0xf , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Red_Green_Half_Alpha [] {0xe3, 0xbd, 0xf7, 0x0 , 0x78, 0xef, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Green_Blue_Half_Alpha [] {0x3 , 0x80, 0xf7, 0xde, 0x3 , 0xe0, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Red_Full_Alpha [] {0xe3, 0x3d, 0x0 , 0x0 , 0x78, 0xf , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Green_Full_Alpha [] {0x3 , 0x80, 0xf7, 0x0 , 0x0 , 0xe0, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Blue_Full_Alpha [] {0x3 , 0x0 , 0x0 , 0xde, 0x3 , 0x0 , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_White_Full_Alpha [] {0xe3, 0xbd, 0xf7, 0xde, 0x7b, 0xef, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Green_Ignore_Alpha [] {0x3 , 0x80, 0xf7, 0x0 , 0x0 , 0xe0, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Black_Full_Alpha [] {0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Red_Blue_Full_Alpha [] {0xe3, 0x3d, 0x0 , 0xde, 0x7b, 0xf , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Red_Green_Full_Alpha [] {0xe3, 0xbd, 0xf7, 0x0 , 0x78, 0xef, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Green_Blue_Full_Alpha [] {0x3 , 0x80, 0xf7, 0xde, 0x3 , 0xe0, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Blue_Ignore_Alpha [] {0x3 , 0x0 , 0x0 , 0xde, 0x3 , 0x0 , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_White_Ignore_Alpha [] {0xe3, 0xbd, 0xf7, 0xde, 0x7b, 0xef, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Black_Ignore_Alpha [] {0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Red_Blue_Ignore_Alpha [] {0xe3, 0x3d, 0x0 , 0xde, 0x7b, 0xf , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Red_Green_Ignore_Alpha [] {0xe3, 0xbd, 0xf7, 0x0 , 0x78, 0xef, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Green_Blue_Ignore_Alpha [] {0x3 , 0x80, 0xf7, 0xde, 0x3 , 0xe0, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Red_Half_Alpha [] {0xe3, 0x3d, 0x0 , 0x0 , 0x78, 0xf , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC6_Green_Half_Alpha [] {0x3 , 0x80, 0xf7, 0x0 , 0x0 , 0xe0, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC7_Red_Ignore_Alpha [] {0x10, 0xff, 0x3 , 0x0 , 0xc0, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC7_Blue_Half_Alpha [] {0x20, 0x0 , 0x0 , 0x0 , 0xf0, 0xff, 0xef, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC7_White_Half_Alpha [] {0x20, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC7_Black_Half_Alpha [] {0x20, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xec, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC7_Red_Blue_Half_Alpha [] {0x20, 0xff, 0x3f, 0x0 , 0xf0, 0xff, 0xef, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC7_Red_Green_Half_Alpha [] {0x20, 0xff, 0xff, 0xff, 0xf , 0x0 , 0xec, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
// BC7 reference blocks (16 bytes each), one array per colour / alpha variant.
// NOTE(review): presumably the expected encoder output for the matching
// solid-colour test inputs -- confirm against the tests that consume them.
static const unsigned char BC7_Green_Blue_Half_Alpha[] = {
    0x20, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xef, 0xed,
    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Red_Full_Alpha[] = {
    0x10, 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Green_Full_Alpha[] = {
    0x10, 0x00, 0xfc, 0x0f, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Blue_Full_Alpha[] = {
    0x10, 0x00, 0x00, 0xf0, 0x3f, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_White_Full_Alpha[] = {
    0x10, 0xff, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Green_Ignore_Alpha[] = {
    0x10, 0x00, 0xfc, 0x0f, 0xc0, 0xff, 0x03, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Black_Full_Alpha[] = {
    0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Red_Blue_Full_Alpha[] = {
    0x10, 0xff, 0x03, 0xf0, 0x3f, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Red_Green_Full_Alpha[] = {
    0x10, 0xff, 0xff, 0x0f, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Green_Blue_Full_Alpha[] = {
    0x10, 0x00, 0xfc, 0xff, 0x3f, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Blue_Ignore_Alpha[] = {
    0x10, 0x00, 0x00, 0xf0, 0xff, 0xff, 0x03, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_White_Ignore_Alpha[] = {
    0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Black_Ignore_Alpha[] = {
    0x10, 0x00, 0x00, 0x00, 0xc0, 0xff, 0x03, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Red_Blue_Ignore_Alpha[] = {
    0x10, 0xff, 0x03, 0xf0, 0xff, 0xff, 0x03, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Red_Green_Ignore_Alpha[] = {
    0x10, 0xff, 0xff, 0x0f, 0xc0, 0xff, 0x03, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Green_Blue_Ignore_Alpha[] = {
    0x10, 0x00, 0xfc, 0xff, 0xff, 0xff, 0x03, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Red_Half_Alpha[] = {
    0x20, 0xff, 0x3f, 0x00, 0x00, 0x00, 0xec, 0xed,
    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC7_Green_Half_Alpha[] = {
    0x20, 0x00, 0xc0, 0xff, 0x0f, 0x00, 0xec, 0xed,
    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
Block BC1_Red_Ignore_Alpha_Block = {BC1_Red_Ignore_Alpha, nullptr}; Block BC1_Red_Ignore_Alpha_Block = {BC1_Red_Ignore_Alpha, nullptr};
Block BC1_Blue_Half_Alpha_Block = {BC1_Blue_Half_Alpha, nullptr}; Block BC1_Blue_Half_Alpha_Block = {BC1_Blue_Half_Alpha, nullptr};
@ -149,6 +246,102 @@ Block BC3_Red_Green_Ignore_Alpha_Block = {BC3_Red_Green_Ignore_Alpha, nullptr};
Block BC3_Green_Blue_Ignore_Alpha_Block = {BC3_Green_Blue_Ignore_Alpha, nullptr}; Block BC3_Green_Blue_Ignore_Alpha_Block = {BC3_Green_Blue_Ignore_Alpha, nullptr};
Block BC3_Red_Half_Alpha_Block = {BC3_Red_Half_Alpha, nullptr}; Block BC3_Red_Half_Alpha_Block = {BC3_Red_Half_Alpha, nullptr};
Block BC3_Green_Half_Alpha_Block = {BC3_Green_Half_Alpha, nullptr}; Block BC3_Green_Half_Alpha_Block = {BC3_Green_Half_Alpha, nullptr};
// Block objects for the BC4 test vectors.
// Each one is initialized with the byte array of the same name (without the
// "_Block" suffix) as its first member and nullptr as its second, and is
// registered under that name in the `blocks` map.
// NOTE(review): the Block type is declared elsewhere; the meaning of the
// second (nullptr) member is not visible here -- confirm against its definition.
Block BC4_Red_Ignore_Alpha_Block = {BC4_Red_Ignore_Alpha, nullptr};
Block BC4_Blue_Half_Alpha_Block = {BC4_Blue_Half_Alpha, nullptr};
Block BC4_White_Half_Alpha_Block = {BC4_White_Half_Alpha, nullptr};
Block BC4_Black_Half_Alpha_Block = {BC4_Black_Half_Alpha, nullptr};
Block BC4_Red_Blue_Half_Alpha_Block = {BC4_Red_Blue_Half_Alpha, nullptr};
Block BC4_Red_Green_Half_Alpha_Block = {BC4_Red_Green_Half_Alpha, nullptr};
Block BC4_Green_Blue_Half_Alpha_Block = {BC4_Green_Blue_Half_Alpha, nullptr};
Block BC4_Red_Full_Alpha_Block = {BC4_Red_Full_Alpha, nullptr};
Block BC4_Green_Full_Alpha_Block = {BC4_Green_Full_Alpha, nullptr};
Block BC4_Blue_Full_Alpha_Block = {BC4_Blue_Full_Alpha, nullptr};
Block BC4_White_Full_Alpha_Block = {BC4_White_Full_Alpha, nullptr};
Block BC4_Green_Ignore_Alpha_Block = {BC4_Green_Ignore_Alpha, nullptr};
Block BC4_Black_Full_Alpha_Block = {BC4_Black_Full_Alpha, nullptr};
Block BC4_Red_Blue_Full_Alpha_Block = {BC4_Red_Blue_Full_Alpha, nullptr};
Block BC4_Red_Green_Full_Alpha_Block = {BC4_Red_Green_Full_Alpha, nullptr};
Block BC4_Green_Blue_Full_Alpha_Block = {BC4_Green_Blue_Full_Alpha, nullptr};
Block BC4_Blue_Ignore_Alpha_Block = {BC4_Blue_Ignore_Alpha, nullptr};
Block BC4_White_Ignore_Alpha_Block = {BC4_White_Ignore_Alpha, nullptr};
Block BC4_Black_Ignore_Alpha_Block = {BC4_Black_Ignore_Alpha, nullptr};
Block BC4_Red_Blue_Ignore_Alpha_Block = {BC4_Red_Blue_Ignore_Alpha, nullptr};
Block BC4_Red_Green_Ignore_Alpha_Block = {BC4_Red_Green_Ignore_Alpha, nullptr};
Block BC4_Green_Blue_Ignore_Alpha_Block = {BC4_Green_Blue_Ignore_Alpha, nullptr};
Block BC4_Red_Half_Alpha_Block = {BC4_Red_Half_Alpha, nullptr};
Block BC4_Green_Half_Alpha_Block = {BC4_Green_Half_Alpha, nullptr};
// Block objects for the BC5 test vectors.
// Each one is initialized with the byte array of the same name (without the
// "_Block" suffix) as its first member and nullptr as its second, and is
// registered under that name in the `blocks` map.
// NOTE(review): the Block type is declared elsewhere; the meaning of the
// second (nullptr) member is not visible here -- confirm against its definition.
Block BC5_Red_Ignore_Alpha_Block = {BC5_Red_Ignore_Alpha, nullptr};
Block BC5_Blue_Half_Alpha_Block = {BC5_Blue_Half_Alpha, nullptr};
Block BC5_White_Half_Alpha_Block = {BC5_White_Half_Alpha, nullptr};
Block BC5_Black_Half_Alpha_Block = {BC5_Black_Half_Alpha, nullptr};
Block BC5_Red_Blue_Half_Alpha_Block = {BC5_Red_Blue_Half_Alpha, nullptr};
Block BC5_Red_Green_Half_Alpha_Block = {BC5_Red_Green_Half_Alpha, nullptr};
Block BC5_Green_Blue_Half_Alpha_Block = {BC5_Green_Blue_Half_Alpha, nullptr};
Block BC5_Red_Full_Alpha_Block = {BC5_Red_Full_Alpha, nullptr};
Block BC5_Green_Full_Alpha_Block = {BC5_Green_Full_Alpha, nullptr};
Block BC5_Blue_Full_Alpha_Block = {BC5_Blue_Full_Alpha, nullptr};
Block BC5_White_Full_Alpha_Block = {BC5_White_Full_Alpha, nullptr};
Block BC5_Green_Ignore_Alpha_Block = {BC5_Green_Ignore_Alpha, nullptr};
Block BC5_Black_Full_Alpha_Block = {BC5_Black_Full_Alpha, nullptr};
Block BC5_Red_Blue_Full_Alpha_Block = {BC5_Red_Blue_Full_Alpha, nullptr};
Block BC5_Red_Green_Full_Alpha_Block = {BC5_Red_Green_Full_Alpha, nullptr};
Block BC5_Green_Blue_Full_Alpha_Block = {BC5_Green_Blue_Full_Alpha, nullptr};
Block BC5_Blue_Ignore_Alpha_Block = {BC5_Blue_Ignore_Alpha, nullptr};
Block BC5_White_Ignore_Alpha_Block = {BC5_White_Ignore_Alpha, nullptr};
Block BC5_Black_Ignore_Alpha_Block = {BC5_Black_Ignore_Alpha, nullptr};
Block BC5_Red_Blue_Ignore_Alpha_Block = {BC5_Red_Blue_Ignore_Alpha, nullptr};
Block BC5_Red_Green_Ignore_Alpha_Block = {BC5_Red_Green_Ignore_Alpha, nullptr};
Block BC5_Green_Blue_Ignore_Alpha_Block = {BC5_Green_Blue_Ignore_Alpha, nullptr};
Block BC5_Red_Half_Alpha_Block = {BC5_Red_Half_Alpha, nullptr};
Block BC5_Green_Half_Alpha_Block = {BC5_Green_Half_Alpha, nullptr};
// BlockBC6 objects for the BC6 test vectors.
// These use the BlockBC6 type rather than Block and are registered in the
// separate `blocksBC6` map. Each one is initialized with the same-named array
// (without the "_Block" suffix) as its first member and nullptr as its second.
// NOTE(review): BlockBC6 is declared elsewhere; how it differs from Block and
// what the second (nullptr) member means are not visible here -- confirm.
BlockBC6 BC6_Red_Ignore_Alpha_Block = {BC6_Red_Ignore_Alpha, nullptr};
BlockBC6 BC6_Blue_Half_Alpha_Block = {BC6_Blue_Half_Alpha, nullptr};
BlockBC6 BC6_White_Half_Alpha_Block = {BC6_White_Half_Alpha, nullptr};
BlockBC6 BC6_Black_Half_Alpha_Block = {BC6_Black_Half_Alpha, nullptr};
BlockBC6 BC6_Red_Blue_Half_Alpha_Block = {BC6_Red_Blue_Half_Alpha, nullptr};
BlockBC6 BC6_Red_Green_Half_Alpha_Block = {BC6_Red_Green_Half_Alpha, nullptr};
BlockBC6 BC6_Green_Blue_Half_Alpha_Block = {BC6_Green_Blue_Half_Alpha, nullptr};
BlockBC6 BC6_Red_Full_Alpha_Block = {BC6_Red_Full_Alpha, nullptr};
BlockBC6 BC6_Green_Full_Alpha_Block = {BC6_Green_Full_Alpha, nullptr};
BlockBC6 BC6_Blue_Full_Alpha_Block = {BC6_Blue_Full_Alpha, nullptr};
BlockBC6 BC6_White_Full_Alpha_Block = {BC6_White_Full_Alpha, nullptr};
BlockBC6 BC6_Green_Ignore_Alpha_Block = {BC6_Green_Ignore_Alpha, nullptr};
BlockBC6 BC6_Black_Full_Alpha_Block = {BC6_Black_Full_Alpha, nullptr};
BlockBC6 BC6_Red_Blue_Full_Alpha_Block = {BC6_Red_Blue_Full_Alpha, nullptr};
BlockBC6 BC6_Red_Green_Full_Alpha_Block = {BC6_Red_Green_Full_Alpha, nullptr};
BlockBC6 BC6_Green_Blue_Full_Alpha_Block = {BC6_Green_Blue_Full_Alpha, nullptr};
BlockBC6 BC6_Blue_Ignore_Alpha_Block = {BC6_Blue_Ignore_Alpha, nullptr};
BlockBC6 BC6_White_Ignore_Alpha_Block = {BC6_White_Ignore_Alpha, nullptr};
BlockBC6 BC6_Black_Ignore_Alpha_Block = {BC6_Black_Ignore_Alpha, nullptr};
BlockBC6 BC6_Red_Blue_Ignore_Alpha_Block = {BC6_Red_Blue_Ignore_Alpha, nullptr};
BlockBC6 BC6_Red_Green_Ignore_Alpha_Block = {BC6_Red_Green_Ignore_Alpha, nullptr};
BlockBC6 BC6_Green_Blue_Ignore_Alpha_Block = {BC6_Green_Blue_Ignore_Alpha, nullptr};
BlockBC6 BC6_Red_Half_Alpha_Block = {BC6_Red_Half_Alpha, nullptr};
BlockBC6 BC6_Green_Half_Alpha_Block = {BC6_Green_Half_Alpha, nullptr};
// Block objects for the BC7 test vectors.
// Each one is initialized with the byte array of the same name (without the
// "_Block" suffix) as its first member and nullptr as its second, and is
// registered under that name in the `blocks` map.
// NOTE(review): the Block type is declared elsewhere; the meaning of the
// second (nullptr) member is not visible here -- confirm against its definition.
Block BC7_Red_Ignore_Alpha_Block = {BC7_Red_Ignore_Alpha, nullptr};
Block BC7_Blue_Half_Alpha_Block = {BC7_Blue_Half_Alpha, nullptr};
Block BC7_White_Half_Alpha_Block = {BC7_White_Half_Alpha, nullptr};
Block BC7_Black_Half_Alpha_Block = {BC7_Black_Half_Alpha, nullptr};
Block BC7_Red_Blue_Half_Alpha_Block = {BC7_Red_Blue_Half_Alpha, nullptr};
Block BC7_Red_Green_Half_Alpha_Block = {BC7_Red_Green_Half_Alpha, nullptr};
Block BC7_Green_Blue_Half_Alpha_Block = {BC7_Green_Blue_Half_Alpha, nullptr};
Block BC7_Red_Full_Alpha_Block = {BC7_Red_Full_Alpha, nullptr};
Block BC7_Green_Full_Alpha_Block = {BC7_Green_Full_Alpha, nullptr};
Block BC7_Blue_Full_Alpha_Block = {BC7_Blue_Full_Alpha, nullptr};
Block BC7_White_Full_Alpha_Block = {BC7_White_Full_Alpha, nullptr};
Block BC7_Green_Ignore_Alpha_Block = {BC7_Green_Ignore_Alpha, nullptr};
Block BC7_Black_Full_Alpha_Block = {BC7_Black_Full_Alpha, nullptr};
Block BC7_Red_Blue_Full_Alpha_Block = {BC7_Red_Blue_Full_Alpha, nullptr};
Block BC7_Red_Green_Full_Alpha_Block = {BC7_Red_Green_Full_Alpha, nullptr};
Block BC7_Green_Blue_Full_Alpha_Block = {BC7_Green_Blue_Full_Alpha, nullptr};
Block BC7_Blue_Ignore_Alpha_Block = {BC7_Blue_Ignore_Alpha, nullptr};
Block BC7_White_Ignore_Alpha_Block = {BC7_White_Ignore_Alpha, nullptr};
Block BC7_Black_Ignore_Alpha_Block = {BC7_Black_Ignore_Alpha, nullptr};
Block BC7_Red_Blue_Ignore_Alpha_Block = {BC7_Red_Blue_Ignore_Alpha, nullptr};
Block BC7_Red_Green_Ignore_Alpha_Block = {BC7_Red_Green_Ignore_Alpha, nullptr};
Block BC7_Green_Blue_Ignore_Alpha_Block = {BC7_Green_Blue_Ignore_Alpha, nullptr};
Block BC7_Red_Half_Alpha_Block = {BC7_Red_Half_Alpha, nullptr};
Block BC7_Green_Half_Alpha_Block = {BC7_Green_Half_Alpha, nullptr};
static std::unordered_map<std::string, Block> blocks { static std::unordered_map<std::string, Block> blocks {
{ "BC1_Red_Ignore_Alpha", BC1_Red_Ignore_Alpha_Block}, { "BC1_Red_Ignore_Alpha", BC1_Red_Ignore_Alpha_Block},
@ -222,7 +415,106 @@ static std::unordered_map<std::string, Block> blocks {
{ "BC3_Red_Green_Ignore_Alpha", BC3_Red_Green_Ignore_Alpha_Block}, { "BC3_Red_Green_Ignore_Alpha", BC3_Red_Green_Ignore_Alpha_Block},
{ "BC3_Green_Blue_Ignore_Alpha", BC3_Green_Blue_Ignore_Alpha_Block}, { "BC3_Green_Blue_Ignore_Alpha", BC3_Green_Blue_Ignore_Alpha_Block},
{ "BC3_Red_Half_Alpha", BC3_Red_Half_Alpha_Block}, { "BC3_Red_Half_Alpha", BC3_Red_Half_Alpha_Block},
{ "BC3_Green_Half_Alpha", BC3_Green_Half_Alpha_Block} { "BC3_Green_Half_Alpha", BC3_Green_Half_Alpha_Block},
{ "BC4_Red_Ignore_Alpha", BC4_Red_Ignore_Alpha_Block},
{ "BC4_Blue_Half_Alpha", BC4_Blue_Half_Alpha_Block},
{ "BC4_White_Half_Alpha", BC4_White_Half_Alpha_Block},
{ "BC4_Black_Half_Alpha", BC4_Black_Half_Alpha_Block},
{ "BC4_Red_Blue_Half_Alpha", BC4_Red_Blue_Half_Alpha_Block},
{ "BC4_Red_Green_Half_Alpha", BC4_Red_Green_Half_Alpha_Block},
{ "BC4_Green_Blue_Half_Alpha", BC4_Green_Blue_Half_Alpha_Block},
{ "BC4_Red_Full_Alpha", BC4_Red_Full_Alpha_Block},
{ "BC4_Green_Full_Alpha", BC4_Green_Full_Alpha_Block},
{ "BC4_Blue_Full_Alpha", BC4_Blue_Full_Alpha_Block},
{ "BC4_White_Full_Alpha", BC4_White_Full_Alpha_Block},
{ "BC4_Green_Ignore_Alpha", BC4_Green_Ignore_Alpha_Block},
{ "BC4_Black_Full_Alpha", BC4_Black_Full_Alpha_Block},
{ "BC4_Red_Blue_Full_Alpha", BC4_Red_Blue_Full_Alpha_Block},
{ "BC4_Red_Green_Full_Alpha", BC4_Red_Green_Full_Alpha_Block},
{ "BC4_Green_Blue_Full_Alpha", BC4_Green_Blue_Full_Alpha_Block},
{ "BC4_Blue_Ignore_Alpha", BC4_Blue_Ignore_Alpha_Block},
{ "BC4_White_Ignore_Alpha", BC4_White_Ignore_Alpha_Block},
{ "BC4_Black_Ignore_Alpha", BC4_Black_Ignore_Alpha_Block},
{ "BC4_Red_Blue_Ignore_Alpha", BC4_Red_Blue_Ignore_Alpha_Block},
{ "BC4_Red_Green_Ignore_Alpha", BC4_Red_Green_Ignore_Alpha_Block},
{ "BC4_Green_Blue_Ignore_Alpha", BC4_Green_Blue_Ignore_Alpha_Block},
{ "BC4_Red_Half_Alpha", BC4_Red_Half_Alpha_Block},
{ "BC4_Green_Half_Alpha", BC4_Green_Half_Alpha_Block},
{ "BC5_Red_Ignore_Alpha", BC5_Red_Ignore_Alpha_Block},
{ "BC5_Blue_Half_Alpha", BC5_Blue_Half_Alpha_Block},
{ "BC5_White_Half_Alpha", BC5_White_Half_Alpha_Block},
{ "BC5_Black_Half_Alpha", BC5_Black_Half_Alpha_Block},
{ "BC5_Red_Blue_Half_Alpha", BC5_Red_Blue_Half_Alpha_Block},
{ "BC5_Red_Green_Half_Alpha", BC5_Red_Green_Half_Alpha_Block},
{ "BC5_Green_Blue_Half_Alpha", BC5_Green_Blue_Half_Alpha_Block},
{ "BC5_Red_Full_Alpha", BC5_Red_Full_Alpha_Block},
{ "BC5_Green_Full_Alpha", BC5_Green_Full_Alpha_Block},
{ "BC5_Blue_Full_Alpha", BC5_Blue_Full_Alpha_Block},
{ "BC5_White_Full_Alpha", BC5_White_Full_Alpha_Block},
{ "BC5_Green_Ignore_Alpha", BC5_Green_Ignore_Alpha_Block},
{ "BC5_Black_Full_Alpha", BC5_Black_Full_Alpha_Block},
{ "BC5_Red_Blue_Full_Alpha", BC5_Red_Blue_Full_Alpha_Block},
{ "BC5_Red_Green_Full_Alpha", BC5_Red_Green_Full_Alpha_Block},
{ "BC5_Green_Blue_Full_Alpha", BC5_Green_Blue_Full_Alpha_Block},
{ "BC5_Blue_Ignore_Alpha", BC5_Blue_Ignore_Alpha_Block},
{ "BC5_White_Ignore_Alpha", BC5_White_Ignore_Alpha_Block},
{ "BC5_Black_Ignore_Alpha", BC5_Black_Ignore_Alpha_Block},
{ "BC5_Red_Blue_Ignore_Alpha", BC5_Red_Blue_Ignore_Alpha_Block},
{ "BC5_Red_Green_Ignore_Alpha", BC5_Red_Green_Ignore_Alpha_Block},
{ "BC5_Green_Blue_Ignore_Alpha", BC5_Green_Blue_Ignore_Alpha_Block},
{ "BC5_Red_Half_Alpha", BC5_Red_Half_Alpha_Block},
{ "BC5_Green_Half_Alpha", BC5_Green_Half_Alpha_Block},
{ "BC7_Red_Ignore_Alpha", BC7_Red_Ignore_Alpha_Block},
{ "BC7_Blue_Half_Alpha", BC7_Blue_Half_Alpha_Block},
{ "BC7_White_Half_Alpha", BC7_White_Half_Alpha_Block},
{ "BC7_Black_Half_Alpha", BC7_Black_Half_Alpha_Block},
{ "BC7_Red_Blue_Half_Alpha", BC7_Red_Blue_Half_Alpha_Block},
{ "BC7_Red_Green_Half_Alpha", BC7_Red_Green_Half_Alpha_Block},
{ "BC7_Green_Blue_Half_Alpha", BC7_Green_Blue_Half_Alpha_Block},
{ "BC7_Red_Full_Alpha", BC7_Red_Full_Alpha_Block},
{ "BC7_Green_Full_Alpha", BC7_Green_Full_Alpha_Block},
{ "BC7_Blue_Full_Alpha", BC7_Blue_Full_Alpha_Block},
{ "BC7_White_Full_Alpha", BC7_White_Full_Alpha_Block},
{ "BC7_Green_Ignore_Alpha", BC7_Green_Ignore_Alpha_Block},
{ "BC7_Black_Full_Alpha", BC7_Black_Full_Alpha_Block},
{ "BC7_Red_Blue_Full_Alpha", BC7_Red_Blue_Full_Alpha_Block},
{ "BC7_Red_Green_Full_Alpha", BC7_Red_Green_Full_Alpha_Block},
{ "BC7_Green_Blue_Full_Alpha", BC7_Green_Blue_Full_Alpha_Block},
{ "BC7_Blue_Ignore_Alpha", BC7_Blue_Ignore_Alpha_Block},
{ "BC7_White_Ignore_Alpha", BC7_White_Ignore_Alpha_Block},
{ "BC7_Black_Ignore_Alpha", BC7_Black_Ignore_Alpha_Block},
{ "BC7_Red_Blue_Ignore_Alpha", BC7_Red_Blue_Ignore_Alpha_Block},
{ "BC7_Red_Green_Ignore_Alpha", BC7_Red_Green_Ignore_Alpha_Block},
{ "BC7_Green_Blue_Ignore_Alpha", BC7_Green_Blue_Ignore_Alpha_Block},
{ "BC7_Red_Half_Alpha", BC7_Red_Half_Alpha_Block},
{ "BC7_Green_Half_Alpha", BC7_Green_Half_Alpha_Block}
};
// Name -> BlockBC6 lookup table for the BC6 test vectors.
// Keys are the array names as strings; each value is a copy of the matching
// BC6_*_Block global. BC6 entries live here rather than in `blocks` because
// their mapped type is BlockBC6, not Block.
static std::unordered_map<std::string, BlockBC6> blocksBC6 {
{ "BC6_Red_Ignore_Alpha", BC6_Red_Ignore_Alpha_Block},
{ "BC6_Blue_Half_Alpha", BC6_Blue_Half_Alpha_Block},
{ "BC6_White_Half_Alpha", BC6_White_Half_Alpha_Block},
{ "BC6_Black_Half_Alpha", BC6_Black_Half_Alpha_Block},
{ "BC6_Red_Blue_Half_Alpha", BC6_Red_Blue_Half_Alpha_Block},
{ "BC6_Red_Green_Half_Alpha", BC6_Red_Green_Half_Alpha_Block},
{ "BC6_Green_Blue_Half_Alpha", BC6_Green_Blue_Half_Alpha_Block},
{ "BC6_Red_Full_Alpha", BC6_Red_Full_Alpha_Block},
{ "BC6_Green_Full_Alpha", BC6_Green_Full_Alpha_Block},
{ "BC6_Blue_Full_Alpha", BC6_Blue_Full_Alpha_Block},
{ "BC6_White_Full_Alpha", BC6_White_Full_Alpha_Block},
{ "BC6_Green_Ignore_Alpha", BC6_Green_Ignore_Alpha_Block},
{ "BC6_Black_Full_Alpha", BC6_Black_Full_Alpha_Block},
{ "BC6_Red_Blue_Full_Alpha", BC6_Red_Blue_Full_Alpha_Block},
{ "BC6_Red_Green_Full_Alpha", BC6_Red_Green_Full_Alpha_Block},
{ "BC6_Green_Blue_Full_Alpha", BC6_Green_Blue_Full_Alpha_Block},
{ "BC6_Blue_Ignore_Alpha", BC6_Blue_Ignore_Alpha_Block},
{ "BC6_White_Ignore_Alpha", BC6_White_Ignore_Alpha_Block},
{ "BC6_Black_Ignore_Alpha", BC6_Black_Ignore_Alpha_Block},
{ "BC6_Red_Blue_Ignore_Alpha", BC6_Red_Blue_Ignore_Alpha_Block},
{ "BC6_Red_Green_Ignore_Alpha", BC6_Red_Green_Ignore_Alpha_Block},
{ "BC6_Green_Blue_Ignore_Alpha", BC6_Green_Blue_Ignore_Alpha_Block},
{ "BC6_Red_Half_Alpha", BC6_Red_Half_Alpha_Block},
{ "BC6_Green_Half_Alpha", BC6_Green_Half_Alpha_Block}
}; };
#endif #endif

View File

@ -9,5 +9,7 @@ target_sources(Tests
CompressonatorTests.cpp CompressonatorTests.cpp
CompressonatorTests.h CompressonatorTests.h
BlockConstants.h BlockConstants.h
../../Applications/_Plugins/Common/UtilFuncs.cpp
../../Applications/_Plugins/Common/UtilFuncs.h
) )
target_link_libraries(Tests Catch2::Catch2 CMP_Core) target_link_libraries(Tests Catch2::Catch2 CMP_Core)

File diff suppressed because it is too large Load Diff