Upgrade CMP Core.

2020-07-05 23:05:07 -07:00 · 2020-07-05 23:05:07 -07:00 · 4ff7af50ca
commit 4ff7af50ca
parent 1e06539012
30 changed files with 10082 additions and 3060 deletions
--- a/extern/CMP_Core/shaders/BC1_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC1_Encode_kernel.cpp
@ -1,5 +1,5 @@
-//=====================================================================
-// Copyright (c) 2019    Advanced Micro Devices, Inc. All rights reserved.
+//==============================================================================
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@ -19,397 +19,117 @@
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.
 //
-//=====================================================================
-#include "BC1_Encode_kernel.h"
+//===============================================================================

-//============================================== BC1 INTERFACES  =======================================================
-void CompressBlockBC1_Fast(
-    CMP_Vec4uc  srcBlockTemp[16],
-    CMP_GLOBAL CGU_UINT32 compressedBlock[2])
-{
-    int i, k;
+// Heat Mapping
+// This code compares the quality of two similar or equal codecs at varying quality settings.
+// Each resulting compressed data block is colored according to one of three conditions:
+// the base codec (lowest quality) is colored green and the varying-quality codec is colored red.
+// If the quality of the base matches that of the varying codec then the color is set to blue.
+// Base codecs can be local to CMP_Core or imported using an external set of files (see the ExternCodec.h include below).

-    CMP_Vec3f rgb;
-    CMP_Vec3f average_rgb;                  // The centrepoint of the axis
-    CMP_Vec3f v_rgb;                        // The axis
-    CMP_Vec3f uniques[16];                  // The list of unique colours
-    int unique_pixels;                     // The number of unique pixels
-    CGU_FLOAT unique_recip;                    // Reciprocal of the above for fast multiplication
-    int index_map[16];                     // The map of source pixels to unique indices
-                                    
-    CGU_FLOAT pos_on_axis[16];                 // The distance each unique falls along the compression axis
-    CGU_FLOAT dist_from_axis[16];              // The distance each unique falls from the compression axis
-    CGU_FLOAT left = 0, right = 0, centre = 0; // The extremities and centre (average of left/right) of uniques along the compression axis
-    CGU_FLOAT axis_mapping_error = 0;          // The total computed error in mapping pixels to the axis
-
-    int swap;                              // Indicator if the RGB values need swapping to generate an opaque result
-
-    // -------------------------------------------------------------------------------------
-    // (3) Find the array of unique pixel values and sum them to find their average position
-    // -------------------------------------------------------------------------------------
-    {
-        // Find the array of unique pixel values and sum them to find their average position      
-        int current_pixel, firstdiff;
-        current_pixel = unique_pixels = 0;
-        average_rgb = 0.0f;
-        firstdiff = -1;
-        for (i = 0; i<16; i++)
-        {
-                for (k = 0; k<i; k++)
-                    if ((((srcBlockTemp[k].x ^ srcBlockTemp[i].x) & 0xf8) == 0) && (((srcBlockTemp[k].y ^ srcBlockTemp[i].y) & 0xfc) == 0) && (((srcBlockTemp[k].z ^ srcBlockTemp[i].z) & 0xf8) == 0))
-                        break;
-                index_map[i] = current_pixel++;
-                //pixel_count[i] = 1;
-                CMP_Vec3f trgb;
-                rgb.x = (CGU_FLOAT)((srcBlockTemp[i].x) & 0xff);
-                rgb.y = (CGU_FLOAT)((srcBlockTemp[i].y) & 0xff);
-                rgb.z = (CGU_FLOAT)((srcBlockTemp[i].z) & 0xff);
-
-                trgb.x = CS_RED(rgb.x, rgb.y, rgb.z);
-                trgb.y = CS_GREEN(rgb.x, rgb.y, rgb.z);
-                trgb.z = CS_BLUE(rgb.x, rgb.y, rgb.z);
-                uniques[i] = trgb;
-
-                if (k == i)
-                {
-                    unique_pixels++;
-                    if ((i != 0) && (firstdiff < 0)) firstdiff = i;
-                }
-                average_rgb = average_rgb + trgb;
-        }
-
-        unique_pixels = 16;
-        // Compute average of the uniques
-        unique_recip = 1.0f / (CGU_FLOAT)unique_pixels;
-        average_rgb = average_rgb * unique_recip;
-    }
-
-    // -------------------------------------------------------------------------------------
-    // (4) For each component, reflect points about the average so all lie on the same side
-    // of the average, and compute the new average - this gives a second point that defines the axis
-    // To compute the sign of the axis sum the positive differences of G for each of R and B (the
-    // G axis is always positive in this implementation
-    // -------------------------------------------------------------------------------------
-    // An interesting situation occurs if the G axis contains no information, in which case the RB
-    // axis is also compared. I am not entirely sure if this is the correct implementation - should
-    // the priority axis be determined by magnitude?
-    {
-
-        CGU_FLOAT rg_pos, bg_pos, rb_pos;
-        v_rgb = 0.0f;
-        rg_pos = bg_pos = rb_pos = 0;
-
-        for (i = 0; i < unique_pixels; i++)
-        {
-            rgb = uniques[i] - average_rgb;
-
-#ifndef ASPM_GPU
-            v_rgb.x += (CGU_FLOAT)fabs(rgb.x);
-            v_rgb.y += (CGU_FLOAT)fabs(rgb.y);
-            v_rgb.z += (CGU_FLOAT)fabs(rgb.z);
-#else
-            v_rgb = v_rgb + fabs(rgb);
+#ifndef TEST_HEATMAP
+//#define TEST_HEATMAP   // Enable this to run heat map tests on BC1 codec
 #endif

-            if (rgb.x > 0) { rg_pos += rgb.y; rb_pos += rgb.z; }
-            if (rgb.z > 0) bg_pos += rgb.y;
-        }
-        v_rgb = v_rgb*unique_recip;
-        if (rg_pos < 0) v_rgb.x = -v_rgb.x;
-        if (bg_pos < 0) v_rgb.z = -v_rgb.z;
-        if ((rg_pos == bg_pos) && (rg_pos == 0))
-            if (rb_pos < 0) v_rgb.z = -v_rgb.z;
-    }
+#include "BC1_Encode_kernel.h"

-    // -------------------------------------------------------------------------------------
-    // (5) Axis projection and remapping
-    // -------------------------------------------------------------------------------------
-    {
-        CGU_FLOAT v2_recip;
-        // Normalise the axis for simplicity of future calculation
-        v2_recip = (v_rgb.x*v_rgb.x + v_rgb.y*v_rgb.y + v_rgb.z*v_rgb.z);
-        if (v2_recip > 0)
-            v2_recip = 1.0f / (CGU_FLOAT)sqrt(v2_recip);
-        else
-            v2_recip = 1.0f;
-        v_rgb = v_rgb*v2_recip;
-    }
+#ifdef TEST_HEATMAP 
+#include "ExternCodec.h" // Use external codec for testing 
+#endif
+//============================================== BC1 INTERFACES  =======================================================
+ 
+#ifndef ASPM_HLSL

-    // -------------------------------------------------------------------------------------
-    // (6) Map the axis
-    // -------------------------------------------------------------------------------------
-    // the line joining (and extended on either side of) average and axis
-    // defines the axis onto which the points will be projected
-    // Project all the points onto the axis, calculate the distance along
-    // the axis from the centre of the axis (average)
-    // From Foley & Van Dam: Closest point of approach of a line (P + v) to a point (R) is
-    //                            P + ((R-P).v) / (v.v))v
-    // The distance along v is therefore (R-P).v / (v.v)
-    // (v.v) is 1 if v is a unit vector.
-    //
-    // Calculate the extremities at the same time - these need to be reasonably accurately
-    // represented in all cases
-    //
-    // In this first calculation, also find the error of mapping the points to the axis - this
-    // is our major indicator of whether or not the block has compressed well - if the points
-    // map well onto the axis then most of the noise introduced is high-frequency noise
-    {
-        left = 10000.0f;
-        right = -10000.0f;
-        axis_mapping_error = 0;
-        for (i = 0; i < unique_pixels; i++)
-        {
-            // Compute the distance along the axis of the point of closest approach
-            CMP_Vec3f temp = (uniques[i] - average_rgb);
-            pos_on_axis[i] = (temp.x * v_rgb.x) + (temp.y * v_rgb.y) + (temp.z * v_rgb.z);
-
-            // Compute the actual point and thence the mapping error
-            rgb = uniques[i] - (average_rgb + (v_rgb * pos_on_axis[i]));
-            dist_from_axis[i] = rgb.x*rgb.x + rgb.y*rgb.y + rgb.z*rgb.z;
-            axis_mapping_error += dist_from_axis[i];
-
-            // Work out the extremities
-            if (pos_on_axis[i] < left)
-                left = pos_on_axis[i];
-            if (pos_on_axis[i] > right)
-                right = pos_on_axis[i];
-        }
-    }
-
-    // -------------------------------------------------------------------------------------
-    // (7) Now we have a good axis and the basic information about how the points are mapped
-    // to it
-    // Our initial guess is to represent the endpoints accurately, by moving the average
-    // to the centre and recalculating the point positions along the line
-    // -------------------------------------------------------------------------------------
-    {
-        centre = (left + right) / 2;
-        average_rgb = average_rgb + (v_rgb*centre);
-        for (i = 0; i<unique_pixels; i++)
-            pos_on_axis[i] -= centre;
-        right -= centre;
-        left -= centre;
-
-        // Accumulate our final resultant error
-        axis_mapping_error *= unique_recip * (1 / 255.0f);
-
-    }
-
-    // -------------------------------------------------------------------------------------
-    // (8) Calculate the high and low output colour values
-    // Involved in this is a rounding procedure which is undoubtedly slightly twitchy. A
-    // straight rounded average is not correct, as the decompressor 'unrounds' by replicating
-    // the top bits to the bottom.
-    // In order to take account of this process, we don't just apply a straight rounding correction,
-    // but base our rounding on the input value (a straight rounding is actually pretty good in terms of
-    // error measure, but creates a visual colour and/or brightness shift relative to the original image)
-    // The method used here is to apply a centre-biased rounding dependent on the input value, which was
-    // (mostly by experiment) found to give minimum MSE while preserving the visual characteristics of
-    // the image.
-    // rgb = (average_rgb + (left|right)*v_rgb);
-    // -------------------------------------------------------------------------------------
-    {
-        CGU_UINT32 c0, c1, t;
-        int rd, gd, bd;
-        rgb = (average_rgb + (v_rgb * left));
-        rd = ( CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z);
-        gd = ( CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z);
-        bd = ( CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z);
-        ROUND_AND_CLAMP(rd, 5);
-        ROUND_AND_CLAMP(gd, 6);
-        ROUND_AND_CLAMP(bd, 5);
-        c0 = ((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3);
-
-        rgb = average_rgb + (v_rgb * right);
-        rd = ( CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z);
-        gd = ( CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z);
-        bd = ( CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z);
-        ROUND_AND_CLAMP(rd, 5);
-        ROUND_AND_CLAMP(gd, 6);
-        ROUND_AND_CLAMP(bd, 5);
-        c1 = (((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3));
-
-        // Force to be a 4-colour opaque block - in which case, c0 is greater than c1
-        // blocktype == 4
-        {
-            if (c0 < c1)
-            {
-                t = c0;
-                c0 = c1;
-                c1 = t;
-                swap = 1;
-            }
-            else if (c0 == c1)
-            {
-                // This block will always be encoded in 3-colour mode
-                // Need to ensure that only one of the two points gets used,
-                // avoiding accidentally setting some transparent pixels into the block
-                for (i = 0; i<unique_pixels; i++)
-                    pos_on_axis[i] = left;
-                swap = 0;
-            }
-            else
-                swap = 0;
-        }
-
-        compressedBlock[0] = c0 | (c1 << 16);
-    }
-
-    // -------------------------------------------------------------------------------------
-    // (9) Final clustering, creating the 2-bit values that define the output
-    // -------------------------------------------------------------------------------------
-    {
-        CGU_UINT32 bit;
-        CGU_FLOAT division;
-        CGU_FLOAT cluster_x[4];
-        CGU_FLOAT cluster_y[4];
-        int cluster_count[4];
-
-        // (blocktype == 4)
-        {
-            compressedBlock[1] = 0;
-            division = right*2.0f / 3.0f;
-            centre = (left + right) / 2;        // Actually, this code only works if centre is 0 or approximately so
-
-            for (i = 0; i<4; i++)
-            {
-                cluster_x[i] = cluster_y[i] = 0.0f;
-                cluster_count[i] = 0;
-            }
-
-
-            for (i = 0; i<16; i++)
-            {
-                rgb.z = pos_on_axis[index_map[i]];
-                // Endpoints (indicated by block > average) are 0 and 1, while
-                // interpolants are 2 and 3
-                if (fabs(rgb.z) >= division)
-                    bit = 0;
-                else
-                    bit = 2;
-                // Positive is in the latter half of the block
-                if (rgb.z >= centre)
-                    bit += 1;
-                // Set the output, taking swapping into account
-                compressedBlock[1] |= ((bit^swap) << (2 * i));
-
-                // Average the X and Y locations for each cluster
-                cluster_x[bit] += (CGU_FLOAT)(i & 3);
-                cluster_y[bit] += (CGU_FLOAT)(i >> 2);
-                cluster_count[bit]++;
-            }
-
-            for (i = 0; i<4; i++)
-            {
-                CGU_FLOAT cr;
-                if (cluster_count[i])
-                {
-                    cr = 1.0f / cluster_count[i];
-                    cluster_x[i] *= cr;
-                    cluster_y[i] *= cr;
-                }
-                else
-                {
-                    cluster_x[i] = cluster_y[i] = -1;
-                }
-            }
-
-            // patterns in axis position detection
-            // (same algorithm as used in the SSE version)
-            if ((compressedBlock[0] & 0xffff) != (compressedBlock[0] >> 16))
-            {
-                CGU_UINT32 i1, k1;
-                CGU_UINT32 x = 0, y = 0;
-                int xstep = 0, ystep = 0;
-
-                // Find a corner to search from
-                for (k1 = 0; k1<4; k1++)
-                {
-                    switch (k1)
-                    {
-                    case 0:
-                        x = 0; y = 0; xstep = 1; ystep = 1;
-                        break;
-                    case 1:
-                        x = 0; y = 3; xstep = 1; ystep = -1;
-                        break;
-                    case 2:
-                        x = 3; y = 0; xstep = -1; ystep = 1;
-                        break;
-                    case 3:
-                        x = 3; y = 3; xstep = -1; ystep = -1;
-                        break;
-                    }
-
-                    for (i1 = 0; i1<4; i1++)
-                    {
-                        if ((POS(x, y + ystep*i1)                < POS(x + xstep, y + ystep*i1)) ||
-                            (POS(x + xstep, y + ystep*i1)        < POS(x + 2 * xstep, y + ystep*i1)) ||
-                            (POS(x + 2 * xstep, y + ystep*i1)    < POS(x + 3 * xstep, y + ystep*i1))
-                            )
-                            break;
-                        if ((POS(x + xstep*i1, y)                < POS(x + xstep*i1, y + ystep)) ||
-                            (POS(x + xstep*i1, y + ystep)        < POS(x + xstep*i1, y + 2 * ystep)) ||
-                            (POS(x + xstep*i1, y + 2 * ystep)    < POS(x + xstep*i1, y + 3 * ystep))
-                            )
-                            break;
-                    }
-                    if (i1 == 4)
-                        break;
-                }
-            }
-        }
-
-    }
-    // done
-}
-
-INLINE void store_uint8(CMP_GLOBAL CGU_UINT8 u_dstptr[8], CGU_UINT32 data[2])
-{
-   int shift = 0;
-   for (CGU_INT k=0; k<4; k++)
-   {
-      u_dstptr[k] = (data[0] >> shift)&0xFF;
-      shift += 8;
-   }
-   shift = 0;
-   for (CGU_INT k=4; k<8; k++)
-   {
-      u_dstptr[k] = (data[1] >> shift)&0xFF;
-      shift += 8;
-   }
-}
-
-void  CompressBlockBC1_Internal(
-    const CMP_Vec4uc  srcBlockTemp[16],
+void  CompressBlockBC1_Internal( // BC1-encodes one 4x4 block: 16 source pixels in, two 32-bit words out
+    const       CMP_Vec4uc      srcBlockTemp[16],
    CMP_GLOBAL  CGU_UINT32      compressedBlock[2],
-    CMP_GLOBAL  const CMP_BC15Options *BC15options)
+    CMP_GLOBAL  CMP_BC15Options *BC15options)
 {
-    CGU_UINT8    blkindex = 0;
    CGU_UINT8    srcindex = 0;
-    CGU_UINT8    rgbBlock[64];
+    CGU_FLOAT    BlockA[16];      // NOTE(review): never written in this function, yet passed to CompressBlockBC1_RGBA_Internal below - confirm the callee does not read it
+    CGU_Vec3f    rgbBlockUV[16];  // source colors converted to floats in 0..1
    for ( CGU_INT32 j = 0; j < 4; j++) {
     for ( CGU_INT32 i = 0; i < 4; i++) {
-        rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z;  // B
-        rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y;  // G
-        rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x;  // R
-        rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w;  // A
+        rgbBlockUV[srcindex].x = (CGU_FLOAT)(srcBlockTemp[srcindex].x & 0xFF)/ 255.0f;  // R, low 8 bits scaled to 0..1
+        rgbBlockUV[srcindex].y = (CGU_FLOAT)(srcBlockTemp[srcindex].y & 0xFF)/ 255.0f;  // G, low 8 bits scaled to 0..1
+        rgbBlockUV[srcindex].z = (CGU_FLOAT)(srcBlockTemp[srcindex].z & 0xFF)/ 255.0f;  // B, low 8 bits scaled to 0..1 (the .w alpha channel is not read)
        srcindex++;
        }
    }

    CMP_BC15Options internalOptions = *BC15options;
-    CalculateColourWeightings(rgbBlock, &internalOptions);
+    internalOptions = CalculateColourWeightings3f(rgbBlockUV,internalOptions); // result is stored back into the local copy of the options
+    CGU_Vec3f  channelWeights     = {internalOptions.m_fChannelWeights[0],internalOptions.m_fChannelWeights[1],internalOptions.m_fChannelWeights[2]};
+    CGU_BOOL   isSRGB             = internalOptions.m_bIsSRGB; // feature not supported in this section of code until v4.1
+    CGU_Vec2ui  cmpBlock    = 0;
+
+//#define CMP_PRINTRESULTS 
+#ifdef TEST_HEATMAP
+
+    #ifdef CMP_PRINTRESULTS
+        static int q1= 0,q2= 0,same = 0; // NOTE(review): q1, q2 and same are not used anywhere in the visible code
+        static int testnum = 0;
+        printf("%4d ",testnum); 
+    #endif
+    {
+
+        // Heatmap test: See BCn_Common_Kernel for details 
+        CGU_Vec2ui red   = {0xf800f800,0}; // both 16-bit endpoints 0xf800 (only the 5:6:5 red bits set), index word 0
+        CGU_Vec2ui green = {0x07e007e0,0}; // both 16-bit endpoints 0x07e0 (only the 5:6:5 green bits set), index word 0
+        CGU_Vec2ui blue  = {0x001f001f,0}; // both 16-bit endpoints 0x001f (only the 5:6:5 blue bits set), index word 0
+
+        CGU_Vec2ui comp1;
+        CGU_Vec2ui comp2;
+        float err ;
+
+        comp1 =  (BC15options->m_fquality < 0.3)?CompressBC1Block_SRGB(rgbBlockUV):CompressBC1Block(rgbBlockUV); // presumably the base codec (from ExternCodec.h - TODO confirm); the _SRGB variant is used when quality < 0.3
+        comp2 =  CompressBlockBC1_UNORM(rgbBlockUV, BC15options->m_fquality,BC15options->m_fquality < 0.3?true:false); // codec being compared, run at the same quality setting
+
+        if ((comp1.x == comp2.x)&&(comp1.y == comp2.y)) err = 0.0f; // identical encodings: skip the error computation
+        else {
+            float err1 = CMP_RGBBlockError(rgbBlockUV,comp1,(BC15options->m_fquality < 0.3)?true:false);
+            float err2 = CMP_RGBBlockError(rgbBlockUV,comp2,(BC15options->m_fquality < 0.3)?true:false);
+            err = err1-err2; // sign selects the heat-map color below; assuming CMP_RGBBlockError returns an error magnitude, > 0 means comp1 is the worse encoding
+        }
+
+        if (err > 0.0f) 
+        {
+            cmpBlock = red;
+        }
+        else if (err < 0.0f) {
+            cmpBlock = green;
+        }
+        else {
+            cmpBlock = blue;
+        }
+    }
+    #ifdef CMP_PRINTRESULTS
+        printf("Q1 [%4X:%4X]  %.3f, ",cmpBlockQ1.x,cmpBlockQ1.y,err1); // NOTE(review): cmpBlockQ1 is not declared in the visible code and err1 is scoped to the else-branch above - likely fails to compile when CMP_PRINTRESULTS is defined
+        printf("Q2 [%4X:%4X]  %.3f, ",cmpBlock.x,cmpBlock.y  ,err2); // NOTE(review): err2 is likewise out of scope here
+        testnum++;
+    #endif
+#else
+
+    // printf("q = %f\n",internalOptions.m_fquality);
+    cmpBlock = CompressBlockBC1_RGBA_Internal( 
+                       rgbBlockUV, 
+                       BlockA,
+                       channelWeights,
+                       0, // alpha threshold forced to 0 instead of internalOptions.m_nAlphaThreshold: open bug (debug builds are OK, release builds have an issue), still to be investigated
+                       1, // presumably the refinement step count (the BC2 encoder labels this position m_nRefinementSteps) - TODO confirm
+                       internalOptions.m_fquality,
+                       isSRGB
+                       );
+#endif
+    compressedBlock[0] = cmpBlock.x;
+    compressedBlock[1] = cmpBlock.y;
+

-    CompressRGBBlock(rgbBlock,
-                     compressedBlock,
-                     &internalOptions,
-                     TRUE,
-                     FALSE, 
-                     internalOptions.m_nAlphaThreshold);
 }
+#endif

-//============================================== USER INTERFACES  ========================================================
+//============================================== CPU USER INTERFACES  ========================================================
 #ifndef ASPM_GPU
 int CMP_CDECL CreateOptionsBC1(void **options)
 {
@ -528,15 +248,27 @@ int CMP_CDECL DecompressBlockBC1(const unsigned char cmpBlock[8],
        BC15options     = &BC15optionsDefault;
        SetDefaultBC15Options(BC15options);
    }
-    DecompressDXTRGB_Internal(srcBlock, ( CGU_UINT32 *)cmpBlock, BC15options);

+    CGU_Vec2ui compBlock;
+
+    compBlock.x = (CGU_UINT32)cmpBlock[3] << 24 |
+                  (CGU_UINT32)cmpBlock[2] << 16 |
+                  (CGU_UINT32)cmpBlock[1] << 8  |
+                  (CGU_UINT32)cmpBlock[0];
+
+    compBlock.y = (CGU_UINT32)cmpBlock[7] << 24 |
+                  (CGU_UINT32)cmpBlock[6] << 16 |
+                  (CGU_UINT32)cmpBlock[5] << 8  |
+                  (CGU_UINT32)cmpBlock[4];
+
+    cmp_decompressDXTRGBA_Internal(srcBlock, compBlock, BC15options->m_mapDecodeRGBA);

    return CGU_CORE_OK;
 }
 #endif

 //============================================== OpenCL USER INTERFACE ========================================================
-#ifdef ASPM_GPU
+#ifdef ASPM_OPENCL
 CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
    CMP_GLOBAL  const CMP_Vec4uc*   ImageSource,
    CMP_GLOBAL  CGU_UINT8*          ImageDestination,
@ -547,14 +279,10 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
    CGU_UINT32 xID;
    CGU_UINT32 yID;

-//printf("SourceInfo: (H:%d,W:%d) Quality %1.2f \n", SourceInfo->m_src_height, SourceInfo->m_src_width, SourceInfo->m_fquality);
-#ifdef ASPM_GPU
+    //printf("SourceInfo: (H:%d,W:%d) Quality %1.2f \n", SourceInfo->m_src_height, SourceInfo->m_src_width, SourceInfo->m_fquality);
    xID = get_global_id(0);
    yID = get_global_id(1);
-#else
-    xID = 0;
-    yID = 0;
-#endif
+

    if (xID >= (SourceInfo->m_src_width / BlockX)) return;
    if (yID >= (SourceInfo->m_src_height / BlockX)) return;
@ -572,11 +300,6 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
        }
        srcindex += srcWidth;
    }
-
-    // fast low quality mode that matches v3.1 code
-    if (SourceInfo->m_fquality <= 0.04f)
-        CompressBlockBC1_Fast(srcData, (CMP_GLOBAL  CGU_UINT32 *)&ImageDestination[destI]);
-    else
-        CompressBlockBC1_Internal(srcData, (CMP_GLOBAL  CGU_UINT32 *)&ImageDestination[destI], BC15options);
+    CompressBlockBC1_Internal(srcData, (CMP_GLOBAL  CGU_UINT32 *)&ImageDestination[destI], BC15options);
 }
 #endif
--- a/extern/CMP_Core/shaders/BC1_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC1_Encode_kernel.h
@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@ -26,23 +26,5 @@
 #include "Common_Def.h"
 #include "BCn_Common_Kernel.h"

-#define CS_RED(r, g, b)        (r)
-#define CS_GREEN(r, g, b)    (g)
-#define CS_BLUE(r, g, b)    ((b+g)*0.5f)
-#define DCS_RED(r, g, b)    (r)
-#define DCS_GREEN(r, g, b)    (g)
-#define DCS_BLUE(r, g, b)    ((2.0f*b)-g)
-#define BYTEPP 4
-#define BC1CompBlockSize    8
-
-
-#define ROUND_AND_CLAMP(v, shift)    \
-{\
-    if (v < 0) v = 0;\
-    else if (v > 255) v = 255;\
-    else v += (0x80>>shift) - (v>>shift);\
-}
-
-#define POS(x,y) (pos_on_axis[(x)+(y)*4])

 #endif
--- a/extern/CMP_Core/shaders/BC1_Encode_kernel.hlsl
+++ b/extern/CMP_Core/shaders/BC1_Encode_kernel.hlsl
@ -0,0 +1,99 @@
+//=====================================================================
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// File: BC1_Encode_kernel.hlsl
+//--------------------------------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//--------------------------------------------------------------------------------------
+#ifndef ASPM_HLSL
+#define ASPM_HLSL
+#endif
+
+cbuffer cbCS : register( b0 )  // constants consumed by the EncodeBlocks compute shader
+{
+    uint  g_tex_width;         // not referenced directly in this file
+    uint  g_num_block_x;       // divisor used by EncodeBlocks to split a block ID into block_x / block_y
+    uint  g_format;            // not referenced directly in this file
+    uint  g_mode_id;           // not referenced directly in this file
+    uint  g_start_block_id;    // offset added to the group-derived block index in EncodeBlocks
+    uint  g_num_total_blocks;  // not referenced directly in this file
+    float g_alpha_weight;      // not referenced directly in this file
+    float g_quality;           // passed straight to CompressBlockBC1_UNORM
+};
+
+#include "BCn_Common_Kernel.h"
+
+// Source Data
+Texture2D g_Input                : register( t0 ); 
+StructuredBuffer<uint4> g_InBuff : register( t1 );
+
+// Compressed Output Data
+RWStructuredBuffer<uint2> g_OutBuff : register( u0 );
+
+// Processing multiple blocks at a time
+#define MAX_USED_THREAD     16  // pixels in a BC (block compressed) block
+#define BLOCK_IN_GROUP      4   // the number of BC blocks a thread group processes = 64 / 16 = 4
+#define THREAD_GROUP_SIZE   64  // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
+#define BLOCK_SIZE_Y        4
+#define BLOCK_SIZE_X        4
+
+groupshared float4 shared_temp[THREAD_GROUP_SIZE];
+
+[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
+void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID) // BC1-encodes BLOCK_IN_GROUP 4x4 blocks per thread group, one thread per source pixel
+{
+    // we process 4 BC blocks per thread group
+    uint blockInGroup   = GI / MAX_USED_THREAD;                                         // what BC block this thread is on within this thread group
+    uint blockID        = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on
+    uint pixelBase      = blockInGroup * MAX_USED_THREAD;                               // the first id of the pixel in this BC block in this thread group
+    uint pixelInBlock   = GI - pixelBase;                                               // id of the pixel in this BC block
+
+
+    uint block_y        = blockID / g_num_block_x;            // block row
+    uint block_x        = blockID - block_y * g_num_block_x;  // block column (blockID modulo g_num_block_x)
+    uint base_x         = block_x * BLOCK_SIZE_X;             // texel x of the block's first pixel
+    uint base_y         = block_y * BLOCK_SIZE_Y;             // texel y of the block's first pixel
+
+
+    // Load up the pixels
+    if (pixelInBlock < 16) // always true here: pixelInBlock equals GI % MAX_USED_THREAD
+    {
+           // each thread loads its own pixel into group-shared memory (values presumably in 0..1 - TODO confirm against the bound texture format)
+           shared_temp[GI] = float4(g_Input.Load( uint3( base_x + pixelInBlock % 4, base_y + pixelInBlock / 4, 0 ) ));
+    }
+
+    GroupMemoryBarrierWithGroupSync(); // all shared_temp writes must be complete before any thread reads the block back
+
+    // The first thread of each block gathers its 16 pixels, encodes them and saves the result
+    if (pixelInBlock == 0)
+    {
+           float3 block[16];
+           for (int i = 0; i < 16; i++ )
+           {
+                   block[i].x  = shared_temp[pixelBase + i].x;
+                   block[i].y  = shared_temp[pixelBase + i].y;
+                   block[i].z  = shared_temp[pixelBase + i].z;
+           }
+
+           g_OutBuff[blockID] = CompressBlockBC1_UNORM(block,g_quality,false); // NOTE(review): blockID is not checked against g_num_total_blocks - confirm the dispatch size never overshoots the output buffer
+    }
+}
--- a/extern/CMP_Core/shaders/BC2_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC2_Encode_kernel.cpp
@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@ -20,70 +20,50 @@
 // THE SOFTWARE.
 //
 //=====================================================================
+
 #include "BC2_Encode_kernel.h"

 //============================================== BC2 INTERFACES =======================================================

-void DXTCV11CompressExplicitAlphaBlock(const CGU_UINT8 block_8[16], CMP_GLOBAL CGU_UINT32 block_dxtc[2])
-{
-    CGU_UINT8 i;
-    block_dxtc[0] = block_dxtc[1] = 0;
-    for (i = 0; i < 16; i++)
-    {
-        int v = block_8[i];
-        v = (v + 7 - (v >> 4));
-        v >>= 4;
-        if (v < 0)
-            v = 0;
-        if (v > 0xf)
-            v = 0xf;
-        if (i < 8)
-            block_dxtc[0] |= v << (4 * i);
-        else
-            block_dxtc[1] |= v << (4 * (i - 8));
-    }
-}
-
-#define EXPLICIT_ALPHA_PIXEL_MASK 0xf
-#define EXPLICIT_ALPHA_PIXEL_BPP  4
-
-CGU_INT CompressExplicitAlphaBlock(const CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4], 
-    CMP_GLOBAL CGU_UINT32 compressedBlock[2])
-{
-    DXTCV11CompressExplicitAlphaBlock(alphaBlock, compressedBlock);
-    return CGU_CORE_OK;
-}
-
 void  CompressBlockBC2_Internal(const CMP_Vec4uc srcBlockTemp[16],
                                CMP_GLOBAL CGU_UINT32 compressedBlock[4],
                                CMP_GLOBAL const CMP_BC15Options *BC15options)
 {
-    CGU_UINT8    blkindex = 0;
-    CGU_UINT8    srcindex = 0;
-    CGU_UINT8    rgbaBlock[64];
-    for (CGU_INT32 j = 0; j < 4; j++) {
-        for (CGU_INT32 i = 0; i < 4; i++) {
-            rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z;  // B
-            rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y;  // G
-            rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x;  // R
-            rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w;  // A
-            srcindex++;
-        }
+
+    CGU_Vec2ui  cmpBlock;      // reused: first receives the explicit-alpha words, then the color words
+    CGU_Vec3f   rgbBlock[16];  // source colors converted to floats in 0..1
+    CGU_FLOAT   BlockA[16];    // source alpha converted to floats in 0..1
+
+    for (CGU_INT32 i = 0; i < 16; i++) {
+        rgbBlock[i].x = (CGU_FLOAT)(srcBlockTemp[i].x & 0xFF)/255.0f;   // R, low 8 bits scaled to 0..1
+        rgbBlock[i].y = (CGU_FLOAT)(srcBlockTemp[i].y & 0xFF)/255.0f;   // G, low 8 bits scaled to 0..1
+        rgbBlock[i].z = (CGU_FLOAT)(srcBlockTemp[i].z & 0xFF)/255.0f;   // B, low 8 bits scaled to 0..1
+        BlockA[i]     = (CGU_FLOAT)(srcBlockTemp[i].w & 0xFF)/255.0f;   // A, low 8 bits scaled to 0..1
    }

-    CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
-    for (CGU_INT32 i = 0; i < 16; i++)
-        alphaBlock[i] = (CGU_UINT8)(((CGU_INT32*)rgbaBlock)[i] >> RGBA8888_OFFSET_A);
+    cmpBlock = cmp_compressExplicitAlphaBlock(BlockA);  // the two alpha words are written first, at DXTC_OFFSET_ALPHA
+    compressedBlock[DXTC_OFFSET_ALPHA  ] = cmpBlock.x;
+    compressedBlock[DXTC_OFFSET_ALPHA+1] = cmpBlock.y;

    // Need a copy, as CalculateColourWeightings sets variables in the BC15options
    CMP_BC15Options internalOptions = *BC15options;
-    CalculateColourWeightings(rgbaBlock, &internalOptions);
+    internalOptions             = CalculateColourWeightings3f(rgbBlock, internalOptions); // result is stored back into the local copy
+    internalOptions.m_bUseAlpha = false; // presumably because alpha is already carried by the explicit alpha words written above - TODO confirm
+    CGU_Vec3f  channelWeights   = {internalOptions.m_fChannelWeights[0],internalOptions.m_fChannelWeights[1],internalOptions.m_fChannelWeights[2]};
+    CGU_Vec3f MinColor = {0,0,0}, MaxColor={0,0,0}; // NOTE(review): MinColor and MaxColor are not used anywhere in this function

-    CGU_INT err = CompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
-    if (err != 0)
-        return;
+    cmpBlock = CompressBlockBC1_RGBA_Internal(
+                 rgbBlock, 
+                 BlockA,
+                 channelWeights,
+                 0, // alpha threshold fixed at 0 instead of internalOptions.m_nAlphaThreshold
+                 1, // fixed at 1 instead of internalOptions.m_nRefinementSteps
+                 internalOptions.m_fquality,
+                 FALSE); // same argument position where the BC1 encoder passes its isSRGB flag
+
+    compressedBlock[DXTC_OFFSET_RGB]   = cmpBlock.x;
+    compressedBlock[DXTC_OFFSET_RGB+1] = cmpBlock.y;

-    CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], &internalOptions,FALSE,FALSE,0);
 }

 //============================================== USER INTERFACES ========================================================
@ -141,6 +121,9 @@ int CMP_CDECL SetChannelWeightsBC2(void *options,
    return CGU_CORE_OK;
 }

+#define EXPLICIT_ALPHA_PIXEL_MASK 0xf
+#define EXPLICIT_ALPHA_PIXEL_BPP  4
+
 // Decompresses an explicit alpha block (DXT3)
 void DecompressExplicitAlphaBlock(CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4],
    const CGU_UINT32 compressedBlock[2])
@ -160,7 +143,13 @@ void DecompressBC2_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[BLOCK_SIZE_4X4X4],
    CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];

    DecompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
-    DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],BC15options);
+
+
+    CGU_Vec2ui compBlock;
+    compBlock.x = compressedBlock[DXTC_OFFSET_RGB];
+    compBlock.y = compressedBlock[DXTC_OFFSET_RGB+1];
+
+    cmp_decompressDXTRGBA_Internal(rgbaBlock, compBlock,BC15options->m_mapDecodeRGBA);

    for (CGU_UINT32 i = 0; i < 16; i++)
        ((CMP_GLOBAL CGU_UINT32*)rgbaBlock)[i] = (alphaBlock[i] << RGBA8888_OFFSET_A) | (((CMP_GLOBAL CGU_UINT32*)rgbaBlock)[i] & ~(BYTE_MASK << RGBA8888_OFFSET_A));
@ -219,7 +208,7 @@ int CMP_CDECL DecompressBlockBC2(const unsigned char cmpBlock[16],
 #endif

 //============================================== OpenCL USER INTERFACE ========================================================
-#ifdef ASPM_GPU
+#ifdef ASPM_OPENCL
 CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
    CMP_GLOBAL  const CMP_Vec4uc*   ImageSource,
    CMP_GLOBAL  CGU_UINT8*          ImageDestination,
--- a/extern/CMP_Core/shaders/BC2_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC2_Encode_kernel.h
@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
--- a/extern/CMP_Core/shaders/BC2_Encode_kernel.hlsl
+++ b/extern/CMP_Core/shaders/BC2_Encode_kernel.hlsl
@ -0,0 +1,101 @@
+//=====================================================================
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// File: BC2Encode.hlsl
+//--------------------------------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//--------------------------------------------------------------------------------------
+#ifndef ASPM_HLSL
+#define ASPM_HLSL
+#endif
+
+
+cbuffer cbCS : register( b0 )
+{
+    uint  g_tex_width;
+    uint  g_num_block_x;
+    uint  g_format;
+    uint  g_mode_id;
+    uint  g_start_block_id;
+    uint  g_num_total_blocks;
+    float g_alpha_weight;
+    float g_quality;
+};
+
+#include "BCn_Common_Kernel.h"
+
+// Source Data
+Texture2D g_Input : register( t0 ); 
+StructuredBuffer<uint4> g_InBuff : register( t1 );
+
+// Compressed Output Data
+RWStructuredBuffer<uint4> g_OutBuff : register( u0 );
+
+// Processing multiple blocks at a time
+#define MAX_USED_THREAD     16  // pixels in a BC (block compressed) block
+#define BLOCK_IN_GROUP      4   // the number of BC blocks a thread group processes = 64 / 16 = 4
+#define THREAD_GROUP_SIZE   64  // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
+#define BLOCK_SIZE_Y        4
+#define BLOCK_SIZE_X        4
+
+groupshared float4 shared_temp[THREAD_GROUP_SIZE];
+
+[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
+void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
+{
+    // we process 4 BC blocks per thread group
+    uint blockInGroup   = GI / MAX_USED_THREAD;                                         // what BC block this thread is on within this thread group
+    uint blockID        = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on
+    uint pixelBase      = blockInGroup * MAX_USED_THREAD;                               // the first id of the pixel in this BC block in this thread group
+    uint pixelInBlock   = GI - pixelBase;                                               // id of the pixel in this BC block
+
+
+    uint block_y        = blockID / g_num_block_x;
+    uint block_x        = blockID - block_y * g_num_block_x;
+    uint base_x         = block_x * BLOCK_SIZE_X;
+    uint base_y         = block_y * BLOCK_SIZE_Y;
+
+
+    // Load up the pixels
+    if (pixelInBlock < 16)
+    {
+           // load pixels (0..1)
+           shared_temp[GI] = float4(g_Input.Load( uint3( base_x + pixelInBlock % 4, base_y + pixelInBlock / 4, 0 ) ));
+    }
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // Process and save the compressed block
+    if (pixelInBlock == 0)
+    {
+        float3 blockRGB[16];
+        float  blockA[16];
+        for (int i = 0; i < 16; i++ )
+        {
+                blockRGB[i].x   = shared_temp[pixelBase + i].x;
+                blockRGB[i].y   = shared_temp[pixelBase + i].y;
+                blockRGB[i].z   = shared_temp[pixelBase + i].z;
+                blockA[i]       = shared_temp[pixelBase + i].w;
+        }
+        g_OutBuff[blockID] = CompressBlockBC2_UNORM(blockRGB,blockA,g_quality,false);
+    }
+}
--- a/extern/CMP_Core/shaders/BC3_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC3_Encode_kernel.cpp
@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@ -23,37 +23,50 @@
 #include "BC3_Encode_kernel.h"

 //============================================== BC3 INTERFACES =======================================================
+#ifndef ASPM_HLSL

 void CompressBlockBC3_Internal(const CMP_Vec4uc srcBlockTemp[16],
                               CMP_GLOBAL CGU_UINT32 compressedBlock[4],
-                               CMP_GLOBAL const CMP_BC15Options *BC15options) {
-  CGU_UINT8 blkindex = 0;
-  CGU_UINT8 srcindex = 0;
-  CGU_UINT8 rgbaBlock[64];
-  for (CGU_INT32 j = 0; j < 4; j++) {
-    for (CGU_INT32 i = 0; i < 4; i++) {
-      rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z;  // B
-      rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y;  // G
-      rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x;  // R
-      rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w;  // A
-      srcindex++;
-    }
+                               CMP_GLOBAL CMP_BC15Options *BC15options) {
+  CGU_Vec3f  rgbBlock[16];
+  CGU_FLOAT  alphaBlock[BLOCK_SIZE_4X4];
+
+  for (CGU_INT32 i = 0; i < 16; i++) {
+        rgbBlock[i].x  = (CGU_FLOAT)(srcBlockTemp[i].x & 0xFF)/255;  // R
+        rgbBlock[i].y  = (CGU_FLOAT)(srcBlockTemp[i].y & 0xFF)/255;  // G
+        rgbBlock[i].z  = (CGU_FLOAT)(srcBlockTemp[i].z & 0xFF)/255;  // B
+        alphaBlock[i]  = (CGU_FLOAT)(srcBlockTemp[i].w) / 255.0f;
  }

-  CMP_BC15Options internalOptions = *BC15options;
-  CalculateColourWeightings(rgbaBlock, &internalOptions);
+    CMP_BC15Options internalOptions = *BC15options;

-  CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
-  for (CGU_INT32 i = 0; i < 16; i++)
-    alphaBlock[i] =
-        (CGU_UINT8)(((CGU_INT32 *)rgbaBlock)[i] >> RGBA8888_OFFSET_A);
+    CGU_Vec2ui cmpBlock;

-  CGU_INT err = CompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
-  if (err != 0) return;
+    cmpBlock = cmp_compressAlphaBlock(alphaBlock,internalOptions.m_fquality);
+    compressedBlock[0] = cmpBlock.x;
+    compressedBlock[1] = cmpBlock.y;

-  CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], &internalOptions,
-                   FALSE, FALSE, 0);
+    for (CGU_INT32 i = 0; i < 16; i++) {
+        alphaBlock[i]  = (CGU_FLOAT)(srcBlockTemp[i].w);
+    }
+
+    internalOptions = CalculateColourWeightings3f(rgbBlock, internalOptions);
+    CGU_Vec3f  channelWeights     = {internalOptions.m_fChannelWeights[0],internalOptions.m_fChannelWeights[1],internalOptions.m_fChannelWeights[2]};
+
+    cmpBlock = CompressBlockBC1_RGBA_Internal(
+                 rgbBlock, 
+                 alphaBlock,
+                 channelWeights,
+                 0, // internalOptions.m_nAlphaThreshold,
+                 1, // internalOptions.m_nRefinementSteps
+                 internalOptions.m_fquality,
+                 FALSE);
+
+
+    compressedBlock[2] = cmpBlock.x;
+    compressedBlock[3] = cmpBlock.y;
 }
+#endif

 //============================================== USER INTERFACES ========================================================
 #ifndef ASPM_GPU
@ -117,8 +130,12 @@ void DecompressBC3_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
                            const CMP_BC15Options *BC15options) {
  CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];

-  DecompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
-  DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],BC15options);
+  cmp_decompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
+
+  CGU_Vec2ui compBlock;
+  compBlock.x = compressedBlock[DXTC_OFFSET_RGB];
+  compBlock.y = compressedBlock[DXTC_OFFSET_RGB+1];
+  cmp_decompressDXTRGBA_Internal(rgbaBlock, compBlock,BC15options->m_mapDecodeRGBA);

  for (CGU_UINT32 i = 0; i < 16; i++)
    ((CMP_GLOBAL CGU_UINT32 *)rgbaBlock)[i] =
@ -178,7 +195,7 @@ int CMP_CDECL DecompressBlockBC3(const unsigned char cmpBlock[16],
 #endif

 //============================================== OpenCL USER INTERFACE ====================================================
-#ifdef ASPM_GPU
+#ifdef ASPM_OPENCL
 CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
    CMP_GLOBAL const CMP_Vec4uc *ImageSource,
    CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo,
--- a/extern/CMP_Core/shaders/BC3_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC3_Encode_kernel.h
@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
--- a/extern/CMP_Core/shaders/BC3_Encode_kernel.hlsl
+++ b/extern/CMP_Core/shaders/BC3_Encode_kernel.hlsl
@ -0,0 +1,101 @@
+//=====================================================================
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// File: BC3Encode.hlsl
+//--------------------------------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//--------------------------------------------------------------------------------------
+#ifndef ASPM_HLSL
+#define ASPM_HLSL
+#endif
+
+cbuffer cbCS : register( b0 )
+{
+    uint  g_tex_width;
+    uint  g_num_block_x;
+    uint  g_format;
+    uint  g_mode_id;
+    uint  g_start_block_id;
+    uint  g_num_total_blocks;
+    float g_alpha_weight;
+    float g_quality;
+};
+
+#include "BCn_Common_Kernel.h"
+
+// Source Data
+Texture2D g_Input : register( t0 ); 
+StructuredBuffer<uint4> g_InBuff : register( t1 );
+
+// Compressed Output Data
+RWStructuredBuffer<uint4> g_OutBuff : register( u0 );
+
+// Processing multiple blocks at a time
+#define MAX_USED_THREAD     16  // pixels in a BC (block compressed) block
+#define BLOCK_IN_GROUP      4   // the number of BC blocks a thread group processes = 64 / 16 = 4
+#define THREAD_GROUP_SIZE   64  // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
+#define BLOCK_SIZE_Y        4
+#define BLOCK_SIZE_X        4
+
+groupshared float4 shared_temp[THREAD_GROUP_SIZE];
+
+[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
+void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
+{
+    // we process 4 BC blocks per thread group
+    uint blockInGroup   = GI / MAX_USED_THREAD;                                         // what BC block this thread is on within this thread group
+    uint blockID        = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on
+    uint pixelBase      = blockInGroup * MAX_USED_THREAD;                               // the first id of the pixel in this BC block in this thread group
+    uint pixelInBlock   = GI - pixelBase;                                               // id of the pixel in this BC block
+
+
+    uint block_y        = blockID / g_num_block_x;
+    uint block_x        = blockID - block_y * g_num_block_x;
+    uint base_x         = block_x * BLOCK_SIZE_X;
+    uint base_y         = block_y * BLOCK_SIZE_Y;
+
+
+    // Load up the pixels
+    if (pixelInBlock < 16)
+    {
+           // load pixels (0..1)
+           shared_temp[GI] = float4(g_Input.Load( uint3( base_x + pixelInBlock % 4, base_y + pixelInBlock / 4, 0 ) ));
+    }
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // Process and save the compressed block
+    if (pixelInBlock == 0)
+    {
+        float3 blockRGB[16];
+        float  blockA[16];
+        for (int i = 0; i < 16; i++ )
+        {
+                blockRGB[i].x   = shared_temp[pixelBase + i].x;
+                blockRGB[i].y   = shared_temp[pixelBase + i].y;
+                blockRGB[i].z   = shared_temp[pixelBase + i].z;
+                blockA[i]       = shared_temp[pixelBase + i].w;
+        }
+
+        g_OutBuff[blockID] = CompressBlockBC3_UNORM(blockRGB,blockA, g_quality,false); 
+    }
+}
--- a/extern/CMP_Core/shaders/BC4_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC4_Encode_kernel.cpp
@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2020   Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@ -32,15 +32,19 @@ void CompressBlockBC4_Internal(const CMP_Vec4uc srcBlockTemp[16],
  }
  CGU_UINT8 blkindex = 0;
  CGU_UINT8 srcindex = 0;
-  CGU_UINT8 alphaBlock[16];
+  CGU_FLOAT alphaBlock[16];
  for (CGU_INT32 j = 0; j < 4; j++) {
    for (CGU_INT32 i = 0; i < 4; i++) {
-      alphaBlock[blkindex++] =
-          (CGU_UINT8)srcBlockTemp[srcindex].x;  // Red channel
+      alphaBlock[blkindex++] = srcBlockTemp[srcindex].x / 255.0f;  // Red channel
      srcindex++;
    }
  }
-  CompressAlphaBlock(alphaBlock, (CMP_GLOBAL CGU_UINT32 *)compressedBlock);
+
+  CGU_Vec2ui cmpBlock;
+
+  cmpBlock = cmp_compressAlphaBlock(alphaBlock,BC15options->m_fquality);
+  compressedBlock[0] = cmpBlock.x;
+  compressedBlock[1] = cmpBlock.y;
 }

 void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
@ -48,7 +52,7 @@ void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
                            const CMP_BC15Options *BC15options) {
  if (BC15options) {}
  CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
-  DecompressAlphaBlock(alphaBlock, compressedBlock);
+  cmp_decompressAlphaBlock(alphaBlock, compressedBlock);

  CGU_UINT8 blkindex = 0;
  CGU_UINT8 srcindex = 0;
@ -63,18 +67,27 @@ void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
  }
 }

-void CompressBlockBC4_SingleChannel(const CGU_UINT8 srcBlockTemp[16],
+void CompressBlockBC4_SingleChannel(const CGU_UINT8 srcBlockTemp[BLOCK_SIZE_4X4],
                               CMP_GLOBAL CGU_UINT32 compressedBlock[2],
                               CMP_GLOBAL const CMP_BC15Options *BC15options) {
  if (BC15options) {}
-  CompressAlphaBlock(srcBlockTemp, (CMP_GLOBAL CGU_UINT32 *)compressedBlock);
+  CGU_FLOAT  alphaBlock[BLOCK_SIZE_4X4];
+
+  for (CGU_INT32 i = 0; i < BLOCK_SIZE_4X4; i++) alphaBlock[i] = (srcBlockTemp[i] / 255.0f);
+
+  CGU_Vec2ui cmpBlock;
+  cmpBlock = cmp_compressAlphaBlock(alphaBlock,BC15options->m_fquality);
+  compressedBlock[0] = cmpBlock.x;
+  compressedBlock[1] = cmpBlock.y;
+
+
 }

 void DecompressBlockBC4_SingleChannel(CGU_UINT8 srcBlockTemp[16],
                            const CGU_UINT32 compressedBlock[2],
                            const CMP_BC15Options *BC15options) {
  if (BC15options) {}
-  DecompressAlphaBlock(srcBlockTemp, compressedBlock);
+  cmp_decompressAlphaBlock(srcBlockTemp, compressedBlock);
 }

 //============================================== USER INTERFACES ========================================================
@ -161,7 +174,7 @@ int CMP_CDECL DecompressBlockBC4(const unsigned char cmpBlock[8],
 #endif

 //============================================== OpenCL USER INTERFACE ====================================================
-#ifdef ASPM_GPU
+#ifdef ASPM_OPENCL
 CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
    CMP_GLOBAL const CMP_Vec4uc *ImageSource,
    CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo,
--- a/extern/CMP_Core/shaders/BC4_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC4_Encode_kernel.h
@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
--- a/extern/CMP_Core/shaders/BC4_Encode_kernel.hlsl
+++ b/extern/CMP_Core/shaders/BC4_Encode_kernel.hlsl
@ -0,0 +1,97 @@
+//=====================================================================
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// File: BC4Encode.hlsl
+//--------------------------------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//--------------------------------------------------------------------------------------
+#ifndef ASPM_HLSL
+#define ASPM_HLSL
+#endif
+
+cbuffer cbCS : register( b0 )
+{
+    uint  g_tex_width;
+    uint  g_num_block_x;
+    uint  g_format;
+    uint  g_mode_id;
+    uint  g_start_block_id;
+    uint  g_num_total_blocks;
+    float g_alpha_weight;
+    float g_quality;
+};
+
+#include "BCn_Common_Kernel.h"
+
+// Source Data
+Texture2D g_Input                : register( t0 ); 
+StructuredBuffer<uint4> g_InBuff : register( t1 );
+
+// Compressed Output Data
+RWStructuredBuffer<uint2> g_OutBuff : register( u0 );
+
+// Processing multiple blocks at a time
+#define MAX_USED_THREAD     16  // pixels in a BC (block compressed) block
+#define BLOCK_IN_GROUP      4   // the number of BC blocks a thread group processes = 64 / 16 = 4
+#define THREAD_GROUP_SIZE   64  // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
+#define BLOCK_SIZE_Y        4
+#define BLOCK_SIZE_X        4
+
+groupshared float4 shared_temp[THREAD_GROUP_SIZE];
+
+[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
+void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
+{
+    // we process 4 BC blocks per thread group
+    uint blockInGroup   = GI / MAX_USED_THREAD;                                         // what BC block this thread is on within this thread group
+    uint blockID        = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on
+    uint pixelBase      = blockInGroup * MAX_USED_THREAD;                               // the first id of the pixel in this BC block in this thread group
+    uint pixelInBlock   = GI - pixelBase;                                               // id of the pixel in this BC block
+
+
+    uint block_y        = blockID / g_num_block_x;
+    uint block_x        = blockID - block_y * g_num_block_x;
+    uint base_x         = block_x * BLOCK_SIZE_X;
+    uint base_y         = block_y * BLOCK_SIZE_Y;
+
+
+    // Load up the pixels
+    if (pixelInBlock < 16)
+    {
+           // load pixels (0..1)
+           shared_temp[GI] = float4(g_Input.Load( uint3( base_x + pixelInBlock % 4, base_y + pixelInBlock / 4, 0 ) ));
+    }
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // Process and save the compressed block
+    if (pixelInBlock == 0)
+    {
+        float block[16];
+        // gather the x (red) channel of each pixel for processing
+        for ( uint i = 0; i < 16; i ++ )
+        {
+                block[i].x  = shared_temp[pixelBase + i].x;
+        }
+        g_OutBuff[blockID] = CompressBlockBC4_UNORM(block, g_quality);
+    }
+}
--- a/extern/CMP_Core/shaders/BC5_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC5_Encode_kernel.cpp
@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2020   Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@ -24,36 +24,45 @@

 //============================================== BC5 INTERFACES =======================================================

-void  CompressBlockBC5_Internal(CMP_Vec4uc srcBlockTemp[16],
+CGU_Vec4ui CompressBC5Block_Internal(CMP_IN CGU_FLOAT aBlockU[16], CMP_IN CGU_FLOAT aBlockV[16], CMP_IN CGU_FLOAT fquality) 
+{
+    CGU_Vec4ui   compBlock;
+    CGU_Vec2ui cmpBlock;
+
+    cmpBlock = cmp_compressAlphaBlock(aBlockU,fquality);
+    compBlock.x = cmpBlock.x;
+    compBlock.y = cmpBlock.y;
+
+    cmpBlock = cmp_compressAlphaBlock(aBlockV,fquality);
+    compBlock.z = cmpBlock.x;
+    compBlock.w = cmpBlock.y;
+    return compBlock;
+}
+
+#ifndef ASPM_HLSL
+void  CompressBlockBC5_Internal(CMP_Vec4uc srcBlockTemp[16],                    // range 0 to 255
                                CMP_GLOBAL CGU_UINT32 compressedBlock[4],
                                CMP_GLOBAL  CMP_BC15Options *BC15options)
 {
-    if (BC15options->m_fquality) {
-        // Resreved
-    }
-    CGU_UINT8    blkindex = 0;
-    CGU_UINT8    srcindex = 0;
-    CGU_UINT8    alphaBlock[16];
-    for (CGU_INT32 j = 0; j < 4; j++) {
-        for (CGU_INT32 i = 0; i < 4; i++) {
-            alphaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x;  // Red channel
-            srcindex++;
-        }
-    }
-    CompressAlphaBlock(alphaBlock,&compressedBlock[0]);
+    CGU_Vec4ui   cmpBlock;
+    CGU_FLOAT    alphaBlockU[16];
+    CGU_FLOAT    alphaBlockV[16];
+    CGU_UINT32   i;

-    blkindex = 0;
-    srcindex = 0;
-    for (CGU_INT32 j = 0; j < 4; j++) {
-        for (CGU_INT32 i = 0; i < 4; i++) {
-            alphaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y;  // Green channel
-            srcindex++;
-        }
+    for (i = 0; i < 16; i++) {
+        alphaBlockU[i] =  srcBlockTemp[i].x  / 255.0f;
+        alphaBlockV[i] =  srcBlockTemp[i].y / 255.0f;
    }
-    CompressAlphaBlock(alphaBlock,&compressedBlock[2]);

+    cmpBlock = CompressBC5Block_Internal(alphaBlockU, alphaBlockV,BC15options->m_fquality);
+    compressedBlock[0] = cmpBlock.x;
+    compressedBlock[1] = cmpBlock.y;
+    compressedBlock[2] = cmpBlock.z;
+    compressedBlock[3] = cmpBlock.w;
 }
+#endif

+#ifndef ASPM_GPU
 void  DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], 
                             CGU_UINT32 compressedBlock[4],
                             CMP_BC15Options *BC15options)
@ -61,8 +70,8 @@ void  DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
    CGU_UINT8 alphaBlockR[BLOCK_SIZE_4X4];
    CGU_UINT8 alphaBlockG[BLOCK_SIZE_4X4];

-    DecompressAlphaBlock(alphaBlockR, &compressedBlock[0]);
-    DecompressAlphaBlock(alphaBlockG, &compressedBlock[2]);
+    cmp_decompressAlphaBlock(alphaBlockR, &compressedBlock[0]);
+    cmp_decompressAlphaBlock(alphaBlockG, &compressedBlock[2]);
 
    CGU_UINT8    blkindex = 0;
    CGU_UINT8    srcindex = 0;
@ -94,15 +103,29 @@ void  DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],

 }

-
 void  CompressBlockBC5_DualChannel_Internal(const CGU_UINT8 srcBlockR[16],
                                            const CGU_UINT8 srcBlockG[16],
                                            CMP_GLOBAL  CGU_UINT32 compressedBlock[4],
                                            CMP_GLOBAL  const CMP_BC15Options *BC15options)
 {
    if (BC15options) {}
-    CompressAlphaBlock(srcBlockR,&compressedBlock[0]);
-    CompressAlphaBlock(srcBlockG,&compressedBlock[2]);
+    CGU_Vec2ui    cmpBlock;
+    CGU_FLOAT     srcAlphaRF[16];   // red   channel samples as floats
+    CGU_FLOAT     srcAlphaGF[16];   // green channel samples as floats
+    // Scale the 8-bit samples to 0..1; every other cmp_compressAlphaBlock caller in this patch divides by 255.
+    for (CGU_INT i = 0; i < 16; i++)
+    {
+        srcAlphaRF[i] = (CGU_FLOAT)srcBlockR[i] / 255.0f;
+        srcAlphaGF[i] = (CGU_FLOAT)srcBlockG[i] / 255.0f;
+    }
+    // Red result -> compressedBlock[0..1]. NOTE(review): BC15options is dereferenced despite the no-op check above.
+    cmpBlock = cmp_compressAlphaBlock(srcAlphaRF,BC15options->m_fquality);
+    compressedBlock[0] = cmpBlock.x;
+    compressedBlock[1] = cmpBlock.y;
+    // Green result -> compressedBlock[2..3].
+    cmpBlock = cmp_compressAlphaBlock(srcAlphaGF,BC15options->m_fquality);
+    compressedBlock[2] = cmpBlock.x;
+    compressedBlock[3] = cmpBlock.y;
 }

 void  DecompressBC5_DualChannel_Internal(CMP_GLOBAL CGU_UINT8 srcBlockR[16],
@ -111,10 +134,10 @@ void  DecompressBC5_DualChannel_Internal(CMP_GLOBAL CGU_UINT8 srcBlockR[16],
                                         const CMP_BC15Options *BC15options)
 {
    if (BC15options) {}
-    DecompressAlphaBlock(srcBlockR, &compressedBlock[0]);
-    DecompressAlphaBlock(srcBlockG, &compressedBlock[2]);
+    cmp_decompressAlphaBlock(srcBlockR, &compressedBlock[0]);
+    cmp_decompressAlphaBlock(srcBlockG, &compressedBlock[2]);
 }
-
+#endif

 //============================================== USER INTERFACES ========================================================
 #ifndef ASPM_GPU
@ -224,7 +247,7 @@ int  CMP_CDECL DecompressBlockBC5(const CGU_UINT8 cmpBlock[16],
 #endif

 //============================================== OpenCL USER INTERFACE ====================================================
-#ifdef ASPM_GPU
+#ifdef ASPM_OPENCL
 CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(CMP_GLOBAL  const CMP_Vec4uc*   ImageSource,
                                          CMP_GLOBAL  CGU_UINT8*          ImageDestination,
                                          CMP_GLOBAL  Source_Info*        SourceInfo,
--- a/extern/CMP_Core/shaders/BC5_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC5_Encode_kernel.h
@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
--- a/extern/CMP_Core/shaders/BC5_Encode_kernel.hlsl
+++ b/extern/CMP_Core/shaders/BC5_Encode_kernel.hlsl
@ -0,0 +1,98 @@
+//=====================================================================
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// File: BC1Encode.hlsl
+//--------------------------------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//--------------------------------------------------------------------------------------
+#ifndef ASPM_HLSL
+#define ASPM_HLSL
+#endif
+
+cbuffer cbCS : register( b0 )   // per-dispatch constants; EncodeBlocks in this file reads g_num_block_x, g_start_block_id and g_quality
+{
+    uint  g_tex_width;
+    uint  g_num_block_x;        // divisor used to split a linear block id into (block_x, block_y)
+    uint  g_format;
+    uint  g_mode_id;
+    uint  g_start_block_id;     // offset added to the block id derived from the thread-group id
+    uint  g_num_total_blocks;   // NOTE(review): not referenced by EncodeBlocks in this file
+    float g_alpha_weight;
+    float g_quality;            // forwarded unchanged to CompressBlockBC5_UNORM
+};
+
+#include "BCn_Common_Kernel.h"
+
+// Source Data
+Texture2D g_Input : register( t0 ); 
+StructuredBuffer<uint4> g_InBuff : register( t1 );
+
+// Compressed Output Data
+RWStructuredBuffer<uint4> g_OutBuff : register( u0 );
+
+// Processing multiple blocks at a time
+#define MAX_USED_THREAD     16  // pixels in a BC (block compressed) block
+#define BLOCK_IN_GROUP      4   // the number of BC blocks a thread group processes = 64 / 16 = 4
+#define THREAD_GROUP_SIZE   64  // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y)
+#define BLOCK_SIZE_Y        4
+#define BLOCK_SIZE_X        4
+
+groupshared float4 shared_temp[THREAD_GROUP_SIZE];
+
+[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
+void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)   // compute entry point: GI = flat thread index in the group, groupID.x = thread-group index
+{
+    // Each thread group of THREAD_GROUP_SIZE threads handles BLOCK_IN_GROUP BC blocks, MAX_USED_THREAD threads (one per pixel) per block
+    uint blockInGroup   = GI / MAX_USED_THREAD;                                         // what BC block this thread is on within this thread group
+    uint blockID        = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on
+    uint pixelBase      = blockInGroup * MAX_USED_THREAD;                               // the first id of the pixel in this BC block in this thread group
+    uint pixelInBlock   = GI - pixelBase;                                               // id of the pixel in this BC block
+
+
+    uint block_y        = blockID / g_num_block_x;               // row of this block in the block grid
+    uint block_x        = blockID - block_y * g_num_block_x;     // column of this block (blockID modulo g_num_block_x)
+    uint base_x         = block_x * BLOCK_SIZE_X;                // texel x of the block's first column
+    uint base_y         = block_y * BLOCK_SIZE_Y;                // texel y of the block's first row
+
+
+    // Load up the pixels: every thread fetches one texel of its block into group-shared memory
+    if (pixelInBlock < 16)   // always true here, since pixelInBlock = GI - (GI / 16) * 16
+    {
+           // pixelInBlock % 4 is the column and pixelInBlock / 4 the row inside the 4x4 block; values presumably in 0..1 (depends on the bound texture format)
+           shared_temp[GI] = float4(g_Input.Load( uint3( base_x + pixelInBlock % 4, base_y + pixelInBlock / 4, 0 ) ));
+    }
+
+    GroupMemoryBarrierWithGroupSync();   // all texels of the group must be in shared_temp before any block is compressed
+
+    // Process and save: the first thread of each block gathers its 16 texels and writes one compressed block
+    if (pixelInBlock == 0)
+    {
+        float blockU[16];
+        float blockV[16];
+        for ( uint i = 0; i < 16; i ++ )
+        {
+             blockU[i]  = shared_temp[pixelBase + i].x;   // first BC5 channel comes from texel .x
+             blockV[i]  = shared_temp[pixelBase + i].y;   // second BC5 channel comes from texel .y
+        }
+        g_OutBuff[blockID] = CompressBlockBC5_UNORM(blockU,blockV,g_quality);   // NOTE(review): blockID is not checked against g_num_total_blocks - confirm the dispatch never overshoots
+    }
+}
--- a/extern/CMP_Core/shaders/BC6_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC6_Encode_kernel.cpp
@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@ -29,11 +29,11 @@ void memset(CGU_UINT8 *srcdata, CGU_UINT8 value, CGU_INT size)
        *srcdata++ = value;
 }

-void memcpy(CGU_UINT8 *srcdata, CGU_UINT8 *dstdata, CGU_INT size)
+void memcpy(CGU_UINT8 *dstdata, CGU_UINT8 *srcdata, CGU_INT size)
 {
    for (CGU_INT i = 0; i < size; i++)
    {
-        *srcdata = *dstdata;
+        *dstdata = *srcdata;
        srcdata++;
        dstdata++;
    }
@ -509,7 +509,7 @@ CGU_FLOAT totalError_d(CGU_FLOAT data[MAX_ENTRIES][MAX_DIMENSION_BIG], CGU_FLOAT
 // index, uncentered, in the range 0..k-1
 //

-void quant_AnD_Shell(CGU_FLOAT* v_, CGU_INT k, CGU_INT n, CGU_INT *idx)
+void quant_AnD_Shell(CGU_FLOAT* v_, CGU_INT k, CGU_INT n, CGU_INT idx[MAX_ENTRIES])
 {
 #define MAX_BLOCK MAX_ENTRIES
    CGU_INT i, j;
@ -2530,7 +2530,7 @@ CGU_INT Unquantize(CGU_INT comp, unsigned char uBitsPerComp, CGU_BOOL bSigned)
    return unq;
 }

-CGU_INT finish_unquantizeF16(CGU_INT q, CGU_BOOL isSigned)
+CGU_INT finish_unquantizef16(CGU_INT q, CGU_BOOL isSigned)
 {
    // Is it F16 Signed else F16 Unsigned
    if (isSigned)
@ -2565,8 +2565,8 @@ void decompress_endpoints1(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
                out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // F16 format
-                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
-                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
+                outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
+                outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
            }
        }
        else
@ -2581,8 +2581,8 @@ void decompress_endpoints1(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
                out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // F16 format
-                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
-                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
+                outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
+                outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
            }
        }

@ -2602,8 +2602,8 @@ void decompress_endpoints1(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
                out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // F16 format
-                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
-                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
+                outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
+                outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
            }
        }
        else
@ -2618,8 +2618,8 @@ void decompress_endpoints1(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
                out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // F16 format
-                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
-                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
+                outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
+                outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
            }
        }
    }
@ -2659,10 +2659,10 @@ void decompress_endpoints2(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
                out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, true);

                // F16 format
-                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], true);
-                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], true);
-                outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], true);
-                outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], true);
+                outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], true);
+                outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], true);
+                outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], true);
+                outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], true);

            }
        }
@ -2682,10 +2682,10 @@ void decompress_endpoints2(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
                out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // nbits to F16 format
-                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
-                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
-                outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], false);
-                outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], false);
+                outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
+                outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
+                outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], false);
+                outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], false);
            }
        }

@ -2713,10 +2713,10 @@ void decompress_endpoints2(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
                out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // nbits to F16 format
-                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
-                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
-                outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], false);
-                outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], false);
+                outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
+                outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
+                outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], false);
+                outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], false);

            }
        }
@ -2736,10 +2736,10 @@ void decompress_endpoints2(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M
                out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // nbits to F16 format
-                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
-                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
-                outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], false);
-                outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], false);
+                outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
+                outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
+                outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], false);
+                outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], false);
            }
        }
    }
@ -3906,7 +3906,7 @@ int  CMP_CDECL DecompressBlockBC6(const unsigned char cmpBlock[16],
 #endif // !ASPM_GPU

 //============================================== OpenCL USER INTERFACE ====================================================
-#ifdef ASPM_GPU
+#ifdef ASPM_OPENCL
 CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
    CMP_GLOBAL  CGU_UINT8*          p_source_pixels,
    CMP_GLOBAL  CGU_UINT8*          p_encoded_blocks,
--- a/extern/CMP_Core/shaders/BC6_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC6_Encode_kernel.h
@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@ -23,6 +23,8 @@
 #ifndef BC6_ENCODE_KERNEL_H
 #define BC6_ENCODE_KERNEL_H

+#pragma warning(disable:4505)  // suppress MSVC warning C4505: unreferenced local function has been removed
+
 #include "Common_Def.h"

 #define MAX_TRACE                       10
@ -127,25 +129,25 @@ typedef struct

 __constant ModePartitions ModePartition[MAX_BC6H_MODES + 1] =
 {
-   0,    0,0,0,        0,    0,    0,    0,     0,   // Mode = Invaild
+   {0,     {0,0,0},       0,    0,    0,    0,     0},   // Mode = Invalid

   // Two region Partition
-   10,   5,5,5,        1,    2,    3,    0x00,  31,    // Mode = 1
-   7,    6,6,6,        1,    2,    3,    0x01,  248,   // Mode = 2
-   11,   5,4,4,        1,    5,    3,    0x02,  15,    // Mode = 3
-   11,   4,5,4,        1,    5,    3,    0x06,  15,    // Mode = 4 
-   11,   4,4,5,        1,    5,    3,    0x0a,  15,    // Mode = 5
-   9,    5,5,5,        1,    5,    3,    0x0e,  62,    // Mode = 6
-   8,    6,5,5,        1,    5,    3,    0x12,  124,   // Mode = 7
-   8,    5,6,5,        1,    5,    3,    0x16,  124,   // Mode = 8
-   8,    5,5,6,        1,    5,    3,    0x1a,  124,   // Mode = 9
-   6,    6,6,6,        0,    5,    3,    0x1e,  496,   // Mode = 10
+   { 10,   {5,5,5},       1,    2,    3,    0x00,  31 },    // Mode = 1
+   { 7,    {6,6,6},       1,    2,    3,    0x01,  248},   // Mode = 2
+   { 11,   {5,4,4},       1,    5,    3,    0x02,  15 },    // Mode = 3
+   { 11,   {4,5,4},       1,    5,    3,    0x06,  15 },    // Mode = 4 
+   { 11,   {4,4,5},       1,    5,    3,    0x0a,  15 },    // Mode = 5
+   { 9,    {5,5,5},       1,    5,    3,    0x0e,  62 },    // Mode = 6
+   { 8,    {6,5,5},       1,    5,    3,    0x12,  124},   // Mode = 7
+   { 8,    {5,6,5},       1,    5,    3,    0x16,  124},   // Mode = 8
+   { 8,    {5,5,6},       1,    5,    3,    0x1a,  124},   // Mode = 9
+   { 6,    {6,6,6},       0,    5,    3,    0x1e,  496},   // Mode = 10

   // One region Partition    
-   10,   10,10,10,     0,    5,    4,    0x03,  31,    // Mode = 11
-   11,   9,9,9,        1,    5,    4,    0x07,  15,    // Mode = 12
-   12,   8,8,8,        1,    5,    4,    0x0b,  7,     // Mode = 13
-   16,   4,4,4,        1,    5,    4,    0x0f,  1,     // Mode = 14
+   {10,   {10,10,10},     0,    5,    4,    0x03,  31},    // Mode = 11
+   {11,   {9,9,9   },     1,    5,    4,    0x07,  15},    // Mode = 12
+   {12,   {8,8,8   },     1,    5,    4,    0x0b,  7 },     // Mode = 13
+   {16,   {4,4,4   },     1,    5,    4,    0x0f,  1 }     // Mode = 14
 };

 //================================================
--- a/extern/CMP_Core/shaders/BC6_Encode_kernel.hlsl
+++ b/extern/CMP_Core/shaders/BC6_Encode_kernel.hlsl
--- a/extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp
+++ b/extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp
@ -1,6 +1,6 @@
 //=====================================================================
-// Copyright (c) 2019    Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
+// 
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
@ -106,6 +106,7 @@ CGU_INT expandbits(CGU_INT bits, CGU_INT v)
 }

 CMP_EXPORT CGU_INT bc7_isa() {
+#ifndef ASPM_GPU
 #if defined(ISPC_TARGET_SSE2)
    ASPM_PRINT(("SSE2"));
    return 0;
@ -120,8 +121,9 @@ CMP_EXPORT CGU_INT bc7_isa() {
    return 3;
 #else
    ASPM_PRINT(("CPU"));
-    return -1;
 #endif
+#endif
+    return -1;
 }

 CMP_EXPORT void init_BC7ramps()
@ -528,139 +530,6 @@ void GetProjecedImage(

 INLINE CGV_UINT8 get_partition_subset(CGV_INT part_id, CGU_INT maxSubsets, CGV_INT index)
 {
-   CMP_STATIC  uniform CMP_CONSTANT   CGU_UINT32 subset_mask_table[] = {
-        // 2 subset region patterns
-        0x0000CCCCu, // 0   1100 1100 1100 1100  (MSB..LSB)
-        0x00008888u, // 1   1000 1000 1000 1000
-        0x0000EEEEu, // 2   1110 1110 1110 1110
-        0x0000ECC8u, // 3   1110 1100 1100 1000
-        0x0000C880u, // 4   1100 1000 1000 0000
-        0x0000FEECu, // 5   1111 1110 1110 1100
-        0x0000FEC8u, // 6   1111 1110 1100 1000
-        0x0000EC80u, // 7   1110 1100 1000 0000
-        0x0000C800u, // 8   1100 1000 0000 0000
-        0x0000FFECu, // 9   1111 1111 1110 1100
-        0x0000FE80u, // 10  1111 1110 1000 0000
-        0x0000E800u, // 11  1110 1000 0000 0000
-        0x0000FFE8u, // 12  1111 1111 1110 1000
-        0x0000FF00u, // 13  1111 1111 0000 0000
-        0x0000FFF0u, // 14  1111 1111 1111 0000
-        0x0000F000u, // 15  1111 0000 0000 0000
-        0x0000F710u, // 16  1111 0111 0001 0000
-        0x0000008Eu, // 17  0000 0000 1000 1110
-        0x00007100u, // 18  0111 0001 0000 0000
-        0x000008CEu, // 19  0000 1000 1100 1110
-        0x0000008Cu, // 20  0000 0000 1000 1100
-        0x00007310u, // 21  0111 0011 0001 0000
-        0x00003100u, // 22  0011 0001 0000 0000
-        0x00008CCEu, // 23  1000 1100 1100 1110
-        0x0000088Cu, // 24  0000 1000 1000 1100
-        0x00003110u, // 25  0011 0001 0001 0000
-        0x00006666u, // 26  0110 0110 0110 0110
-        0x0000366Cu, // 27  0011 0110 0110 1100
-        0x000017E8u, // 28  0001 0111 1110 1000
-        0x00000FF0u, // 29  0000 1111 1111 0000
-        0x0000718Eu, // 30  0111 0001 1000 1110
-        0x0000399Cu, // 31  0011 1001 1001 1100
-        0x0000AAAAu, // 32  1010 1010 1010 1010
-        0x0000F0F0u, // 33  1111 0000 1111 0000
-        0x00005A5Au, // 34  0101 1010 0101 1010
-        0x000033CCu, // 35  0011 0011 1100 1100
-        0x00003C3Cu, // 36  0011 1100 0011 1100
-        0x000055AAu, // 37  0101 0101 1010 1010
-        0x00009696u, // 38  1001 0110 1001 0110
-        0x0000A55Au, // 39  1010 0101 0101 1010
-        0x000073CEu, // 40  0111 0011 1100 1110
-        0x000013C8u, // 41  0001 0011 1100 1000
-        0x0000324Cu, // 42  0011 0010 0100 1100
-        0x00003BDCu, // 43  0011 1011 1101 1100
-        0x00006996u, // 44  0110 1001 1001 0110
-        0x0000C33Cu, // 45  1100 0011 0011 1100
-        0x00009966u, // 46  1001 1001 0110 0110
-        0x00000660u, // 47  0000 0110 0110 0000
-        0x00000272u, // 48  0000 0010 0111 0010
-        0x000004E4u, // 49  0000 0100 1110 0100
-        0x00004E40u, // 50  0100 1110 0100 0000
-        0x00002720u, // 51  0010 0111 0010 0000
-        0x0000C936u, // 52  1100 1001 0011 0110
-        0x0000936Cu, // 53  1001 0011 0110 1100
-        0x000039C6u, // 54  0011 1001 1100 0110
-        0x0000639Cu, // 55  0110 0011 1001 1100
-        0x00009336u, // 56  1001 0011 0011 0110
-        0x00009CC6u, // 57  1001 1100 1100 0110
-        0x0000817Eu, // 58  1000 0001 0111 1110
-        0x0000E718u, // 59  1110 0111 0001 1000
-        0x0000CCF0u, // 60  1100 1100 1111 0000
-        0x00000FCCu, // 61  0000 1111 1100 1100
-        0x00007744u, // 62  0111 0111 0100 0100
-        0x0000EE22u, // 63  1110 1110 0010 0010
-        // 3 Subset region patterns
-        0xF60008CCu,// 0    1111 0110 0000 0000 : 0000 1000 1100 1100 = 2222122011001100 (MSB...LSB)
-        0x73008CC8u,// 1    0111 0011 0000 0000 : 1000 1100 1100 1000 = 1222112211001000
-        0x3310CC80u,// 2    0011 0011 0001 0000 : 1100 1100 1000 0000 = 1122112210020000
-        0x00CEEC00u,// 3    0000 0000 1100 1110 : 1110 1100 0000 0000 = 1110110022002220
-        0xCC003300u,// 4    1100 1100 0000 0000 : 0011 0011 0000 0000 = 2211221100000000
-        0xCC0000CCu,// 5    1100 1100 0000 0000 : 0000 0000 1100 1100 = 2200220011001100
-        0x00CCFF00u,// 6    0000 0000 1100 1100 : 1111 1111 0000 0000 = 1111111122002200
-        0x3300CCCCu,// 7    0011 0011 0000 0000 : 1100 1100 1100 1100 = 1122112211001100
-        0xF0000F00u,// 8    1111 0000 0000 0000 : 0000 1111 0000 0000 = 2222111100000000
-        0xF0000FF0u,// 9    1111 0000 0000 0000 : 0000 1111 1111 0000 = 2222111111110000
-        0xFF0000F0u,// 10   1111 1111 0000 0000 : 0000 0000 1111 0000 = 2222222211110000
-        0x88884444u,// 11   1000 1000 1000 1000 : 0100 0100 0100 0100 = 2100210021002100
-        0x88886666u,// 12   1000 1000 1000 1000 : 0110 0110 0110 0110 = 2110211021102110
-        0xCCCC2222u,// 13   1100 1100 1100 1100 : 0010 0010 0010 0010 = 2210221022102210
-        0xEC80136Cu,// 14   1110 1100 1000 0000 : 0001 0011 0110 1100 = 2221221121101100
-        0x7310008Cu,// 15   0111 0011 0001 0000 : 0000 0000 1000 1100 = 0222002210021100
-        0xC80036C8u,// 16   1100 1000 0000 0000 : 0011 0110 1100 1000 = 2211211011001000
-        0x310008CEu,// 17   0011 0001 0000 0000 : 0000 1000 1100 1110 = 0022100211001110
-        0xCCC03330u,// 18   1100 1100 1100 0000 : 0011 0011 0011 0000 = 2211221122110000
-        0x0CCCF000u,// 19   0000 1100 1100 1100 : 1111 0000 0000 0000 = 1111220022002200
-        0xEE0000EEu,// 20   1110 1110 0000 0000 : 0000 0000 1110 1110 = 2220222011101110
-        0x77008888u,// 21   0111 0111 0000 0000 : 1000 1000 1000 1000 = 1222122210001000
-        0xCC0022C0u,// 22   1100 1100 0000 0000 : 0010 0010 1100 0000 = 2210221011000000
-        0x33004430u,// 23   0011 0011 0000 0000 : 0100 0100 0011 0000 = 0122012200110000
-        0x00CC0C22u,// 24   0000 0000 1100 1100 : 0000 1100 0010 0010 = 0000110022102210
-        0xFC880344u,// 25   1111 1100 1000 1000 : 0000 0011 0100 0100 = 2222221121002100
-        0x06606996u,// 26   0000 0110 0110 0000 : 0110 1001 1001 0110 = 0110122112210110
-        0x66009960u,// 27   0110 0110 0000 0000 : 1001 1001 0110 0000 = 1221122101100000
-        0xC88C0330u,// 28   1100 1000 1000 1100 : 0000 0011 0011 0000 = 2200201120112200
-        0xF9000066u,// 29   1111 1001 0000 0000 : 0000 0000 0110 0110 = 2222200201100110
-        0x0CC0C22Cu,// 30   0000 1100 1100 0000 : 1100 0010 0010 1100 = 1100221022101100
-        0x73108C00u,// 31   0111 0011 0001 0000 : 1000 1100 0000 0000 = 1222112200020000
-        0xEC801300u,// 32   1110 1100 1000 0000 : 0001 0011 0000 0000 = 2221221120000000
-        0x08CEC400u,// 33   0000 1000 1100 1110 : 1100 0100 0000 0000 = 1100210022002220
-        0xEC80004Cu,// 34   1110 1100 1000 0000 : 0000 0000 0100 1100 = 2220220021001100
-        0x44442222u,// 35   0100 0100 0100 0100 : 0010 0010 0010 0010 = 0210021002100210
-        0x0F0000F0u,// 36   0000 1111 0000 0000 : 0000 0000 1111 0000 = 0000222211110000
-        0x49242492u,// 37   0100 1001 0010 0100 : 0010 0100 1001 0010 = 0210210210210210
-        0x42942942u,// 38   0100 0010 1001 0100 : 0010 1001 0100 0010 = 0210102121020210
-        0x0C30C30Cu,// 39   0000 1100 0011 0000 : 1100 0011 0000 1100 = 1100221100221100
-        0x03C0C03Cu,// 40   0000 0011 1100 0000 : 1100 0000 0011 1100 = 1100002222111100
-        0xFF0000AAu,// 41   1111 1111 0000 0000 : 0000 0000 1010 1010 = 2222222210101010
-        0x5500AA00u,// 42   0101 0101 0000 0000 : 1010 1010 0000 0000 = 1212121200000000
-        0xCCCC3030u,// 43   1100 1100 1100 1100 : 0011 0000 0011 0000 = 2211220022112200
-        0x0C0CC0C0u,// 44   0000 1100 0000 1100 : 1100 0000 1100 0000 = 1100220011002200
-        0x66669090u,// 45   0110 0110 0110 0110 : 1001 0000 1001 0000 = 1221022012210220
-        0x0FF0A00Au,// 46   0000 1111 1111 0000 : 1010 0000 0000 1010 = 1010222222221010
-        0x5550AAA0u,// 47   0101 0101 0101 0000 : 1010 1010 1010 0000 = 1212121212120000
-        0xF0000AAAu,// 48   1111 0000 0000 0000 : 0000 1010 1010 1010 = 2222101010101010
-        0x0E0EE0E0u,// 49   0000 1110 0000 1110 : 1110 0000 1110 0000 = 1110222011102220
-        0x88887070u,// 50   1000 1000 1000 1000 : 0111 0000 0111 0000 = 2111200021112000
-        0x99906660u,// 51   1001 1001 1001 0000 : 0110 0110 0110 0000 = 2112211221120000
-        0xE00E0EE0u,// 52   1110 0000 0000 1110 : 0000 1110 1110 0000 = 2220111011102220
-        0x88880770u,// 53   1000 1000 1000 1000 : 0000 0111 0111 0000 = 2000211121112000
-        0xF0000666u,// 54   1111 0000 0000 0000 : 0000 0110 0110 0110 = 2222011001100110
-        0x99006600u,// 55   1001 1001 0000 0000 : 0110 0110 0000 0000 = 2112211200000000
-        0xFF000066u,// 56   1111 1111 0000 0000 : 0000 0000 0110 0110 = 2222222201100110
-        0xC00C0CC0u,// 57   1100 0000 0000 1100 : 0000 1100 1100 0000 = 2200110011002200
-        0xCCCC0330u,// 58   1100 1100 1100 1100 : 0000 0011 0011 0000 = 2200221122112200
-        0x90006000u,// 59   1001 0000 0000 0000 : 0110 0000 0000 0000 = 2112000000000000
-        0x08088080u,// 60   0000 1000 0000 1000 : 1000 0000 1000 0000 = 1000200010002000
-        0xEEEE1010u,// 61   1110 1110 1110 1110 : 0001 0000 0001 0000 = 2221222022212220
-        0xFFF0000Au,// 62   1111 1111 1111 0000 : 0000 0000 0000 1010 = 2222222222221010
-        0x731008CEu,// 63   0111 0011 0001 0000 : 0000 1000 1100 1110 = 0222102211021110
-        };
-
  if (maxSubsets == 2)
  {
      CGV_UINT32 mask_packed = subset_mask_table[part_id];
@ -1029,14 +898,6 @@ INLINE CGV_EPOCODE ep_find_floor(
 {
 #ifdef ASPM_GPU // GPU Code 
    CGV_FLOAT rampf = 0.0F;
-    CMP_CONSTANT CGV_EPOCODE rampI[5*SOURCE_BLOCK_SIZE] = {
-    0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 0 bit index
-    0 ,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 1 bit index
-    0 ,21,43,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 2 bit index
-    0 ,9 ,18,27,37,46,55,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 3 bit index
-    0 ,4 ,9 ,13,17,21,26,30,34,38,43,47,51,55,60,64  // 4 bit index
-    };
-
    CGV_EPOCODE e1 = expand_epocode(p1, bits);
    CGV_EPOCODE e2 = expand_epocode(p2,bits);
    CGV_FLOAT ramp = gather_epocode(rampI,clogBC7*16+index)/64.0F;
@ -1077,21 +938,6 @@ INLINE CGV_EPOCODE ep_find_floor(

 INLINE void get_fixuptable(CGV_FIXUPINDEX  fixup[3], CGV_PARTID  part_id)
 {
-   // same as  CMP SDK v3.1 BC7_FIXUPINDEX1 &  BC7_FIXUPINDEX2 for each partition range 0..63
-   // The data is saved as a packed INT = (BC7_FIXUPINDEX1 << 4 + BC7_FIXUPINDEX2)
-   CMP_STATIC uniform __constant  CGV_FIXUPINDEX  FIXUPINDEX[] = {
-       // 2 subset partitions 0..63
-        0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u,
-        0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0x20u, 0x20u,
-        0xf0u, 0xf0u, 0x60u, 0x80u, 0x20u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0x60u,
-        0x60u, 0x20u, 0x60u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u,
-        // 3 subset partitions 64..128
-        0x3fu, 0x38u, 0xf8u, 0xf3u, 0x8fu, 0x3fu, 0xf3u, 0xf8u, 0x8fu, 0x8fu, 0x6fu, 0x6fu, 0x6fu, 0x5fu, 0x3fu, 0x38u,
-        0x3fu, 0x38u, 0x8fu, 0xf3u, 0x3fu, 0x38u, 0x6fu, 0xa8u, 0x53u, 0x8fu, 0x86u, 0x6au, 0x8fu, 0x5fu, 0xfau, 0xf8u,
-        0x8fu, 0xf3u, 0x3fu, 0x5au, 0x6au, 0xa8u, 0x89u, 0xfau, 0xf6u, 0x3fu, 0xf8u, 0x5fu, 0xf3u, 0xf6u, 0xf6u, 0xf8u,
-        0x3fu, 0xf3u, 0x5fu, 0x5fu, 0x5fu, 0x8fu, 0x5fu, 0xafu, 0x5fu, 0xafu, 0x8fu, 0xdfu, 0xf3u, 0xcfu, 0x3fu, 0x38u 
-   };
-
   CGV_FIXUPINDEX skip_packed = FIXUPINDEX[part_id];// gather_int2(FIXUPINDEX, part_id);
   fixup[0] = 0;
   fixup[1] = skip_packed>>4;
@ -1472,27 +1318,29 @@ CGV_ERROR   quant_solid_color(

            if (error_t < error_0)
            {
+                // We have a solid color: Use image src if on GPU
                image_log = iclogBC7;
                image_idx = image_log;
-                CGU_BOOL srcIsWhite = FALSE;
-                if ((image_src[0] == 255.0f)&&(image_src[1] == 255.0f)&&(image_src[2] == 255.0f)) srcIsWhite = TRUE;

+#ifdef ASPM_GPU  // This needs improving 
+                CGV_IMAGE MinC[4] = {255,255,255,255};
+                CGV_IMAGE MaxC[4] = {0,0,0,0};
+                // get min max colors
+                for (CGU_CHANNEL ch=0;ch<channels3or4; ch++) 
+                   for (CGV_ENTRIES k=0;k<numEntries;k++) 
+                   {
+                       if (image_src[k+ch*SOURCE_BLOCK_SIZE] < MinC[ch] ) MinC[ch] = image_src[k+ch*SOURCE_BLOCK_SIZE];
+                       if (image_src[k+ch*SOURCE_BLOCK_SIZE] > MaxC[ch] ) MaxC[ch] = image_src[k+ch*SOURCE_BLOCK_SIZE];
+                   }
+                for (CGU_CHANNEL ch = 0; ch<channels3or4; ch++)
+                {
+                     epo_0[ch]     = MinC[ch];
+                     epo_0[4 + ch] = MaxC[ch];
+                }
+
+#else           // This is good on CPU
                for (CGU_CHANNEL ch = 0; ch<channels3or4; ch++)
                {
-#ifdef ASPM_GPU
-                    if (srcIsWhite == TRUE)
-                    {
-                        // Default White block!
-                        epo_0[  ch] = 0x7F;
-                        epo_0[4+ch] = 0x7F;
-                    }
-                    else
-                    {
-                        // Default black block!
-                        epo_0[  ch] = 0;
-                        epo_0[4+ch] = 0;
-                    }
-#else
 #ifdef USE_BC7_SP_ERR_IDX
                    if (BC7EncodeRamps.ramp_init) {
                        CGV_EPOCODE index = (CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits[ch])*256*2*2*16*2)+(epo_dr_0[ch]*2*2*16*2)+(t1o[ch]*2*16*2)+(t2o[ch]*16*2)+(iclogBC7*2);
@ -1500,15 +1348,15 @@ CGV_ERROR   quant_solid_color(
                        epo_0[4+ch] = BC7EncodeRamps.sp_idx[index+1]&0xFF;// gather_epocode(u_BC7Encode->sp_idx,index+1)&0xFF;
                    }
                    else {
-                        epo_0[ch] = 0;
+                        epo_0[ch]     = 0;
                        epo_0[4 + ch] = 0;
                    }
 #else
                    epo_0[  ch] = 0;
                    epo_0[4+ch] = 0;
-#endif
 #endif
                }
+#endif
                error_0 = error_t;
            }
            //if (error_0 == 0)
@ -1980,7 +1828,11 @@ INLINE void cmp_encode_swap(CGV_EPOCODE endpoint[], CGU_INT channels, CGV_INDEX
   {
      cmp_swap_epo(&endpoint[0], &endpoint[channels], channels);
      for (CGU_INT k=0; k<SOURCE_BLOCK_SIZE; k++)
+#ifdef ASPM_GPU
+         block_index[k] = (levels-1) - block_index[k];
+#else
         block_index[k] = CGV_INDEX(levels-1) - block_index[k];
+#endif
   }
 }

@ -1994,6 +1846,7 @@ void cmp_encode_index(CGV_CMPOUT data[16], CGU_INT* uniform pPos, CGV_INDEX bloc
   }
 }

+
 void encode_endpoint(CGV_CMPOUT data[16], CGU_INT* uniform pPos, CGV_BYTE block_index[16],  CGU_INT bits, CGV_SHIFT32 flips)
 {
   CGU_INT      levels = 1 << bits;
@ -2006,8 +1859,8 @@ void encode_endpoint(CGV_CMPOUT data[16], CGU_INT* uniform pPos, CGV_BYTE block_
         CGV_TYPEINT q = qbits_shifted&15;
         if ((flips_shifted&1)>0) q = (levels-1)-q;

-         if (k1==0 && k2==0)   cmp_Write8Bit(data, pPos, bits - 1, static_cast <CGV_BYTE>(q));
-         else                  cmp_Write8Bit(data, pPos, bits, static_cast<CGV_BYTE>(q));
+         if (k1==0 && k2==0)   cmp_Write8Bit(data, pPos, bits-1, CMP_STATIC_CAST(CGV_BYTE,q));
+         else                  cmp_Write8Bit(data, pPos, bits  , CMP_STATIC_CAST(CGV_BYTE,q));
         qbits_shifted >>= 4;
         flips_shifted >>= 1;
      }
@ -2236,10 +2089,10 @@ void  Encode_mode4( CGV_CMPOUT     cmp_out[COMPRESSED_BLOCK_SIZE],
    cmp_Write8Bit(cmp_out,&bitPosition,1,1);

    // rotation 2 bits
-    cmp_Write8Bit(cmp_out, &bitPosition, 2, static_cast <CGV_BYTE> (params->rotated_channel));
+    cmp_Write8Bit(cmp_out,&bitPosition,2, CMP_STATIC_CAST(CGV_BYTE,params->rotated_channel));

    // idxMode 1 bit
-    cmp_Write8Bit(cmp_out, &bitPosition, 1, static_cast <CGV_BYTE> (params->idxMode));
+    cmp_Write8Bit(cmp_out, &bitPosition, 1,CMP_STATIC_CAST(CGV_BYTE,params->idxMode));

    CGU_INT   idxBits[2] = {2,3};

@ -2264,14 +2117,14 @@ void  Encode_mode4( CGV_CMPOUT     cmp_out[COMPRESSED_BLOCK_SIZE],
   // B0 : B1
   for (CGU_INT component=0; component < 3; component++)
   {
-        cmp_Write8Bit(cmp_out, &bitPosition, 5, static_cast<CGV_BYTE> (params->color_qendpoint[component]));
-        cmp_Write8Bit(cmp_out, &bitPosition, 5, static_cast <CGV_BYTE> (params->color_qendpoint[4 + component]));
+         cmp_Write8Bit(cmp_out,&bitPosition,5,CMP_STATIC_CAST(CGV_BYTE,params->color_qendpoint[component]));
+         cmp_Write8Bit(cmp_out,&bitPosition,5,CMP_STATIC_CAST(CGV_BYTE,params->color_qendpoint[4 + component]));
   }

   // alpha endpoints (6 bits each)
   // A0 : A1
-   cmp_Write8Bit(cmp_out, &bitPosition, 6, static_cast<CGV_BYTE> (params->alpha_qendpoint[0]));
-   cmp_Write8Bit(cmp_out, &bitPosition, 6, static_cast<CGV_BYTE> (params->alpha_qendpoint[4]));
+   cmp_Write8Bit(cmp_out,&bitPosition,6,CMP_STATIC_CAST(CGV_BYTE,params->alpha_qendpoint[0]));
+   cmp_Write8Bit(cmp_out,&bitPosition,6,CMP_STATIC_CAST(CGV_BYTE,params->alpha_qendpoint[4]));

    // index 2 bits each  (31 bits total)
    cmp_encode_index(cmp_out, &bitPosition, params->color_index, 2);
@ -2289,7 +2142,7 @@ void  Encode_mode5( CGV_CMPOUT     cmp_out[COMPRESSED_BLOCK_SIZE],
    cmp_Write8Bit(cmp_out,&bitPosition,1,1);

    // Write 2 bit rotation
-    cmp_Write8Bit(cmp_out, &bitPosition, 2, static_cast<CGV_BYTE> (params->rotated_channel));
+    cmp_Write8Bit(cmp_out,&bitPosition,2, CMP_STATIC_CAST(CGV_BYTE,params->rotated_channel));

    cmp_encode_swap(params->color_qendpoint, 4, params->color_index,2);
    cmp_encode_swap(params->alpha_qendpoint, 4, params->alpha_index,2);
@ -2300,14 +2153,14 @@ void  Encode_mode5( CGV_CMPOUT     cmp_out[COMPRESSED_BLOCK_SIZE],
   // B0 : B1
   for (CGU_INT component=0; component < 3; component++)
   {
-        cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast<CGV_BYTE> (params->color_qendpoint[component]));
-        cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast <CGV_BYTE> (params->color_qendpoint[4 + component]));
+         cmp_Write8Bit(cmp_out,&bitPosition,7,CMP_STATIC_CAST(CGV_BYTE,params->color_qendpoint[component]));
+         cmp_Write8Bit(cmp_out,&bitPosition,7,CMP_STATIC_CAST(CGV_BYTE,params->color_qendpoint[4 + component]));
   }

   // alpha endpoints (8 bits each)
   // A0 : A1
-   cmp_Write8Bit(cmp_out, &bitPosition, 8, static_cast<CGV_BYTE> (params->alpha_qendpoint[0]));
-   cmp_Write8Bit(cmp_out, &bitPosition, 8, static_cast<CGV_BYTE> (params->alpha_qendpoint[4]));
+   cmp_Write8Bit(cmp_out,&bitPosition,8,CMP_STATIC_CAST(CGV_BYTE,params->alpha_qendpoint[0]));
+   cmp_Write8Bit(cmp_out,&bitPosition,8,CMP_STATIC_CAST(CGV_BYTE,params->alpha_qendpoint[4]));


   // color index 2 bits each  (31 bits total)
@ -2332,8 +2185,8 @@ void  Encode_mode6(
    // endpoints
    for (CGU_INT p=0; p<4; p++)
    {
-        cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast<CGV_BYTE> (epo_code[0 + p] >> 1));
-        cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast<CGV_BYTE> (epo_code[4 + p] >> 1));
+        cmp_Write8Bit(cmp_out, &bitPosition, 7, CMP_STATIC_CAST(CGV_BYTE,epo_code[0 + p] >> 1));
+        cmp_Write8Bit(cmp_out, &bitPosition, 7, CMP_STATIC_CAST(CGV_BYTE,epo_code[4 + p] >> 1));
    }

    // p bits
@ -2348,7 +2201,7 @@ void  Encode_mode6(
 void  Compress_mode01237(
                    CGU_INT             blockMode,
                    BC7_EncodeState     EncodeState[],
-uniform CMP_GLOBAL    BC7_Encode          u_BC7Encode[])
+uniform CMP_GLOBAL  BC7_Encode          u_BC7Encode[])
 {
    CGV_INDEX       storedBestindex[MAX_PARTITIONS][MAX_SUBSETS][MAX_SUBSET_SIZE];
    CGV_ERROR       storedError[MAX_PARTITIONS];
@ -2417,7 +2270,7 @@ uniform CMP_GLOBAL    BC7_Encode          u_BC7Encode[])
        GetPartitionSubSet_mode01237(
                  image_subsets,
                  subset_entryCount,
-                  static_cast<CGV_UINT8>(mode_blockPartition),
+                  CMP_STATIC_CAST(CGV_UINT8,mode_blockPartition),
                  EncodeState->image_src,
                  blockMode,
                  EncodeState->channels3or4);
@ -2526,7 +2379,7 @@ uniform CMP_GLOBAL    BC7_Encode          u_BC7Encode[])
                                   tmp_epo_code,
                                   src_image_block,
                                   numEntries,
-                                   static_cast<CGU_INT8>(EncodeState->clusters),  // Mi_
+                                   CMP_STATIC_CAST(CGU_INT8,EncodeState->clusters),  // Mi_
                                   EncodeState->bits,
                                   EncodeState->channels3or4,
                                   u_BC7Encode);
@ -2735,7 +2588,7 @@ uniform CMP_GLOBAL    BC7_Encode          u_BC7Encode[])
                                             src_color_Block,
                                             SOURCE_BLOCK_SIZE,
                                             EncodeState->numClusters0[idxMode],
-                                             static_cast<CGU_INT8>(EncodeState->modeBits[0]),
+                                             CMP_STATIC_CAST(CGU_UINT8,EncodeState->modeBits[0]),
                                             3,
                                             u_BC7Encode);

@ -2746,7 +2599,7 @@ uniform CMP_GLOBAL    BC7_Encode          u_BC7Encode[])
                                               src_alpha_Block,
                                               SOURCE_BLOCK_SIZE,
                                               EncodeState->numClusters1[idxMode],
-                                               static_cast<CGU_UINT8>(EncodeState->modeBits[1]),
+                                               CMP_STATIC_CAST(CGU_UINT8,EncodeState->modeBits[1]),
                                               3,
                                               u_BC7Encode) / 3.0f;

@ -4574,6 +4427,7 @@ uniform CMP_GLOBAL    BC7_Encode       u_BC7Encode[])
        CGU_INT      Mode = 0x0001 << blockMode;
        if (!(u_BC7Encode->validModeMask & Mode))
            continue;
+
        switch (blockMode)
        {
        // image processing with no alpha
@ -4802,8 +4656,8 @@ void GetBC7Ramp(CGU_UINT32 endpoint[][MAX_DIMENSION_BIG],
            ep[0][i] += (CGU_UINT32)(ep[0][i] >> componentBits[i]);
            ep[1][i] += (CGU_UINT32)(ep[1][i] >> componentBits[i]);

-            ep[0][i] = min8(255, max8(0, static_cast<CGU_UINT8>(ep[0][i])));
-            ep[1][i] = min8(255, max8(0, static_cast<CGU_UINT8>(ep[1][i])));
+            ep[0][i] = min8(255, max8(0,CMP_STATIC_CAST(CGU_UINT8,ep[0][i])));
+            ep[1][i] = min8(255, max8(0,CMP_STATIC_CAST(CGU_UINT8,ep[1][i])));
        }
    }

@ -4926,7 +4780,7 @@ void DecompressDualIndexBlock(
            // If this is a fixup index then clear the implicit bit
            if(j==0)
            {
-                blockIndices[i][j] &= ~(1 << (bti[m_blockMode].indexBits[i]-1));
+                blockIndices[i][j] &= ~(1 << (bti[m_blockMode].indexBits[i]-1U));
                for(k=0;k<static_cast <CGU_UINT32>(bti[m_blockMode].indexBits[i] - 1); k++)
                {
                    blockIndices[i][j] |= (CGU_UINT32)ReadBit(in,m_bitPosition) << k;
@ -5377,7 +5231,7 @@ int CMP_CDECL CompressBlockBC7( const unsigned char *srcBlock,
    EncodeState.best_err        = CMP_FLOAT_MAX;
    EncodeState.validModeMask   = u_BC7Encode->validModeMask;
    EncodeState.part_count      = u_BC7Encode->part_count;
-    EncodeState.channels        = static_cast<CGU_CHANNEL>(u_BC7Encode->channels);
+    EncodeState.channels        = CMP_STATIC_CAST(CGU_CHANNEL,u_BC7Encode->channels);

    CGU_UINT8 offsetR = 0;
    CGU_UINT8 offsetG = 16;
@ -5410,6 +5264,7 @@ int CMP_CDECL CompressBlockBC7( const unsigned char *srcBlock,
    return CGU_CORE_OK;
 }

+
 int  CMP_CDECL DecompressBlockBC7(const unsigned char cmpBlock[16],
                                  unsigned char srcBlock[64],
                                  const void *options = NULL) {
@ -5429,7 +5284,7 @@ int  CMP_CDECL DecompressBlockBC7(const unsigned char cmpBlock[16],
 #endif

 //============================================== OpenCL USER INTERFACE ====================================================
-#ifdef ASPM_GPU
+#ifdef ASPM_OPENCL
 CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const  CGU_Vec4uc      ImageSource[],
                                                  CMP_GLOBAL        CGV_CMPOUT      ImageDestination[],
                                          uniform CMP_GLOBAL        Source_Info     SourceInfo[],
@ -5438,21 +5293,21 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const  CGU_Vec4uc
    CGU_INT xID=0;
    CGU_INT yID=0;

-    xID = get_global_id(0);         // ToDo: Define a size_t 32 bit and 64 bit basd on clGetDeviceInfo
+    xID = get_global_id(0);         // ToDo: Define a size_t 32 bit and 64 bit based on clGetDeviceInfo
    yID = get_global_id(1);
-
    CGU_INT  srcWidth  = SourceInfo->m_src_width;
    CGU_INT  srcHeight = SourceInfo->m_src_height;
    if (xID >= (srcWidth  / BlockX)) return;
    if (yID >= (srcHeight / BlockY)) return;

+    //ASPM_PRINT(("[ASPM_OCL] %d %d  size %d\n",xID,yID,sizeof(BC7_Encode)));
+
    CGU_INT     destI = (xID*COMPRESSED_BLOCK_SIZE) + (yID*(srcWidth / BlockX)*COMPRESSED_BLOCK_SIZE);
    CGU_INT     srcindex = 4 * (yID * srcWidth + xID);
    CGU_INT     blkindex = 0;
    BC7_EncodeState EncodeState;
-    varying BC7_EncodeState* uniform state = &EncodeState;
-
-   copy_BC7_Encode_settings(state, BC7Encode);
+    cmp_memsetBC7(&EncodeState,0,sizeof(EncodeState));
+    copy_BC7_Encode_settings(&EncodeState, BC7Encode);

    //Check if it is a complete 4X4 block
    if (((xID + 1)*BlockX <= srcWidth) && ((yID + 1)*BlockY <= srcHeight))
@ -5460,10 +5315,10 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const  CGU_Vec4uc
        srcWidth = srcWidth - 4;
        for (CGU_INT j = 0; j < 4; j++) {
            for (CGU_INT i = 0; i < 4; i++) {
-                state->image_src[blkindex+0*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].x;
-                state->image_src[blkindex+1*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].y;
-                state->image_src[blkindex+2*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].z;
-                state->image_src[blkindex+3*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].w;
+                EncodeState.image_src[blkindex+0*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].x;
+                EncodeState.image_src[blkindex+1*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].y;
+                EncodeState.image_src[blkindex+2*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].z;
+                EncodeState.image_src[blkindex+3*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].w;
                blkindex++;
                srcindex++;
            }
@ -5471,13 +5326,21 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const  CGU_Vec4uc
            srcindex += srcWidth;
        }

-   copy_BC7_Encode_settings(state, BC7Encode);
-
    BC7_CompressBlock(&EncodeState, BC7Encode);

+    // printf("CMP %x %x %x %x %x %x %x",
+    // EncodeState.cmp_out[0],
+    // EncodeState.cmp_out[1],
+    // EncodeState.cmp_out[2],
+    // EncodeState.cmp_out[3],
+    // EncodeState.cmp_out[4],
+    // EncodeState.cmp_out[5],
+    // EncodeState.cmp_out[6]
+    // );
+
    for (CGU_INT i=0; i<COMPRESSED_BLOCK_SIZE; i++)
    {
-        ImageDestination[destI+i] = state->cmp_out[i];
+        ImageDestination[destI+i] = EncodeState.cmp_out[i];
    }

    }
--- a/extern/CMP_Core/shaders/BC7_Encode_Kernel.h
+++ b/extern/CMP_Core/shaders/BC7_Encode_Kernel.h
@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2020    Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@ -23,6 +23,8 @@
 #ifndef BC7_ENCODE_KERNEL_H
 #define BC7_ENCODE_KERNEL_H

+#pragma warning(disable:4505)  // disable MSVC warning C4505: unreferenced local function has been removed
+
 #if defined(ISPC)||defined(ASPM)
 //#include "..\..\Common\Common_Def.h"
 #include "Common_Def.h"
@ -252,7 +254,6 @@ BC7_Encode
    CGU_INT    refineIterations;
    CGU_INT    part_count;
    CGU_INT    channels;
-
 } 
 #ifndef ASPM
 BC7_Encode
@ -569,6 +570,163 @@ CMP_CONSTANT CGU_UINT8    par_vectors_nd[2][8][64][2][4] = {
 },
 };

+CMP_CONSTANT CGU_UINT32 subset_mask_table[] = {
+        // 2 subset region patterns
+        0x0000CCCCu, // 0   1100 1100 1100 1100  (MSB..LSB)
+        0x00008888u, // 1   1000 1000 1000 1000
+        0x0000EEEEu, // 2   1110 1110 1110 1110
+        0x0000ECC8u, // 3   1110 1100 1100 1000
+        0x0000C880u, // 4   1100 1000 1000 0000
+        0x0000FEECu, // 5   1111 1110 1110 1100
+        0x0000FEC8u, // 6   1111 1110 1100 1000
+        0x0000EC80u, // 7   1110 1100 1000 0000
+        0x0000C800u, // 8   1100 1000 0000 0000
+        0x0000FFECu, // 9   1111 1111 1110 1100
+        0x0000FE80u, // 10  1111 1110 1000 0000
+        0x0000E800u, // 11  1110 1000 0000 0000
+        0x0000FFE8u, // 12  1111 1111 1110 1000
+        0x0000FF00u, // 13  1111 1111 0000 0000
+        0x0000FFF0u, // 14  1111 1111 1111 0000
+        0x0000F000u, // 15  1111 0000 0000 0000
+        0x0000F710u, // 16  1111 0111 0001 0000
+        0x0000008Eu, // 17  0000 0000 1000 1110
+        0x00007100u, // 18  0111 0001 0000 0000
+        0x000008CEu, // 19  0000 1000 1100 1110
+        0x0000008Cu, // 20  0000 0000 1000 1100
+        0x00007310u, // 21  0111 0011 0001 0000
+        0x00003100u, // 22  0011 0001 0000 0000
+        0x00008CCEu, // 23  1000 1100 1100 1110
+        0x0000088Cu, // 24  0000 1000 1000 1100
+        0x00003110u, // 25  0011 0001 0001 0000
+        0x00006666u, // 26  0110 0110 0110 0110
+        0x0000366Cu, // 27  0011 0110 0110 1100
+        0x000017E8u, // 28  0001 0111 1110 1000
+        0x00000FF0u, // 29  0000 1111 1111 0000
+        0x0000718Eu, // 30  0111 0001 1000 1110
+        0x0000399Cu, // 31  0011 1001 1001 1100
+        0x0000AAAAu, // 32  1010 1010 1010 1010
+        0x0000F0F0u, // 33  1111 0000 1111 0000
+        0x00005A5Au, // 34  0101 1010 0101 1010
+        0x000033CCu, // 35  0011 0011 1100 1100
+        0x00003C3Cu, // 36  0011 1100 0011 1100
+        0x000055AAu, // 37  0101 0101 1010 1010
+        0x00009696u, // 38  1001 0110 1001 0110
+        0x0000A55Au, // 39  1010 0101 0101 1010
+        0x000073CEu, // 40  0111 0011 1100 1110
+        0x000013C8u, // 41  0001 0011 1100 1000
+        0x0000324Cu, // 42  0011 0010 0100 1100
+        0x00003BDCu, // 43  0011 1011 1101 1100
+        0x00006996u, // 44  0110 1001 1001 0110
+        0x0000C33Cu, // 45  1100 0011 0011 1100
+        0x00009966u, // 46  1001 1001 0110 0110
+        0x00000660u, // 47  0000 0110 0110 0000
+        0x00000272u, // 48  0000 0010 0111 0010
+        0x000004E4u, // 49  0000 0100 1110 0100
+        0x00004E40u, // 50  0100 1110 0100 0000
+        0x00002720u, // 51  0010 0111 0010 0000
+        0x0000C936u, // 52  1100 1001 0011 0110
+        0x0000936Cu, // 53  1001 0011 0110 1100
+        0x000039C6u, // 54  0011 1001 1100 0110
+        0x0000639Cu, // 55  0110 0011 1001 1100
+        0x00009336u, // 56  1001 0011 0011 0110
+        0x00009CC6u, // 57  1001 1100 1100 0110
+        0x0000817Eu, // 58  1000 0001 0111 1110
+        0x0000E718u, // 59  1110 0111 0001 1000
+        0x0000CCF0u, // 60  1100 1100 1111 0000
+        0x00000FCCu, // 61  0000 1111 1100 1100
+        0x00007744u, // 62  0111 0111 0100 0100
+        0x0000EE22u, // 63  1110 1110 0010 0010
+        // 3 Subset region patterns
+        0xF60008CCu,// 0    1111 0110 0000 0000 : 0000 1000 1100 1100 = 2222122011001100 (MSB...LSB)
+        0x73008CC8u,// 1    0111 0011 0000 0000 : 1000 1100 1100 1000 = 1222112211001000
+        0x3310CC80u,// 2    0011 0011 0001 0000 : 1100 1100 1000 0000 = 1122112210020000
+        0x00CEEC00u,// 3    0000 0000 1100 1110 : 1110 1100 0000 0000 = 1110110022002220
+        0xCC003300u,// 4    1100 1100 0000 0000 : 0011 0011 0000 0000 = 2211221100000000
+        0xCC0000CCu,// 5    1100 1100 0000 0000 : 0000 0000 1100 1100 = 2200220011001100
+        0x00CCFF00u,// 6    0000 0000 1100 1100 : 1111 1111 0000 0000 = 1111111122002200
+        0x3300CCCCu,// 7    0011 0011 0000 0000 : 1100 1100 1100 1100 = 1122112211001100
+        0xF0000F00u,// 8    1111 0000 0000 0000 : 0000 1111 0000 0000 = 2222111100000000
+        0xF0000FF0u,// 9    1111 0000 0000 0000 : 0000 1111 1111 0000 = 2222111111110000
+        0xFF0000F0u,// 10   1111 1111 0000 0000 : 0000 0000 1111 0000 = 2222222211110000
+        0x88884444u,// 11   1000 1000 1000 1000 : 0100 0100 0100 0100 = 2100210021002100
+        0x88886666u,// 12   1000 1000 1000 1000 : 0110 0110 0110 0110 = 2110211021102110
+        0xCCCC2222u,// 13   1100 1100 1100 1100 : 0010 0010 0010 0010 = 2210221022102210
+        0xEC80136Cu,// 14   1110 1100 1000 0000 : 0001 0011 0110 1100 = 2221221121101100
+        0x7310008Cu,// 15   0111 0011 0001 0000 : 0000 0000 1000 1100 = 0222002210021100
+        0xC80036C8u,// 16   1100 1000 0000 0000 : 0011 0110 1100 1000 = 2211211011001000
+        0x310008CEu,// 17   0011 0001 0000 0000 : 0000 1000 1100 1110 = 0022100211001110
+        0xCCC03330u,// 18   1100 1100 1100 0000 : 0011 0011 0011 0000 = 2211221122110000
+        0x0CCCF000u,// 19   0000 1100 1100 1100 : 1111 0000 0000 0000 = 1111220022002200
+        0xEE0000EEu,// 20   1110 1110 0000 0000 : 0000 0000 1110 1110 = 2220222011101110
+        0x77008888u,// 21   0111 0111 0000 0000 : 1000 1000 1000 1000 = 1222122210001000
+        0xCC0022C0u,// 22   1100 1100 0000 0000 : 0010 0010 1100 0000 = 2210221011000000
+        0x33004430u,// 23   0011 0011 0000 0000 : 0100 0100 0011 0000 = 0122012200110000
+        0x00CC0C22u,// 24   0000 0000 1100 1100 : 0000 1100 0010 0010 = 0000110022102210
+        0xFC880344u,// 25   1111 1100 1000 1000 : 0000 0011 0100 0100 = 2222221121002100
+        0x06606996u,// 26   0000 0110 0110 0000 : 0110 1001 1001 0110 = 0110122112210110
+        0x66009960u,// 27   0110 0110 0000 0000 : 1001 1001 0110 0000 = 1221122101100000
+        0xC88C0330u,// 28   1100 1000 1000 1100 : 0000 0011 0011 0000 = 2200201120112200
+        0xF9000066u,// 29   1111 1001 0000 0000 : 0000 0000 0110 0110 = 2222200201100110
+        0x0CC0C22Cu,// 30   0000 1100 1100 0000 : 1100 0010 0010 1100 = 1100221022101100
+        0x73108C00u,// 31   0111 0011 0001 0000 : 1000 1100 0000 0000 = 1222112200020000
+        0xEC801300u,// 32   1110 1100 1000 0000 : 0001 0011 0000 0000 = 2221221120000000
+        0x08CEC400u,// 33   0000 1000 1100 1110 : 1100 0100 0000 0000 = 1100210022002220
+        0xEC80004Cu,// 34   1110 1100 1000 0000 : 0000 0000 0100 1100 = 2220220021001100
+        0x44442222u,// 35   0100 0100 0100 0100 : 0010 0010 0010 0010 = 0210021002100210
+        0x0F0000F0u,// 36   0000 1111 0000 0000 : 0000 0000 1111 0000 = 0000222211110000
+        0x49242492u,// 37   0100 1001 0010 0100 : 0010 0100 1001 0010 = 0210210210210210
+        0x42942942u,// 38   0100 0010 1001 0100 : 0010 1001 0100 0010 = 0210102121020210
+        0x0C30C30Cu,// 39   0000 1100 0011 0000 : 1100 0011 0000 1100 = 1100221100221100
+        0x03C0C03Cu,// 40   0000 0011 1100 0000 : 1100 0000 0011 1100 = 1100002222111100
+        0xFF0000AAu,// 41   1111 1111 0000 0000 : 0000 0000 1010 1010 = 2222222210101010
+        0x5500AA00u,// 42   0101 0101 0000 0000 : 1010 1010 0000 0000 = 1212121200000000
+        0xCCCC3030u,// 43   1100 1100 1100 1100 : 0011 0000 0011 0000 = 2211220022112200
+        0x0C0CC0C0u,// 44   0000 1100 0000 1100 : 1100 0000 1100 0000 = 1100220011002200
+        0x66669090u,// 45   0110 0110 0110 0110 : 1001 0000 1001 0000 = 1221022012210220
+        0x0FF0A00Au,// 46   0000 1111 1111 0000 : 1010 0000 0000 1010 = 1010222222221010
+        0x5550AAA0u,// 47   0101 0101 0101 0000 : 1010 1010 1010 0000 = 1212121212120000
+        0xF0000AAAu,// 48   1111 0000 0000 0000 : 0000 1010 1010 1010 = 2222101010101010
+        0x0E0EE0E0u,// 49   0000 1110 0000 1110 : 1110 0000 1110 0000 = 1110222011102220
+        0x88887070u,// 50   1000 1000 1000 1000 : 0111 0000 0111 0000 = 2111200021112000
+        0x99906660u,// 51   1001 1001 1001 0000 : 0110 0110 0110 0000 = 2112211221120000
+        0xE00E0EE0u,// 52   1110 0000 0000 1110 : 0000 1110 1110 0000 = 2220111011102220
+        0x88880770u,// 53   1000 1000 1000 1000 : 0000 0111 0111 0000 = 2000211121112000
+        0xF0000666u,// 54   1111 0000 0000 0000 : 0000 0110 0110 0110 = 2222011001100110
+        0x99006600u,// 55   1001 1001 0000 0000 : 0110 0110 0000 0000 = 2112211200000000
+        0xFF000066u,// 56   1111 1111 0000 0000 : 0000 0000 0110 0110 = 2222222201100110
+        0xC00C0CC0u,// 57   1100 0000 0000 1100 : 0000 1100 1100 0000 = 2200110011002200
+        0xCCCC0330u,// 58   1100 1100 1100 1100 : 0000 0011 0011 0000 = 2200221122112200
+        0x90006000u,// 59   1001 0000 0000 0000 : 0110 0000 0000 0000 = 2112000000000000
+        0x08088080u,// 60   0000 1000 0000 1000 : 1000 0000 1000 0000 = 1000200010002000
+        0xEEEE1010u,// 61   1110 1110 1110 1110 : 0001 0000 0001 0000 = 2221222022212220
+        0xFFF0000Au,// 62   1111 1111 1111 0000 : 0000 0000 0000 1010 = 2222222222221010
+        0x731008CEu,// 63   0111 0011 0001 0000 : 0000 1000 1100 1110 = 0222102211021110
+        };
+
+   // Ramp lookup table: 5 rows of SOURCE_BLOCK_SIZE entries, one row per index width (0..4 bits).
+   // Row b holds 2^b values in ascending order (ending at 64 for b >= 1); the remaining entries are zero padding.
+   // NOTE(review): presumably these are the BC7 index interpolation weights - confirm against the users of rampI.
+   CMP_CONSTANT CGV_EPOCODE rampI[5*SOURCE_BLOCK_SIZE] = {
+    0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 0 bit index
+    0 ,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 1 bit index
+    0 ,21,43,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 2 bit index
+    0 ,9 ,18,27,37,46,55,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 3 bit index
+    0 ,4 ,9 ,13,17,21,26,30,34,38,43,47,51,55,60,64  // 4 bit index
+   };
+
+   // Same as CMP SDK v3.1 BC7_FIXUPINDEX1 & BC7_FIXUPINDEX2 for each partition in the range 0..63
+   // Each entry is saved as a packed INT = ((BC7_FIXUPINDEX1 << 4) + BC7_FIXUPINDEX2)
+   CMP_CONSTANT CGV_FIXUPINDEX  FIXUPINDEX[] = {
+       // 2 subset partitions 0..63
+        0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u,
+        0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0x20u, 0x20u,
+        0xf0u, 0xf0u, 0x60u, 0x80u, 0x20u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0x60u,
+        0x60u, 0x20u, 0x60u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u,
+        // 3 subset partitions 64..127
+        0x3fu, 0x38u, 0xf8u, 0xf3u, 0x8fu, 0x3fu, 0xf3u, 0xf8u, 0x8fu, 0x8fu, 0x6fu, 0x6fu, 0x6fu, 0x5fu, 0x3fu, 0x38u,
+        0x3fu, 0x38u, 0x8fu, 0xf3u, 0x3fu, 0x38u, 0x6fu, 0xa8u, 0x53u, 0x8fu, 0x86u, 0x6au, 0x8fu, 0x5fu, 0xfau, 0xf8u,
+        0x8fu, 0xf3u, 0x3fu, 0x5au, 0x6au, 0xa8u, 0x89u, 0xfau, 0xf6u, 0x3fu, 0xf8u, 0x5fu, 0xf3u, 0xf6u, 0xf6u, 0xf8u,
+        0x3fu, 0xf3u, 0x5fu, 0x5fu, 0x5fu, 0x8fu, 0x5fu, 0xafu, 0x5fu, 0xafu, 0x8fu, 0xdfu, 0xf3u, 0xcfu, 0x3fu, 0x38u 
+   };
+
+
 #ifndef ASPM_GPU
 // =============================== USED BY DECODER THIS CODE NEEDS TO BE UPDATED =========================================
 CMP_CONSTANT CGU_UINT32  BC7_FIXUPINDICES_LOCAL[MAX_SUBSETS][MAX_PARTITIONS][3] =
--- a/extern/CMP_Core/shaders/BC7_Encode_kernel.hlsl
+++ b/extern/CMP_Core/shaders/BC7_Encode_kernel.hlsl
--- a/extern/CMP_Core/shaders/BCn_Common_Kernel.h
+++ b/extern/CMP_Core/shaders/BCn_Common_Kernel.h
--- a/extern/CMP_Core/shaders/Common_Def.h
+++ b/extern/CMP_Core/shaders/Common_Def.h
@ -1,8 +1,5 @@
-#ifndef _COMMON_DEFINITIONS_H
-#define _COMMON_DEFINITIONS_H
-
 //===============================================================================
-// Copyright (c) 2007-2019 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2007-2020 Advanced Micro Devices, Inc. All rights reserved.
 // Copyright (c) 2004-2006 ATI Technologies Inc.
 //===============================================================================
 //
@ -25,11 +22,26 @@
 // THE SOFTWARE.
 //
 //
-//  File Name:   Common_Def.h
+//  File Name:   Common_Def
 //  Description: common definitions used for CPU/HPC/GPU
 //
 //////////////////////////////////////////////////////////////////////////////

+#ifndef _COMMON_DEFINITIONS_H
+#define _COMMON_DEFINITIONS_H
+
+// UE4 shaders require the .ush file extension in place of the standard .h.
+// The INC_* defines below select the matching include file name, so users do not
+// have to modify the include extensions themselves when building for UE4.
+
+#ifdef ASPM_HLSL_UE4
+#pragma once
+#define INC_cmp_math_vec4   "cmp_math_vec4.ush"
+#define INC_cmp_math_func   "cmp_math_func.ush"
+#else
+#define INC_cmp_math_vec4   "cmp_math_vec4.h"
+#define INC_cmp_math_func   "cmp_math_func.h"
+#endif

 // Features
 #ifdef _WIN32
@ -44,15 +56,24 @@
 // Using OpenCL Compiler
 #ifdef __OPENCL_VERSION__
 #define  ASPM_GPU
+#define  ASPM_OPENCL
 #endif

+// Using DirectX fxc Compiler
+// Note: use the /DASPM_HLSL command line option to define this
+#ifdef ASPM_HLSL
+#define  ASPM_GPU
+#endif

 #ifdef _LINUX
 #undef ASPM_GPU
+#undef ASPM_OPENCL
+#ifndef ASPM_HLSL
 #include <cstring>
 #include <cmath>
 #include <stdio.h>
-#include "cmp_math_vec4.h"
+#include INC_cmp_math_vec4
+#endif
 #endif

 #ifndef CMP_MAX
@ -63,6 +84,13 @@
 #define CMP_MIN(x, y) (((x) < (y)) ? (x) : (y))
 #endif

+#ifndef ASPM_GPU
+#define CMP_STATIC_CAST(x,y) static_cast<x>(y)
+#else
+#define CMP_STATIC_CAST(x,y) (x)(y)
+#endif
+
+
 #define CMP_SET_BC13_DECODER_RGBA       //  Sets mapping BC1, BC2 & BC3 to decode Red,Green,Blue and Alpha 
                                        //       RGBA to channels [0,1,2,3] else BGRA maps to [0,1,2,3]
                                        //  BC4 alpha always maps as AAAA to channels [0,1,2,3] 
@ -70,8 +98,8 @@

 //#define USE_BLOCK_LINEAR

-#define CMP_FLOAT_MAX       3.402823466e+38F // max value used to detect an Error in processing
-#define CMP_FLOAT_MAX_EXP   38
+#define CMP_FLOAT_MAX                   3.402823466e+38F // max value used to detect an Error in processing
+#define CMP_FLOAT_MAX_EXP               38
 #define USE_PROCESS_SEPERATE_ALPHA          // Enable this to use higher quality code using CompressDualIndexBlock
 #define COMPRESSED_BLOCK_SIZE           16  // Size of a compressed block in bytes
 #define MAX_DIMENSION_BIG               4   // Max number of channels  (RGBA)
@ -84,6 +112,75 @@
 //#define USE_BLOCK_LINEAR    // Source Data is organized in linear form for each block : Experimental Code not fully developed 
 //#define USE_DOUBLE          // Default is to use float, enable to use double data types only for float definitions

+//---------------------------------------------
+// Predefinitions for GPU and CPU compiled code
+//---------------------------------------------
+
+#ifdef ASPM_HLSL
+       // ==== Vectors ====
+       typedef float2  CGU_Vec2f;
+       typedef float2  CGV_Vec2f;
+       typedef float3  CGU_Vec3f;
+       typedef float3  CGV_Vec3f;
+       typedef float4  CGU_Vec4f;
+       typedef float4  CGV_Vec4f;
+
+       typedef int2    CGU_Vec2i;
+       typedef int2    CGV_Vec2i;
+       typedef uint2   CGU_Vec2ui;
+       typedef uint2   CGV_Vec2ui;
+
+       typedef int3    CGU_Vec3i;
+       typedef int3    CGV_Vec3i;
+       typedef uint3   CGU_Vec3ui;
+       typedef uint3   CGV_Vec3ui;
+
+       typedef uint4   CGU_Vec4ui;
+       typedef uint4   CGV_Vec4ui;
+
+       // ==== Scalar Types ==== to remove from code
+       typedef int                  CGU_INT8;
+       typedef uint                 CGU_INT;
+       typedef int                  CGV_INT;
+       typedef uint                 CGU_UINT8;
+       typedef uint                 CGU_UINT;
+
+       // ==== Scalar Types ====
+       typedef int                  CGU_BOOL;
+       typedef int                  CGV_BOOL;
+       typedef int                  CGU_INT32;
+       typedef int                  CGV_INT32;
+       typedef uint                 CGU_UINT32;
+       typedef uint                 CGV_UINT32;
+       typedef float                CGV_FLOAT;
+       typedef float                CGU_FLOAT;
+       typedef min16float           CGU_MIN16_FLOAT;    // FP16 GPU support defaults to 32 bit if no HW support
+
+        #define TRUE  1
+        #define FALSE 0
+        #define CMP_CDECL
+
+        #define BC7_ENCODECLASS
+        #define CMP_EXPORT
+        #define INLINE
+        #define uniform
+        #define varying
+        #define CMP_GLOBAL
+        #define CMP_KERNEL
+        #define CMP_CONSTANT
+        #define CMP_STATIC
+        #define CMP_REFINOUT
+        #define CMP_PTRINOUT
+        #define CMP_INOUT       inout
+        #define CMP_OUT         out
+        #define CMP_IN          in
+        #define CMP_UNUSED(x)   (x);
+        #define CMP_UNROLL      [unroll]
+
+
+
+#else
+
 typedef enum {
    CGU_CORE_OK = 0,                          // No errors, call was successful
    CGU_CORE_ERR_UNKOWN,                      // An unknown error occurred
@ -95,26 +192,41 @@ typedef enum {
 } CGU_ERROR_CODES;


-//---------------------------------------------
-// Predefinitions for GPU and CPU compiled code
-//---------------------------------------------
-
-#ifdef ASPM_GPU  // GPU Based code
+#ifdef ASPM_OPENCL  // GPU Based code using OpenCL
        // ==== Vectors ====
        typedef float2  CGU_Vec2f;
        typedef float2  CGV_Vec2f;
        typedef float3  CMP_Vec3f;
        typedef float3  CGU_Vec3f;
        typedef float3  CGV_Vec3f;
+        typedef float4  CGU_Vec4f;
+        typedef float4  CGV_Vec4f;
+
        typedef uchar3  CGU_Vec3uc;
        typedef uchar3  CGV_Vec3uc;
+
        typedef uchar4  CMP_Vec4uc;
        typedef uchar4  CGU_Vec4uc;
        typedef uchar4  CGV_Vec4uc;

+        typedef int2   CGU_Vec2i;
+        typedef int2   CGV_Vec2i;
+        typedef int3   CGU_Vec3i;
+        typedef int3   CGV_Vec3i;
+        typedef int4   CGU_Vec4i;
+        typedef int4   CGV_Vec4i;
+
+        typedef uint2  CGU_Vec2ui;
+        typedef uint2  CGV_Vec2ui;
+        typedef uint3  CGU_Vec3ui;
+        typedef uint3  CGV_Vec3ui;
+        typedef uint4  CGU_Vec4ui;
+        typedef uint4  CGV_Vec4ui;
+
+
        #define USE_BC7_SP_ERR_IDX
-        #define ASPM_PRINT(args)      printf args
        #define BC7_ENCODECLASS
+        #define ASPM_PRINT(args)      printf args

        #define CMP_EXPORT
        #define INLINE
@ -124,13 +236,20 @@ typedef enum {
        #define CMP_KERNEL          __kernel
        #define CMP_CONSTANT        __constant
        #define CMP_STATIC
-
+        #define CMP_REFINOUT        &
+        #define CMP_PTRINOUT        *
+        #define CMP_INOUT
+        #define CMP_OUT
+        #define CMP_IN
+        #define CMP_UNUSED(x)
+        #define CMP_UNROLL

        typedef unsigned int        CGU_DWORD;      //32bits
        typedef int                 CGU_INT;        //32bits
-        typedef int                 CGU_BOOL;
+        typedef bool                CGU_BOOL;
        typedef unsigned short      CGU_SHORT;      //16bits
        typedef float               CGU_FLOAT;
+        typedef half                CGU_MIN16_FLOAT;    // FP16 GPU support defaults to 32 bit if no HW support
        typedef unsigned int        uint32;     // need to remove this def

        typedef int                 CGV_INT;
@ -163,6 +282,15 @@ typedef enum {
 #else
    // CPU & ASPM definitions

+    #define CMP_REFINOUT        &
+    #define CMP_PTRINOUT        *
+    #define CMP_INOUT
+    #define CMP_OUT
+    #define CMP_IN
+    #define CMP_UNUSED(x)       (void)(x);
+    #define CMP_UNROLL
+
+
    #ifdef ASPM // SPMD ,SIMD CPU code
        // using hybrid (CPU/GPU) aspm compiler 
        #define ASPM_PRINT(args)       print args
@ -185,6 +313,8 @@ typedef enum {
        typedef unsigned int64  uint64;
        typedef uniform float   CGU_FLOAT;
        typedef varying float   CGV_FLOAT;
+        typedef uniform float   CGU_MIN16_FLOAT;
+
        typedef uniform uint8   CGU_UINT8;
        typedef varying uint8   CGV_UINT8;

@ -192,18 +322,24 @@ typedef enum {
        typedef CGV_UINT8<4> CGV_Vec4uc;
        typedef CGU_UINT8<4> CGU_Vec4uc;

-        typedef CGU_FLOAT<3> CGU_Vec3f;
-        typedef CGV_FLOAT<3> CGV_Vec3f;
-
        typedef CGU_FLOAT<2> CGU_Vec2f;
        typedef CGV_FLOAT<2> CGV_Vec2f;
+        typedef CGU_FLOAT<3> CGU_Vec3f;
+        typedef CGV_FLOAT<3> CGV_Vec3f;
+        typedef CGU_FLOAT<4> CGU_Vec4f;
+        typedef CGV_FLOAT<4> CGV_Vec4f;
+
+        typedef CGU_UINT32<3> CGU_Vec3ui;
+        typedef CGV_UINT32<3> CGV_Vec3ui;
+
+        typedef CGU_UINT32<4> CGU_Vec4ui;
+        typedef CGV_UINT32<4> CGV_Vec4ui;

        #define CMP_CDECL
-
    #else   // standard CPU code
        #include <stdio.h>
        #include <string>
-        #include "cmp_math_vec4.h"
+        #include INC_cmp_math_vec4

        // using CPU compiler
        #define ASPM_PRINT(args)  printf args
@ -227,7 +363,7 @@ typedef enum {
        typedef unsigned long   uint64;

        typedef int8            CGV_BOOL;
-        typedef int8            CGU_BOOL;
+        typedef bool            CGU_BOOL;
        typedef int16           CGU_WORD;
        typedef uint8           CGU_SHORT;
        typedef int64           CGU_LONG;
@ -235,8 +371,19 @@ typedef enum {

        typedef uniform float   CGU_FLOAT;
        typedef varying float   CGV_FLOAT;
+        typedef uniform float   CGU_MIN16_FLOAT;
+
        typedef uniform uint8   CGU_UINT8;
        typedef varying uint8   CGV_UINT8;
+
+        typedef CMP_Vec3ui      CGU_Vec3ui;
+        typedef CMP_Vec3ui      CGV_Vec3ui;
+
+        typedef CMP_Vec4ui      CGU_Vec4ui;
+        typedef CMP_Vec4ui      CGV_Vec4ui;
+        typedef CMP_Vec4f       CGU_Vec4f;
+        typedef CMP_Vec4f       CGV_Vec4f;
+
        #if defined(WIN32) || defined(_WIN64)
        #define CMP_CDECL __cdecl
        #else
@ -275,9 +422,10 @@ typedef enum {
    typedef uint16                  CGV_UINT16;
    typedef uint32                  CGV_UINT32;
    typedef uint64                  CGV_UINT64;
-#endif // ASPM_GPU


+#endif // else ASPM_OPENCL
+
 typedef struct 
 {
    CGU_UINT32     m_src_width;
@ -287,14 +435,20 @@ typedef struct
    CGU_FLOAT      m_fquality;
 } Source_Info;

+typedef unsigned char*  CGU_PTR;
+
 // Ref Compute_CPU_HPC
 struct texture_surface
 {
-    CGU_UINT8*  ptr;
+    CGU_PTR     ptr;
    CGU_INT     width,
                height,
                stride;
    CGU_INT     channels;
 };

-#endif
+
+#endif // else ASPM_HLSL
+
+#endif // Common_Def.h
+ 
--- a/extern/CMP_Core/shaders/CopyFiles.bat
+++ b/extern/CMP_Core/shaders/CopyFiles.bat
@ -12,36 +12,55 @@ echo %mypath:~0,-1%
 IF NOT EXIST "%outpath%"\Plugins mkdir %BUILD_OUTDIR%Plugins
 IF NOT EXIST "%outpath%"\Plugins\Compute mkdir %BUILD_OUTDIR%Plugins\Compute

-REM Build Vulkan Shader Binary
-REM "%VULKAN_SDK%"\bin\glslangvalidator -V %mypath:~0,-1%\BC1.comp -o %BUILD_OUTDIR%\Plugins\Compute\BC1.spv
+REM ToDo: Build Vulkan based shaders
+REM "%VULKAN_SDK%"\bin\glslangvalidator -V %mypath:~0,-1%\BC1... -o %BUILD_OUTDIR%\Plugins\Compute\BC1....spv
 REM IF %ERRORLEVEL% GTR 0 exit 123

-REM Enabled in v4.0
+REM Remove any previously compiled kernel binaries (OpenCL .cpp.cmp and HLSL .hlsl*.cmp)
 REM 
-REM del %BUILD_OUTDIR%Plugins\Compute\BC1_Encode_Kernel.cpp.cmp
-REM del %BUILD_OUTDIR%Plugins\Compute\BC2_Encode_Kernel.cpp.cmp
-REM del %BUILD_OUTDIR%Plugins\Compute\BC3_Encode_Kernel.cpp.cmp
-REM del %BUILD_OUTDIR%Plugins\Compute\BC4_Encode_Kernel.cpp.cmp
-REM del %BUILD_OUTDIR%Plugins\Compute\BC5_Encode_Kernel.cpp.cmp
-REM del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_Kernel.cpp.cmp
-REM del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.cpp.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC1_Encode_kernel.cpp.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC1_Encode_kernel.hlsl.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC2_Encode_kernel.cpp.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC2_Encode_kernel.hlsl.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC3_Encode_kernel.cpp.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC3_Encode_kernel.hlsl.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC4_Encode_kernel.cpp.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC4_Encode_kernel.hlsl.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC5_Encode_kernel.cpp.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC5_Encode_kernel.hlsl.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_kernel.cpp.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_kernel.hlsl.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_kernel.hlsl.0.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_kernel.hlsl.1.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.cpp.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.hlsl.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.hlsl.0.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.hlsl.1.cmp
+del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.hlsl.2.cmp

-XCopy /r /d /y "%mypath:~0,-1%\Common_Def.h"        %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BCn_Common_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.h"     %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.cpp"   %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
-XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\Common_Def.h"            %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BCn_Common_Kernel.h"     %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_kernel.h"     %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_kernel.hlsl"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_kernel.cpp"   %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_kernel.h"     %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_kernel.hlsl"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_kernel.cpp"   %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_kernel.h"     %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_kernel.hlsl"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_kernel.cpp"   %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_kernel.h"     %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_kernel.hlsl"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_kernel.cpp"   %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_kernel.h"     %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_kernel.hlsl"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_kernel.cpp"   %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_kernel.h"     %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_kernel.hlsl"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_kernel.cpp"   %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.h"     %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.hlsl"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.cpp"   %BUILD_OUTDIR%Plugins\Compute\

 echo "Dependencies copied done"

--- a/extern/CMP_Core/source/CMP_Core.h
+++ b/extern/CMP_Core/source/CMP_Core.h
@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright (c) 2019   Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2020   Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@ -19,7 +19,7 @@
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.
 //
-/// \file CMP_Core.h
+/// \file CMP_Core.h  CPU User Interface
 //
 //=====================================================================

--- a/extern/CMP_Core/source/cmp_math_func.h
+++ b/extern/CMP_Core/source/cmp_math_func.h
@ -0,0 +1,143 @@
+//=====================================================================
+// Copyright 2020 (c), Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef CMP_MATH_FUNC_H
+#define CMP_MATH_FUNC_H
+
+
+#include "Common_Def.h"
+
+#ifndef ASPM_GPU
+
+//============================================================================
+// Core API functions which have GPU equivalents, defined here for HPC_CPU usage
+//============================================================================
+
+#include <algorithm>
+#include <cmath>
+using namespace std;
+
+// Three-way comparator over two CGU_FLOAT elements passed as untyped pointers
+// (the signature matches what qsort() expects).
+// Returns -1 when *Elem1 - *Elem2 is negative, 1 when it is positive and
+// 0 otherwise (equal values, or a difference that is NaN).
+static CGU_INT QSortFCmp(const void *Elem1, const void *Elem2) {
+  const CGU_FLOAT delta = *(const CGU_FLOAT *)Elem1 - *(const CGU_FLOAT *)Elem2;
+
+  if (delta < 0.)
+    return -1;
+  if (delta > 0.)
+    return 1;
+  return 0;
+}
+
+// Three-way comparator over two CGU_INT32 elements passed as untyped pointers
+// (the signature matches what qsort() expects).
+// Returns -1, 0 or 1 when *Elem1 is less than, equal to or greater than *Elem2.
+//
+// The values are compared rather than subtracted: assuming CGU_INT32 is a
+// signed 32-bit integer, "a - b" overflows for operands of opposite sign and
+// large magnitude, which is undefined behaviour and yields the wrong ordering.
+static int QSortIntCmp(const void *Elem1, const void *Elem2)
+{
+  const CGU_INT32 lhs = *(const CGU_INT32 *)Elem1;
+  const CGU_INT32 rhs = *(const CGU_INT32 *)Elem2;
+
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+// Dot product of two 3-component vectors: sum of the x, y and z products.
+static CGU_FLOAT  dot(CMP_IN CGU_Vec3f Color,CMP_IN CGU_Vec3f Color2)
+{
+    return (Color.x * Color2.x) + (Color.y * Color2.y) + (Color.z * Color2.z);
+}
+
+// Dot product of two 2-component vectors: sum of the x and y products.
+static CGU_FLOAT  dot(CMP_IN CGU_Vec2f Color,CMP_IN CGU_Vec2f Color2)
+{
+    return Color.x * Color2.x + Color.y * Color2.y;
+}
+
+// Component-wise absolute value of a 2-component float vector.
+//
+// std::fabs is used instead of std::abs: fabs only has floating-point
+// overloads, so it can never silently resolve to the integer abs() (and
+// truncate the value) when the floating-point overloads of abs are not visible.
+static CGU_Vec2f abs(CMP_IN CGU_Vec2f Color)
+{
+    // Color is a by-value copy, so it is updated in place and returned.
+    Color.x = std::fabs(Color.x);
+    Color.y = std::fabs(Color.y);
+    return Color;
+}
+
+// Component-wise absolute value of a 3-component float vector.
+//
+// std::fabs is used instead of std::abs: fabs only has floating-point
+// overloads, so it can never silently resolve to the integer abs() (and
+// truncate the value) when the floating-point overloads of abs are not visible.
+static CGU_Vec3f fabs(CMP_IN CGU_Vec3f Color)
+{
+    // Color is a by-value copy, so it is updated in place and returned.
+    Color.x = std::fabs(Color.x);
+    Color.y = std::fabs(Color.y);
+    Color.z = std::fabs(Color.z);
+    return Color;
+}
+
+// Applies std::round to each component of a 3-component vector.
+static CGU_Vec3f round(CMP_IN CGU_Vec3f Color)
+{
+    // Color is a by-value copy, so it is rounded in place and returned.
+    Color.x = std::round(Color.x);
+    Color.y = std::round(Color.y);
+    Color.z = std::round(Color.z);
+    return Color;
+}
+
+// Applies std::round to each component of a 2-component vector.
+static CGU_Vec2f round(CMP_IN CGU_Vec2f Color)
+{
+    // Color is a by-value copy, so it is rounded in place and returned.
+    Color.x = std::round(Color.x);
+    Color.y = std::round(Color.y);
+    return Color;
+}
+
+// Applies std::ceil to each component of a 3-component vector.
+static CGU_Vec3f ceil(CMP_IN CGU_Vec3f Color)
+{
+    // Color is a by-value copy, so it is updated in place and returned.
+    Color.x = std::ceil(Color.x);
+    Color.y = std::ceil(Color.y);
+    Color.z = std::ceil(Color.z);
+    return Color;
+}
+
+// Applies std::floor to each component of a 3-component vector.
+static CGU_Vec3f floor(CMP_IN CGU_Vec3f Color)
+{
+    // Color is a by-value copy, so it is updated in place and returned.
+    Color.x = std::floor(Color.x);
+    Color.y = std::floor(Color.y);
+    Color.z = std::floor(Color.z);
+    return Color;
+}
+
+// Clamps each component of a 3-component vector to the range [0.0, 1.0].
+// A component that is neither above 1.0 nor below 0.0 (including NaN, for
+// which both comparisons are false) is returned unchanged.
+static CGU_Vec3f saturate(CGU_Vec3f value)
+{
+    value.x = (value.x > 1.0f) ? 1.0f : ((value.x < 0.0f) ? 0.0f : value.x);
+    value.y = (value.y > 1.0f) ? 1.0f : ((value.y < 0.0f) ? 0.0f : value.y);
+    value.z = (value.z > 1.0f) ? 1.0f : ((value.z < 0.0f) ? 0.0f : value.z);
+
+    return value;
+}
+
+#endif
+
+//============================================================================
+// Core API which are shared between GPU & CPU
+//============================================================================
+
+#endif // Header Guard
+
--- a/extern/CMP_Core/source/cmp_math_vec4.h
+++ b/extern/CMP_Core/source/cmp_math_vec4.h
@ -30,14 +30,16 @@
 #if defined (_LINUX) || defined (_WIN32)

 //============================================= VEC2 ==================================================
+// Forward declaration of the 3-component vector template (Vec3) used by this file.
+template <class T> class Vec3;
+
 template<class T>
 class Vec2
 {
 public:
-
    T x;
    T y;

+
    // *****************************************
    //     Constructors
    // *****************************************
@ -54,7 +56,6 @@ public:
    /// Single value constructor.  Sets all components to the given value
    Vec2(const T& v) : x(v), y(v) {};

-
    // *****************************************
    //     Conversions/Assignment/Indexing
    // *****************************************
@ -92,6 +93,13 @@ public:
    /// Subtraction
    const Vec2<T> operator-(const Vec2<T>& rhs) const { return Vec2<T>(x - rhs.x, y - rhs.y); };

+    /// Multiply
+    const Vec2<T> operator*(const Vec2<T>& rhs) const { return Vec2<T>(x * rhs.x, y * rhs.y); };
+
+    /// Divide
+    const Vec2<T> operator/(const Vec2<T>& rhs) const { return Vec2<T>(x / rhs.x, y / rhs.y); };
+
+
    /// Multiply by scalar
    const Vec2<T> operator*(const T& v) const { return Vec2<T>(x * v, y * v); };

@ -113,11 +121,12 @@ public:

 };

-typedef Vec2<float>  CMP_Vec2f;
-typedef Vec2<float>  CGU_Vec2f;
-typedef Vec2<float>  CGV_Vec2f;
-typedef Vec2<double> CMP_Vec2d;
-typedef Vec2<int>    CMP_Vec2i;
+typedef Vec2<float>           CMP_Vec2f;
+typedef Vec2<float>           CGU_Vec2f;
+typedef Vec2<float>           CGV_Vec2f;
+typedef Vec2<double>          CMP_Vec2d;
+typedef Vec2<int>             CMP_Vec2i;
+typedef Vec2<unsigned int>    CGU_Vec2ui;

 //}

@ -134,6 +143,7 @@ public:
    T y;
    T z;

+
    // *****************************************
    //     Constructors
    // *****************************************
@ -180,21 +190,24 @@ public:
    //    Arithmetic
    // *****************************************

-    /// Addition
+    /// Addition by vector
    const Vec3<T> operator+(const Vec3<T>& rhs) const { return Vec3<T>(x + rhs.x, y + rhs.y, z + rhs.z); };

-    /// Subtraction
+    /// Subtraction by vector
    const Vec3<T> operator-(const Vec3<T>& rhs) const { return Vec3<T>(x - rhs.x, y - rhs.y, z - rhs.z); };

+    /// Multiply by vector
+    const Vec3<T> operator*(const Vec3<T>& rhs) const { return Vec3<T>(x * rhs.x, y * rhs.y, z * rhs.z); };
+
+    /// Divide by vector
+    const Vec3<T> operator/(const Vec3<T>& rhs) const { return Vec3<T>(x / rhs.x, y / rhs.y, z / rhs.z); };
+
    /// Multiply by scalar
    const Vec3<T> operator*(const T& v) const { return Vec3<T>(x * v, y * v, z * v); };

    /// Divide by scalar
    const Vec3<T> operator/(const T& v) const { return Vec3<T>(x / v, y / v, z / v); };

-    /// Divide by vector
-    const Vec3<T> operator/(const Vec3<T>& rhs) const { return Vec3<T>(x / rhs.x, y / rhs.y, z / rhs.z); };
-
    /// Addition in-place
    Vec3<T>& operator+= (const Vec3<T>& rhs) { x += rhs.x; y += rhs.y; z += rhs.z; return *this; };

@ -208,6 +221,7 @@ public:
    Vec3<T>& operator/= (const T& v) { x /= v; y /= v; z /= v; return *this; };
 };

+typedef Vec3<bool>              CGU_Vec3bool;
 typedef Vec3<float>             CGU_Vec3f;
 typedef Vec3<float>             CGV_Vec3f;
 typedef Vec3<unsigned char>     CGU_Vec3uc;
@ -217,6 +231,7 @@ typedef Vec3<float>             CMP_Vec3f;
 typedef Vec3<double>            CMP_Vec3d;
 typedef Vec3<int>               CMP_Vec3i;
 typedef Vec3<unsigned char>     CMP_Vec3uc;
+typedef Vec3<unsigned int>      CMP_Vec3ui;

 //============================================= VEC4 ==================================================
 template<class T>
@ -275,21 +290,24 @@ public:
    //    Arithmetic
    // *****************************************

-    /// Addition
+    /// Addition by vector
    const Vec4<T> operator+(const Vec4<T>& rhs) const { return Vec4<T>(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w); };

-    /// Subtraction
+    /// Subtraction by vector
    const Vec4<T> operator-(const Vec4<T>& rhs) const { return Vec4<T>(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w); };

+    /// Multiply by vector
+    const Vec4<T> operator*(const Vec4<T>& rhs) const { return Vec4<T>(x * rhs.x, y * rhs.y, z * rhs.z, w * rhs.w); };
+
+    /// Divide by vector
+    const Vec4<T> operator/(const Vec4<T>& rhs) const { return Vec4<T>(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w); };
+
    /// Multiply by scalar
    const Vec4<T> operator*(const T& v) const { return Vec4<T>(x * v, y * v, z * v, w * v); };

    /// Divide by scalar
    const Vec4<T> operator/(const T& v) const { return Vec4<T>(x / v, y / v, z / v, w / v); };

-    /// Divide by vector
-    const Vec4<T> operator/(const Vec4<T>& rhs) const { return Vec4<T>(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w); };
-
    /// Addition in-place
    Vec4<T>& operator+= (const Vec4<T>& rhs) { x += rhs.x; y += rhs.y; z += rhs.z; w += rhs.w; return *this; };

--- a/extern/CMP_Core/test/BlockConstants.h
+++ b/extern/CMP_Core/test/BlockConstants.h
@ -3,6 +3,7 @@
 #include <string>
 #include <unordered_map>
 struct Block { const unsigned char* data; const unsigned char* color; };
+// Variant of Block whose color pointer refers to float values instead of bytes.
+// NOTE(review): presumably used for the BC6 test blocks - confirm against the tests.
+struct BlockBC6
+{
+    const unsigned char* data;   // byte data of the block
+    const float*         color;  // float color data associated with the block
+};

 static const unsigned char BC1_Red_Ignore_Alpha [] {0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
 static const unsigned char BC1_Blue_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
@ -76,6 +77,102 @@ static const unsigned char BC3_Red_Green_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0
 static const unsigned char BC3_Green_Blue_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
 static const unsigned char BC3_Red_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
 static const unsigned char BC3_Green_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC4_Red_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC4_Blue_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC4_White_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC4_Black_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC4_Red_Blue_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC4_Red_Green_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC4_Green_Blue_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC4_Red_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC4_Green_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC4_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC4_White_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC4_Green_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC4_Black_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC4_Red_Blue_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC4_Red_Green_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC4_Green_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC4_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC4_White_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC4_Black_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC4_Red_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC4_Red_Green_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC4_Green_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC4_Red_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC4_Green_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC5_Red_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC5_Blue_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC5_White_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC5_Black_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC5_Red_Blue_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC5_Red_Green_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC5_Green_Blue_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC5_Red_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC5_Green_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC5_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC5_White_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC5_Green_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC5_Black_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC5_Red_Blue_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC5_Red_Green_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC5_Green_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC5_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC5_White_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC5_Black_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC5_Red_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC5_Red_Green_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC5_Green_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC5_Red_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24};
+static const unsigned char BC5_Green_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Red_Ignore_Alpha [] {0xe3, 0x3d, 0x0 , 0x0 , 0x78, 0xf , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Blue_Half_Alpha [] {0x3 , 0x0 , 0x0 , 0xde, 0x3 , 0x0 , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_White_Half_Alpha [] {0xe3, 0xbd, 0xf7, 0xde, 0x7b, 0xef, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Black_Half_Alpha [] {0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Red_Blue_Half_Alpha [] {0xe3, 0x3d, 0x0 , 0xde, 0x7b, 0xf , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Red_Green_Half_Alpha [] {0xe3, 0xbd, 0xf7, 0x0 , 0x78, 0xef, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Green_Blue_Half_Alpha [] {0x3 , 0x80, 0xf7, 0xde, 0x3 , 0xe0, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Red_Full_Alpha [] {0xe3, 0x3d, 0x0 , 0x0 , 0x78, 0xf , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Green_Full_Alpha [] {0x3 , 0x80, 0xf7, 0x0 , 0x0 , 0xe0, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Blue_Full_Alpha [] {0x3 , 0x0 , 0x0 , 0xde, 0x3 , 0x0 , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_White_Full_Alpha [] {0xe3, 0xbd, 0xf7, 0xde, 0x7b, 0xef, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Green_Ignore_Alpha [] {0x3 , 0x80, 0xf7, 0x0 , 0x0 , 0xe0, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Black_Full_Alpha [] {0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Red_Blue_Full_Alpha [] {0xe3, 0x3d, 0x0 , 0xde, 0x7b, 0xf , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Red_Green_Full_Alpha [] {0xe3, 0xbd, 0xf7, 0x0 , 0x78, 0xef, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Green_Blue_Full_Alpha [] {0x3 , 0x80, 0xf7, 0xde, 0x3 , 0xe0, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Blue_Ignore_Alpha [] {0x3 , 0x0 , 0x0 , 0xde, 0x3 , 0x0 , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_White_Ignore_Alpha [] {0xe3, 0xbd, 0xf7, 0xde, 0x7b, 0xef, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Black_Ignore_Alpha [] {0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Red_Blue_Ignore_Alpha [] {0xe3, 0x3d, 0x0 , 0xde, 0x7b, 0xf , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Red_Green_Ignore_Alpha [] {0xe3, 0xbd, 0xf7, 0x0 , 0x78, 0xef, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Green_Blue_Ignore_Alpha [] {0x3 , 0x80, 0xf7, 0xde, 0x3 , 0xe0, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Red_Half_Alpha [] {0xe3, 0x3d, 0x0 , 0x0 , 0x78, 0xf , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC6_Green_Half_Alpha [] {0x3 , 0x80, 0xf7, 0x0 , 0x0 , 0xe0, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Red_Ignore_Alpha [] {0x10, 0xff, 0x3 , 0x0 , 0xc0, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Blue_Half_Alpha [] {0x20, 0x0 , 0x0 , 0x0 , 0xf0, 0xff, 0xef, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_White_Half_Alpha [] {0x20, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Black_Half_Alpha [] {0x20, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xec, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Red_Blue_Half_Alpha [] {0x20, 0xff, 0x3f, 0x0 , 0xf0, 0xff, 0xef, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Red_Green_Half_Alpha [] {0x20, 0xff, 0xff, 0xff, 0xf , 0x0 , 0xec, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Green_Blue_Half_Alpha [] {0x20, 0x0 , 0xc0, 0xff, 0xff, 0xff, 0xef, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Red_Full_Alpha [] {0x10, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Green_Full_Alpha [] {0x10, 0x0 , 0xfc, 0xf , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Blue_Full_Alpha [] {0x10, 0x0 , 0x0 , 0xf0, 0x3f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_White_Full_Alpha [] {0x10, 0xff, 0xff, 0xff, 0x3f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Green_Ignore_Alpha [] {0x10, 0x0 , 0xfc, 0xf , 0xc0, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Black_Full_Alpha [] {0x10, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Red_Blue_Full_Alpha [] {0x10, 0xff, 0x3 , 0xf0, 0x3f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Red_Green_Full_Alpha [] {0x10, 0xff, 0xff, 0xf , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Green_Blue_Full_Alpha [] {0x10, 0x0 , 0xfc, 0xff, 0x3f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Blue_Ignore_Alpha [] {0x10, 0x0 , 0x0 , 0xf0, 0xff, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_White_Ignore_Alpha [] {0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Black_Ignore_Alpha [] {0x10, 0x0 , 0x0 , 0x0 , 0xc0, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Red_Blue_Ignore_Alpha [] {0x10, 0xff, 0x3 , 0xf0, 0xff, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Red_Green_Ignore_Alpha [] {0x10, 0xff, 0xff, 0xf , 0xc0, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Green_Blue_Ignore_Alpha [] {0x10, 0x0 , 0xfc, 0xff, 0xff, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Red_Half_Alpha [] {0x20, 0xff, 0x3f, 0x0 , 0x0 , 0x0 , 0xec, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC7_Green_Half_Alpha [] {0x20, 0x0 , 0xc0, 0xff, 0xf , 0x0 , 0xec, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };

 Block BC1_Red_Ignore_Alpha_Block = {BC1_Red_Ignore_Alpha, nullptr};
 Block BC1_Blue_Half_Alpha_Block = {BC1_Blue_Half_Alpha, nullptr};
@ -149,6 +246,102 @@ Block BC3_Red_Green_Ignore_Alpha_Block = {BC3_Red_Green_Ignore_Alpha, nullptr};
 Block BC3_Green_Blue_Ignore_Alpha_Block = {BC3_Green_Blue_Ignore_Alpha, nullptr};
 Block BC3_Red_Half_Alpha_Block = {BC3_Red_Half_Alpha, nullptr};
 Block BC3_Green_Half_Alpha_Block = {BC3_Green_Half_Alpha, nullptr};
+Block BC4_Red_Ignore_Alpha_Block = {BC4_Red_Ignore_Alpha, nullptr};
+Block BC4_Blue_Half_Alpha_Block = {BC4_Blue_Half_Alpha, nullptr};
+Block BC4_White_Half_Alpha_Block = {BC4_White_Half_Alpha, nullptr};
+Block BC4_Black_Half_Alpha_Block = {BC4_Black_Half_Alpha, nullptr};
+Block BC4_Red_Blue_Half_Alpha_Block = {BC4_Red_Blue_Half_Alpha, nullptr};
+Block BC4_Red_Green_Half_Alpha_Block = {BC4_Red_Green_Half_Alpha, nullptr};
+Block BC4_Green_Blue_Half_Alpha_Block = {BC4_Green_Blue_Half_Alpha, nullptr};
+Block BC4_Red_Full_Alpha_Block = {BC4_Red_Full_Alpha, nullptr};
+Block BC4_Green_Full_Alpha_Block = {BC4_Green_Full_Alpha, nullptr};
+Block BC4_Blue_Full_Alpha_Block = {BC4_Blue_Full_Alpha, nullptr};
+Block BC4_White_Full_Alpha_Block = {BC4_White_Full_Alpha, nullptr};
+Block BC4_Green_Ignore_Alpha_Block = {BC4_Green_Ignore_Alpha, nullptr};
+Block BC4_Black_Full_Alpha_Block = {BC4_Black_Full_Alpha, nullptr};
+Block BC4_Red_Blue_Full_Alpha_Block = {BC4_Red_Blue_Full_Alpha, nullptr};
+Block BC4_Red_Green_Full_Alpha_Block = {BC4_Red_Green_Full_Alpha, nullptr};
+Block BC4_Green_Blue_Full_Alpha_Block = {BC4_Green_Blue_Full_Alpha, nullptr};
+Block BC4_Blue_Ignore_Alpha_Block = {BC4_Blue_Ignore_Alpha, nullptr};
+Block BC4_White_Ignore_Alpha_Block = {BC4_White_Ignore_Alpha, nullptr};
+Block BC4_Black_Ignore_Alpha_Block = {BC4_Black_Ignore_Alpha, nullptr};
+Block BC4_Red_Blue_Ignore_Alpha_Block = {BC4_Red_Blue_Ignore_Alpha, nullptr};
+Block BC4_Red_Green_Ignore_Alpha_Block = {BC4_Red_Green_Ignore_Alpha, nullptr};
+Block BC4_Green_Blue_Ignore_Alpha_Block = {BC4_Green_Blue_Ignore_Alpha, nullptr};
+Block BC4_Red_Half_Alpha_Block = {BC4_Red_Half_Alpha, nullptr};
+Block BC4_Green_Half_Alpha_Block = {BC4_Green_Half_Alpha, nullptr};
+Block BC5_Red_Ignore_Alpha_Block = {BC5_Red_Ignore_Alpha, nullptr};
+Block BC5_Blue_Half_Alpha_Block = {BC5_Blue_Half_Alpha, nullptr};
+Block BC5_White_Half_Alpha_Block = {BC5_White_Half_Alpha, nullptr};
+Block BC5_Black_Half_Alpha_Block = {BC5_Black_Half_Alpha, nullptr};
+Block BC5_Red_Blue_Half_Alpha_Block = {BC5_Red_Blue_Half_Alpha, nullptr};
+Block BC5_Red_Green_Half_Alpha_Block = {BC5_Red_Green_Half_Alpha, nullptr};
+Block BC5_Green_Blue_Half_Alpha_Block = {BC5_Green_Blue_Half_Alpha, nullptr};
+Block BC5_Red_Full_Alpha_Block = {BC5_Red_Full_Alpha, nullptr};
+Block BC5_Green_Full_Alpha_Block = {BC5_Green_Full_Alpha, nullptr};
+Block BC5_Blue_Full_Alpha_Block = {BC5_Blue_Full_Alpha, nullptr};
+Block BC5_White_Full_Alpha_Block = {BC5_White_Full_Alpha, nullptr};
+Block BC5_Green_Ignore_Alpha_Block = {BC5_Green_Ignore_Alpha, nullptr};
+Block BC5_Black_Full_Alpha_Block = {BC5_Black_Full_Alpha, nullptr};
+Block BC5_Red_Blue_Full_Alpha_Block = {BC5_Red_Blue_Full_Alpha, nullptr};
+Block BC5_Red_Green_Full_Alpha_Block = {BC5_Red_Green_Full_Alpha, nullptr};
+Block BC5_Green_Blue_Full_Alpha_Block = {BC5_Green_Blue_Full_Alpha, nullptr};
+Block BC5_Blue_Ignore_Alpha_Block = {BC5_Blue_Ignore_Alpha, nullptr};
+Block BC5_White_Ignore_Alpha_Block = {BC5_White_Ignore_Alpha, nullptr};
+Block BC5_Black_Ignore_Alpha_Block = {BC5_Black_Ignore_Alpha, nullptr};
+Block BC5_Red_Blue_Ignore_Alpha_Block = {BC5_Red_Blue_Ignore_Alpha, nullptr};
+Block BC5_Red_Green_Ignore_Alpha_Block = {BC5_Red_Green_Ignore_Alpha, nullptr};
+Block BC5_Green_Blue_Ignore_Alpha_Block = {BC5_Green_Blue_Ignore_Alpha, nullptr};
+Block BC5_Red_Half_Alpha_Block = {BC5_Red_Half_Alpha, nullptr};
+Block BC5_Green_Half_Alpha_Block = {BC5_Green_Half_Alpha, nullptr};
+BlockBC6 BC6_Red_Ignore_Alpha_Block = {BC6_Red_Ignore_Alpha, nullptr};
+BlockBC6 BC6_Blue_Half_Alpha_Block = {BC6_Blue_Half_Alpha, nullptr};
+BlockBC6 BC6_White_Half_Alpha_Block = {BC6_White_Half_Alpha, nullptr};
+BlockBC6 BC6_Black_Half_Alpha_Block = {BC6_Black_Half_Alpha, nullptr};
+BlockBC6 BC6_Red_Blue_Half_Alpha_Block = {BC6_Red_Blue_Half_Alpha, nullptr};
+BlockBC6 BC6_Red_Green_Half_Alpha_Block = {BC6_Red_Green_Half_Alpha, nullptr};
+BlockBC6 BC6_Green_Blue_Half_Alpha_Block = {BC6_Green_Blue_Half_Alpha, nullptr};
+BlockBC6 BC6_Red_Full_Alpha_Block = {BC6_Red_Full_Alpha, nullptr};
+BlockBC6 BC6_Green_Full_Alpha_Block = {BC6_Green_Full_Alpha, nullptr};
+BlockBC6 BC6_Blue_Full_Alpha_Block = {BC6_Blue_Full_Alpha, nullptr};
+BlockBC6 BC6_White_Full_Alpha_Block = {BC6_White_Full_Alpha, nullptr};
+BlockBC6 BC6_Green_Ignore_Alpha_Block = {BC6_Green_Ignore_Alpha, nullptr};
+BlockBC6 BC6_Black_Full_Alpha_Block = {BC6_Black_Full_Alpha, nullptr};
+BlockBC6 BC6_Red_Blue_Full_Alpha_Block = {BC6_Red_Blue_Full_Alpha, nullptr};
+BlockBC6 BC6_Red_Green_Full_Alpha_Block = {BC6_Red_Green_Full_Alpha, nullptr};
+BlockBC6 BC6_Green_Blue_Full_Alpha_Block = {BC6_Green_Blue_Full_Alpha, nullptr};
+BlockBC6 BC6_Blue_Ignore_Alpha_Block = {BC6_Blue_Ignore_Alpha, nullptr};
+BlockBC6 BC6_White_Ignore_Alpha_Block = {BC6_White_Ignore_Alpha, nullptr};
+BlockBC6 BC6_Black_Ignore_Alpha_Block = {BC6_Black_Ignore_Alpha, nullptr};
+BlockBC6 BC6_Red_Blue_Ignore_Alpha_Block = {BC6_Red_Blue_Ignore_Alpha, nullptr};
+BlockBC6 BC6_Red_Green_Ignore_Alpha_Block = {BC6_Red_Green_Ignore_Alpha, nullptr};
+BlockBC6 BC6_Green_Blue_Ignore_Alpha_Block = {BC6_Green_Blue_Ignore_Alpha, nullptr};
+BlockBC6 BC6_Red_Half_Alpha_Block = {BC6_Red_Half_Alpha, nullptr};
+BlockBC6 BC6_Green_Half_Alpha_Block = {BC6_Green_Half_Alpha, nullptr};
+Block BC7_Red_Ignore_Alpha_Block = {BC7_Red_Ignore_Alpha, nullptr};
+Block BC7_Blue_Half_Alpha_Block = {BC7_Blue_Half_Alpha, nullptr};
+Block BC7_White_Half_Alpha_Block = {BC7_White_Half_Alpha, nullptr};
+Block BC7_Black_Half_Alpha_Block = {BC7_Black_Half_Alpha, nullptr};
+Block BC7_Red_Blue_Half_Alpha_Block = {BC7_Red_Blue_Half_Alpha, nullptr};
+Block BC7_Red_Green_Half_Alpha_Block = {BC7_Red_Green_Half_Alpha, nullptr};
+Block BC7_Green_Blue_Half_Alpha_Block = {BC7_Green_Blue_Half_Alpha, nullptr};
+Block BC7_Red_Full_Alpha_Block = {BC7_Red_Full_Alpha, nullptr};
+Block BC7_Green_Full_Alpha_Block = {BC7_Green_Full_Alpha, nullptr};
+Block BC7_Blue_Full_Alpha_Block = {BC7_Blue_Full_Alpha, nullptr};
+Block BC7_White_Full_Alpha_Block = {BC7_White_Full_Alpha, nullptr};
+Block BC7_Green_Ignore_Alpha_Block = {BC7_Green_Ignore_Alpha, nullptr};
+Block BC7_Black_Full_Alpha_Block = {BC7_Black_Full_Alpha, nullptr};
+Block BC7_Red_Blue_Full_Alpha_Block = {BC7_Red_Blue_Full_Alpha, nullptr};
+Block BC7_Red_Green_Full_Alpha_Block = {BC7_Red_Green_Full_Alpha, nullptr};
+Block BC7_Green_Blue_Full_Alpha_Block = {BC7_Green_Blue_Full_Alpha, nullptr};
+Block BC7_Blue_Ignore_Alpha_Block = {BC7_Blue_Ignore_Alpha, nullptr};
+Block BC7_White_Ignore_Alpha_Block = {BC7_White_Ignore_Alpha, nullptr};
+Block BC7_Black_Ignore_Alpha_Block = {BC7_Black_Ignore_Alpha, nullptr};
+Block BC7_Red_Blue_Ignore_Alpha_Block = {BC7_Red_Blue_Ignore_Alpha, nullptr};
+Block BC7_Red_Green_Ignore_Alpha_Block = {BC7_Red_Green_Ignore_Alpha, nullptr};
+Block BC7_Green_Blue_Ignore_Alpha_Block = {BC7_Green_Blue_Ignore_Alpha, nullptr};
+Block BC7_Red_Half_Alpha_Block = {BC7_Red_Half_Alpha, nullptr};
+Block BC7_Green_Half_Alpha_Block = {BC7_Green_Half_Alpha, nullptr};

 static std::unordered_map<std::string, Block> blocks {
 	{ "BC1_Red_Ignore_Alpha", BC1_Red_Ignore_Alpha_Block},
@ -222,7 +415,106 @@ static std::unordered_map<std::string, Block> blocks {
 	{ "BC3_Red_Green_Ignore_Alpha", BC3_Red_Green_Ignore_Alpha_Block},
 	{ "BC3_Green_Blue_Ignore_Alpha", BC3_Green_Blue_Ignore_Alpha_Block},
 	{ "BC3_Red_Half_Alpha", BC3_Red_Half_Alpha_Block},
-	{ "BC3_Green_Half_Alpha", BC3_Green_Half_Alpha_Block}
+	{ "BC3_Green_Half_Alpha", BC3_Green_Half_Alpha_Block},
+	{ "BC4_Red_Ignore_Alpha", BC4_Red_Ignore_Alpha_Block},
+	{ "BC4_Blue_Half_Alpha", BC4_Blue_Half_Alpha_Block},
+	{ "BC4_White_Half_Alpha", BC4_White_Half_Alpha_Block},
+	{ "BC4_Black_Half_Alpha", BC4_Black_Half_Alpha_Block},
+	{ "BC4_Red_Blue_Half_Alpha", BC4_Red_Blue_Half_Alpha_Block},
+	{ "BC4_Red_Green_Half_Alpha", BC4_Red_Green_Half_Alpha_Block},
+	{ "BC4_Green_Blue_Half_Alpha", BC4_Green_Blue_Half_Alpha_Block},
+	{ "BC4_Red_Full_Alpha", BC4_Red_Full_Alpha_Block},
+	{ "BC4_Green_Full_Alpha", BC4_Green_Full_Alpha_Block},
+	{ "BC4_Blue_Full_Alpha", BC4_Blue_Full_Alpha_Block},
+	{ "BC4_White_Full_Alpha", BC4_White_Full_Alpha_Block},
+	{ "BC4_Green_Ignore_Alpha", BC4_Green_Ignore_Alpha_Block},
+	{ "BC4_Black_Full_Alpha", BC4_Black_Full_Alpha_Block},
+	{ "BC4_Red_Blue_Full_Alpha", BC4_Red_Blue_Full_Alpha_Block},
+	{ "BC4_Red_Green_Full_Alpha", BC4_Red_Green_Full_Alpha_Block},
+	{ "BC4_Green_Blue_Full_Alpha", BC4_Green_Blue_Full_Alpha_Block},
+	{ "BC4_Blue_Ignore_Alpha", BC4_Blue_Ignore_Alpha_Block},
+	{ "BC4_White_Ignore_Alpha", BC4_White_Ignore_Alpha_Block},
+	{ "BC4_Black_Ignore_Alpha", BC4_Black_Ignore_Alpha_Block},
+	{ "BC4_Red_Blue_Ignore_Alpha", BC4_Red_Blue_Ignore_Alpha_Block},
+	{ "BC4_Red_Green_Ignore_Alpha", BC4_Red_Green_Ignore_Alpha_Block},
+	{ "BC4_Green_Blue_Ignore_Alpha", BC4_Green_Blue_Ignore_Alpha_Block},
+	{ "BC4_Red_Half_Alpha", BC4_Red_Half_Alpha_Block},
+	{ "BC4_Green_Half_Alpha", BC4_Green_Half_Alpha_Block},
+	{ "BC5_Red_Ignore_Alpha", BC5_Red_Ignore_Alpha_Block},
+	{ "BC5_Blue_Half_Alpha", BC5_Blue_Half_Alpha_Block},
+	{ "BC5_White_Half_Alpha", BC5_White_Half_Alpha_Block},
+	{ "BC5_Black_Half_Alpha", BC5_Black_Half_Alpha_Block},
+	{ "BC5_Red_Blue_Half_Alpha", BC5_Red_Blue_Half_Alpha_Block},
+	{ "BC5_Red_Green_Half_Alpha", BC5_Red_Green_Half_Alpha_Block},
+	{ "BC5_Green_Blue_Half_Alpha", BC5_Green_Blue_Half_Alpha_Block},
+	{ "BC5_Red_Full_Alpha", BC5_Red_Full_Alpha_Block},
+	{ "BC5_Green_Full_Alpha", BC5_Green_Full_Alpha_Block},
+	{ "BC5_Blue_Full_Alpha", BC5_Blue_Full_Alpha_Block},
+	{ "BC5_White_Full_Alpha", BC5_White_Full_Alpha_Block},
+	{ "BC5_Green_Ignore_Alpha", BC5_Green_Ignore_Alpha_Block},
+	{ "BC5_Black_Full_Alpha", BC5_Black_Full_Alpha_Block},
+	{ "BC5_Red_Blue_Full_Alpha", BC5_Red_Blue_Full_Alpha_Block},
+	{ "BC5_Red_Green_Full_Alpha", BC5_Red_Green_Full_Alpha_Block},
+	{ "BC5_Green_Blue_Full_Alpha", BC5_Green_Blue_Full_Alpha_Block},
+	{ "BC5_Blue_Ignore_Alpha", BC5_Blue_Ignore_Alpha_Block},
+	{ "BC5_White_Ignore_Alpha", BC5_White_Ignore_Alpha_Block},
+	{ "BC5_Black_Ignore_Alpha", BC5_Black_Ignore_Alpha_Block},
+	{ "BC5_Red_Blue_Ignore_Alpha", BC5_Red_Blue_Ignore_Alpha_Block},
+	{ "BC5_Red_Green_Ignore_Alpha", BC5_Red_Green_Ignore_Alpha_Block},
+	{ "BC5_Green_Blue_Ignore_Alpha", BC5_Green_Blue_Ignore_Alpha_Block},
+	{ "BC5_Red_Half_Alpha", BC5_Red_Half_Alpha_Block},
+	{ "BC5_Green_Half_Alpha", BC5_Green_Half_Alpha_Block},
+	{ "BC7_Red_Ignore_Alpha", BC7_Red_Ignore_Alpha_Block},
+	{ "BC7_Blue_Half_Alpha", BC7_Blue_Half_Alpha_Block},
+	{ "BC7_White_Half_Alpha", BC7_White_Half_Alpha_Block},
+	{ "BC7_Black_Half_Alpha", BC7_Black_Half_Alpha_Block},
+	{ "BC7_Red_Blue_Half_Alpha", BC7_Red_Blue_Half_Alpha_Block},
+	{ "BC7_Red_Green_Half_Alpha", BC7_Red_Green_Half_Alpha_Block},
+	{ "BC7_Green_Blue_Half_Alpha", BC7_Green_Blue_Half_Alpha_Block},
+	{ "BC7_Red_Full_Alpha", BC7_Red_Full_Alpha_Block},
+	{ "BC7_Green_Full_Alpha", BC7_Green_Full_Alpha_Block},
+	{ "BC7_Blue_Full_Alpha", BC7_Blue_Full_Alpha_Block},
+	{ "BC7_White_Full_Alpha", BC7_White_Full_Alpha_Block},
+	{ "BC7_Green_Ignore_Alpha", BC7_Green_Ignore_Alpha_Block},
+	{ "BC7_Black_Full_Alpha", BC7_Black_Full_Alpha_Block},
+	{ "BC7_Red_Blue_Full_Alpha", BC7_Red_Blue_Full_Alpha_Block},
+	{ "BC7_Red_Green_Full_Alpha", BC7_Red_Green_Full_Alpha_Block},
+	{ "BC7_Green_Blue_Full_Alpha", BC7_Green_Blue_Full_Alpha_Block},
+	{ "BC7_Blue_Ignore_Alpha", BC7_Blue_Ignore_Alpha_Block},
+	{ "BC7_White_Ignore_Alpha", BC7_White_Ignore_Alpha_Block},
+	{ "BC7_Black_Ignore_Alpha", BC7_Black_Ignore_Alpha_Block},
+	{ "BC7_Red_Blue_Ignore_Alpha", BC7_Red_Blue_Ignore_Alpha_Block},
+	{ "BC7_Red_Green_Ignore_Alpha", BC7_Red_Green_Ignore_Alpha_Block},
+	{ "BC7_Green_Blue_Ignore_Alpha", BC7_Green_Blue_Ignore_Alpha_Block},
+	{ "BC7_Red_Half_Alpha", BC7_Red_Half_Alpha_Block},
+	{ "BC7_Green_Half_Alpha", BC7_Green_Half_Alpha_Block}
+};
+
+static std::unordered_map<std::string, BlockBC6> blocksBC6 {
+	{ "BC6_Red_Ignore_Alpha", BC6_Red_Ignore_Alpha_Block},
+	{ "BC6_Blue_Half_Alpha", BC6_Blue_Half_Alpha_Block},
+	{ "BC6_White_Half_Alpha", BC6_White_Half_Alpha_Block},
+	{ "BC6_Black_Half_Alpha", BC6_Black_Half_Alpha_Block},
+	{ "BC6_Red_Blue_Half_Alpha", BC6_Red_Blue_Half_Alpha_Block},
+	{ "BC6_Red_Green_Half_Alpha", BC6_Red_Green_Half_Alpha_Block},
+	{ "BC6_Green_Blue_Half_Alpha", BC6_Green_Blue_Half_Alpha_Block},
+	{ "BC6_Red_Full_Alpha", BC6_Red_Full_Alpha_Block},
+	{ "BC6_Green_Full_Alpha", BC6_Green_Full_Alpha_Block},
+	{ "BC6_Blue_Full_Alpha", BC6_Blue_Full_Alpha_Block},
+	{ "BC6_White_Full_Alpha", BC6_White_Full_Alpha_Block},
+	{ "BC6_Green_Ignore_Alpha", BC6_Green_Ignore_Alpha_Block},
+	{ "BC6_Black_Full_Alpha", BC6_Black_Full_Alpha_Block},
+	{ "BC6_Red_Blue_Full_Alpha", BC6_Red_Blue_Full_Alpha_Block},
+	{ "BC6_Red_Green_Full_Alpha", BC6_Red_Green_Full_Alpha_Block},
+	{ "BC6_Green_Blue_Full_Alpha", BC6_Green_Blue_Full_Alpha_Block},
+	{ "BC6_Blue_Ignore_Alpha", BC6_Blue_Ignore_Alpha_Block},
+	{ "BC6_White_Ignore_Alpha", BC6_White_Ignore_Alpha_Block},
+	{ "BC6_Black_Ignore_Alpha", BC6_Black_Ignore_Alpha_Block},
+	{ "BC6_Red_Blue_Ignore_Alpha", BC6_Red_Blue_Ignore_Alpha_Block},
+	{ "BC6_Red_Green_Ignore_Alpha", BC6_Red_Green_Ignore_Alpha_Block},
+	{ "BC6_Green_Blue_Ignore_Alpha", BC6_Green_Blue_Ignore_Alpha_Block},
+	{ "BC6_Red_Half_Alpha", BC6_Red_Half_Alpha_Block},
+	{ "BC6_Green_Half_Alpha", BC6_Green_Half_Alpha_Block}
 };

 #endif
--- a/extern/CMP_Core/test/CMakeLists.txt
+++ b/extern/CMP_Core/test/CMakeLists.txt
@ -9,5 +9,7 @@ target_sources(Tests
                CompressonatorTests.cpp
                CompressonatorTests.h
                BlockConstants.h
+		../../Applications/_Plugins/Common/UtilFuncs.cpp
+		../../Applications/_Plugins/Common/UtilFuncs.h
                )
 target_link_libraries(Tests Catch2::Catch2 CMP_Core)
--- a/extern/CMP_Core/test/CompressonatorTests.cpp
+++ b/extern/CMP_Core/test/CompressonatorTests.cpp