Add external libs for comparisons and benchmarks.

2020-03-23 10:07:38 -07:00
parent 4a33d1ac75
commit 9a16bebf8f
67 changed files with 24230 additions and 1 deletions
--- a/extern/CMP_Core/CMP_Core.def
+++ b/extern/CMP_Core/CMP_Core.def
@ -0,0 +1,56 @@
+; CMP_Core.def : Declares the functions exported from the CMP_Core DLL.
+
+EXPORTS
+CreateOptionsBC1
+CreateOptionsBC2
+CreateOptionsBC3
+CreateOptionsBC4
+CreateOptionsBC5
+CreateOptionsBC6
+CreateOptionsBC7
+
+DestroyOptionsBC1
+DestroyOptionsBC2
+DestroyOptionsBC3
+DestroyOptionsBC4
+DestroyOptionsBC5
+DestroyOptionsBC6
+DestroyOptionsBC7
+
+SetDecodeChannelMapping
+
+SetChannelWeightsBC1
+SetChannelWeightsBC2
+SetChannelWeightsBC3
+
+SetQualityBC1
+SetQualityBC2
+SetQualityBC3
+SetQualityBC4
+SetQualityBC5
+SetQualityBC6
+SetQualityBC7
+
+SetAlphaThresholdBC1
+
+SetMaskBC6
+SetMaskBC7
+
+SetErrorThresholdBC7
+SetAlphaOptionsBC7
+
+CompressBlockBC1
+CompressBlockBC2
+CompressBlockBC3
+CompressBlockBC4
+CompressBlockBC5
+CompressBlockBC6
+CompressBlockBC7
+
+DecompressBlockBC1
+DecompressBlockBC2
+DecompressBlockBC3
+DecompressBlockBC4
+DecompressBlockBC5
+DecompressBlockBC6
+DecompressBlockBC7
--- a/extern/CMP_Core/CMakeLists.txt
+++ b/extern/CMP_Core/CMakeLists.txt
@ -0,0 +1,33 @@
+# Build script for the CMP_Core static library (BC1-BC7 block encoder kernels).
+cmake_minimum_required(VERSION 3.10)
+
+# The target is created with an empty source list; the real sources are
+# attached by target_sources() below.
+add_library(CMP_Core STATIC "")
+
+# One header/implementation pair per block-compression format, plus the
+# shared kernel headers.
+target_sources(CMP_Core
+               PRIVATE
+                   shaders/BC1_Encode_kernel.h
+                   shaders/BC1_Encode_kernel.cpp
+                   shaders/BC2_Encode_kernel.h
+                   shaders/BC2_Encode_kernel.cpp
+                   shaders/BC3_Encode_kernel.h
+                   shaders/BC3_Encode_kernel.cpp
+                   shaders/BC4_Encode_kernel.h
+                   shaders/BC4_Encode_kernel.cpp
+                   shaders/BC5_Encode_kernel.h
+                   shaders/BC5_Encode_kernel.cpp
+                   shaders/BC6_Encode_kernel.h
+                   shaders/BC6_Encode_kernel.cpp
+                   shaders/BC7_Encode_Kernel.h
+                   shaders/BC7_Encode_Kernel.cpp
+                   shaders/BCn_Common_Kernel.h
+                   shaders/Common_Def.h
+                   )
+
+# PRIVATE: these include paths apply only when compiling CMP_Core itself and
+# are not propagated to targets that link against it.
+target_include_directories(CMP_Core
+                           PRIVATE
+                           shaders
+                           source)
+# The test subdirectory is currently not part of the build.
+#add_subdirectory(test)
+
+# NOTE(review): in shaders/BC1_Encode_kernel.cpp the C user interface
+# (CreateOptionsBC1, CompressBlockBC1, ...) is wrapped in `#ifndef ASPM_GPU`,
+# so defining ASPM_GPU here compiles those entry points out on UNIX builds and
+# enables the CMP_GPUEncoder kernel instead - confirm this is intended.
+if (UNIX)
+target_compile_definitions(CMP_Core PRIVATE _LINUX ASPM_GPU)
+endif()
--- a/extern/CMP_Core/shaders/BC1_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC1_Encode_kernel.cpp
@ -0,0 +1,582 @@
+//=====================================================================
+// Copyright (c) 2019    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#include "BC1_Encode_kernel.h"
+
+//============================================== BC1 INTERFACES  =======================================================
+// Fast, low-quality BC1 encoder for a single 4x4 block.
+//
+// srcBlockTemp    : the 16 source pixels; only .x, .y and .z are read, the
+//                   .w channel is ignored.
+// compressedBlock : receives the result. Word 0 holds the two RGB565
+//                   endpoints (c0 in the low 16 bits, c1 in the high 16 bits),
+//                   word 1 holds sixteen 2-bit indices, pixel i at bits
+//                   [2*i, 2*i+1].
+//
+// The encoder fits one axis through the 16 colours (in the CS_* transformed
+// colour space), projects every pixel onto that axis, uses the two extremes
+// as endpoints and quantises each projection to one of four positions.
+//
+// Only computations that feed compressedBlock are performed; every pixel is
+// treated as its own sample (the sample count is fixed at 16).
+void CompressBlockBC1_Fast(
+    CMP_Vec4uc  srcBlockTemp[16],
+    CMP_GLOBAL CGU_UINT32 compressedBlock[2])
+{
+    int i;
+
+    CMP_Vec3f rgb;
+    CMP_Vec3f average_rgb;                  // The centrepoint of the axis
+    CMP_Vec3f v_rgb;                        // The axis
+    CMP_Vec3f uniques[16];                  // The transformed colour of each pixel
+    const int unique_pixels = 16;           // Number of samples: every pixel counts once
+    CGU_FLOAT unique_recip;                 // Reciprocal of the above for fast multiplication
+
+    CGU_FLOAT pos_on_axis[16];                 // The distance each pixel falls along the compression axis
+    CGU_FLOAT left = 0, right = 0, centre = 0; // The extremities and centre (average of left/right) along the axis
+
+    int swap;                              // 1 if the endpoints were exchanged to force the 4-colour ordering
+
+    // -------------------------------------------------------------------------------------
+    // (3) Transform every pixel into the working colour space and sum them to find
+    // their average position
+    // -------------------------------------------------------------------------------------
+    {
+        average_rgb = 0.0f;
+        for (i = 0; i < unique_pixels; i++)
+        {
+            CMP_Vec3f trgb;
+            rgb.x = (CGU_FLOAT)((srcBlockTemp[i].x) & 0xff);
+            rgb.y = (CGU_FLOAT)((srcBlockTemp[i].y) & 0xff);
+            rgb.z = (CGU_FLOAT)((srcBlockTemp[i].z) & 0xff);
+
+            trgb.x = CS_RED(rgb.x, rgb.y, rgb.z);
+            trgb.y = CS_GREEN(rgb.x, rgb.y, rgb.z);
+            trgb.z = CS_BLUE(rgb.x, rgb.y, rgb.z);
+            uniques[i] = trgb;
+
+            average_rgb = average_rgb + trgb;
+        }
+
+        // Compute average of the samples
+        unique_recip = 1.0f / (CGU_FLOAT)unique_pixels;
+        average_rgb = average_rgb * unique_recip;
+    }
+
+    // -------------------------------------------------------------------------------------
+    // (4) For each component, reflect points about the average so all lie on the same side
+    // of the average, and compute the new average - this gives a second point that defines the axis
+    // To compute the sign of the axis sum the positive differences of G for each of R and B (the
+    // G axis is always positive in this implementation)
+    // -------------------------------------------------------------------------------------
+    // If the G axis contains no information (both sums are exactly zero) the R/B
+    // relationship decides the sign of the B component instead.
+    {
+        CGU_FLOAT rg_pos, bg_pos, rb_pos;
+        v_rgb = 0.0f;
+        rg_pos = bg_pos = rb_pos = 0;
+
+        for (i = 0; i < unique_pixels; i++)
+        {
+            rgb = uniques[i] - average_rgb;
+
+#ifndef ASPM_GPU
+            v_rgb.x += (CGU_FLOAT)fabs(rgb.x);
+            v_rgb.y += (CGU_FLOAT)fabs(rgb.y);
+            v_rgb.z += (CGU_FLOAT)fabs(rgb.z);
+#else
+            v_rgb = v_rgb + fabs(rgb);
+#endif
+
+            if (rgb.x > 0) { rg_pos += rgb.y; rb_pos += rgb.z; }
+            if (rgb.z > 0) bg_pos += rgb.y;
+        }
+        v_rgb = v_rgb*unique_recip;
+        if (rg_pos < 0) v_rgb.x = -v_rgb.x;
+        if (bg_pos < 0) v_rgb.z = -v_rgb.z;
+        if ((rg_pos == bg_pos) && (rg_pos == 0))
+            if (rb_pos < 0) v_rgb.z = -v_rgb.z;
+    }
+
+    // -------------------------------------------------------------------------------------
+    // (5) Normalise the axis; a zero-length axis (all pixels identical) is left as is
+    // -------------------------------------------------------------------------------------
+    {
+        CGU_FLOAT v2_recip;
+        v2_recip = (v_rgb.x*v_rgb.x + v_rgb.y*v_rgb.y + v_rgb.z*v_rgb.z);
+        if (v2_recip > 0)
+            v2_recip = 1.0f / (CGU_FLOAT)sqrt(v2_recip);
+        else
+            v2_recip = 1.0f;
+        v_rgb = v_rgb*v2_recip;
+    }
+
+    // -------------------------------------------------------------------------------------
+    // (6) Map the axis
+    // -------------------------------------------------------------------------------------
+    // Project all the points onto the axis and record the distance along the axis
+    // from its centre (the average). For a unit vector v the distance along v of
+    // point R relative to P is (R-P).v
+    // The extremities are gathered at the same time.
+    {
+        left = 10000.0f;
+        right = -10000.0f;
+        for (i = 0; i < unique_pixels; i++)
+        {
+            // Compute the distance along the axis of the point of closest approach
+            CMP_Vec3f temp = (uniques[i] - average_rgb);
+            pos_on_axis[i] = (temp.x * v_rgb.x) + (temp.y * v_rgb.y) + (temp.z * v_rgb.z);
+
+            // Work out the extremities
+            if (pos_on_axis[i] < left)
+                left = pos_on_axis[i];
+            if (pos_on_axis[i] > right)
+                right = pos_on_axis[i];
+        }
+    }
+
+    // -------------------------------------------------------------------------------------
+    // (7) Represent the endpoints accurately by moving the average to the centre of
+    // the extremities and re-expressing every position relative to that centre
+    // -------------------------------------------------------------------------------------
+    {
+        centre = (left + right) / 2;
+        average_rgb = average_rgb + (v_rgb*centre);
+        for (i = 0; i<unique_pixels; i++)
+            pos_on_axis[i] -= centre;
+        right -= centre;
+        left -= centre;
+    }
+
+    // -------------------------------------------------------------------------------------
+    // (8) Calculate the high and low output colour values
+    // A straight rounded average is not correct, as the decompressor 'unrounds' by replicating
+    // the top bits to the bottom, so ROUND_AND_CLAMP applies a centre-biased rounding that
+    // depends on the input value.
+    // rgb = (average_rgb + (left|right)*v_rgb);
+    // -------------------------------------------------------------------------------------
+    {
+        CGU_UINT32 c0, c1, t;
+        int rd, gd, bd;
+        rgb = (average_rgb + (v_rgb * left));
+        rd = ( CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z);
+        gd = ( CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z);
+        bd = ( CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z);
+        ROUND_AND_CLAMP(rd, 5);
+        ROUND_AND_CLAMP(gd, 6);
+        ROUND_AND_CLAMP(bd, 5);
+        c0 = ((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3);
+
+        rgb = average_rgb + (v_rgb * right);
+        rd = ( CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z);
+        gd = ( CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z);
+        bd = ( CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z);
+        ROUND_AND_CLAMP(rd, 5);
+        ROUND_AND_CLAMP(gd, 6);
+        ROUND_AND_CLAMP(bd, 5);
+        c1 = (((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3));
+
+        // Force to be a 4-colour opaque block - in which case, c0 is greater than c1
+        if (c0 < c1)
+        {
+            t = c0;
+            c0 = c1;
+            c1 = t;
+            swap = 1;
+        }
+        else if (c0 == c1)
+        {
+            // This block will always be encoded in 3-colour mode
+            // Need to ensure that only one of the two points gets used,
+            // avoiding accidentally setting some transparent pixels into the block
+            for (i = 0; i<unique_pixels; i++)
+                pos_on_axis[i] = left;
+            swap = 0;
+        }
+        else
+            swap = 0;
+
+        compressedBlock[0] = c0 | (c1 << 16);
+    }
+
+    // -------------------------------------------------------------------------------------
+    // (9) Final clustering, creating the 2-bit values that define the output
+    // -------------------------------------------------------------------------------------
+    {
+        CGU_UINT32 bit;
+        CGU_FLOAT division;
+
+        compressedBlock[1] = 0;
+        division = right*2.0f / 3.0f;
+        centre = (left + right) / 2;        // Actually, this code only works if centre is 0 or approximately so
+
+        for (i = 0; i<16; i++)
+        {
+            rgb.z = pos_on_axis[i];
+            // Endpoints are indices 0 and 1, while interpolants are 2 and 3
+            if (fabs(rgb.z) >= division)
+                bit = 0;
+            else
+                bit = 2;
+            // Positive is in the latter half of the block
+            if (rgb.z >= centre)
+                bit += 1;
+            // Set the output; XOR with swap exchanges 0<->1 and 2<->3 when the
+            // endpoints were exchanged above
+            compressedBlock[1] |= ((bit^swap) << (2 * i));
+        }
+    }
+    // done
+}
+
+// Writes two 32-bit words out as eight bytes, least-significant byte first:
+// data[0] fills u_dstptr[0..3] and data[1] fills u_dstptr[4..7].
+INLINE void store_uint8(CMP_GLOBAL CGU_UINT8 u_dstptr[8], CGU_UINT32 data[2])
+{
+   for (CGU_INT byteIdx = 0; byteIdx < 8; byteIdx++)
+   {
+      const CGU_INT wordIdx  = byteIdx >> 2;        // 0 for bytes 0-3, 1 for bytes 4-7
+      const CGU_INT bitShift = (byteIdx & 3) * 8;   // 0, 8, 16, 24 within the word
+      u_dstptr[byteIdx] = (data[wordIdx] >> bitShift)&0xFF;
+   }
+}
+
+// Encodes one 4x4 block as BC1 using the full-quality RGB path.
+//
+// srcBlockTemp    : the 16 source pixels.
+// compressedBlock : receives the two 32-bit words of the encoded block.
+// BC15options     : encoder settings; copied locally so that
+//                   CalculateColourWeightings can modify the copy without
+//                   touching the caller's options.
+void  CompressBlockBC1_Internal(
+    const CMP_Vec4uc  srcBlockTemp[16],
+    CMP_GLOBAL  CGU_UINT32      compressedBlock[2],
+    CMP_GLOBAL  const CMP_BC15Options *BC15options)
+{
+    // Repack the pixels into a flat 64-byte array, four bytes per pixel in
+    // the order .z, .y, .x, .w.
+    CGU_UINT8    rgbBlock[64];
+    for ( CGU_INT32 pixel = 0; pixel < 16; pixel++) {
+        const CGU_INT32 base = pixel * 4;
+        rgbBlock[base + 0] = (CGU_UINT8)srcBlockTemp[pixel].z;
+        rgbBlock[base + 1] = (CGU_UINT8)srcBlockTemp[pixel].y;
+        rgbBlock[base + 2] = (CGU_UINT8)srcBlockTemp[pixel].x;
+        rgbBlock[base + 3] = (CGU_UINT8)srcBlockTemp[pixel].w;
+    }
+
+    CMP_BC15Options internalOptions = *BC15options;
+    CalculateColourWeightings(rgbBlock, &internalOptions);
+
+    CompressRGBBlock(rgbBlock, compressedBlock, &internalOptions,
+                     TRUE, FALSE, internalOptions.m_nAlphaThreshold);
+}
+
+//============================================== USER INTERFACES  ========================================================
+#ifndef ASPM_GPU
+// Allocates a CMP_BC15Options object initialised with the default settings.
+//
+// options : out-parameter receiving the new object (release it with
+//           DestroyOptionsBC1). It is set to NULL on failure.
+// Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when options is
+// NULL, or CGU_CORE_ERR_NEWMEM when the allocation yields a null pointer.
+int CMP_CDECL CreateOptionsBC1(void **options)
+{
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+
+    // Clear the out-parameter first so the caller never sees a stale value,
+    // even if the allocation below fails.
+    (*options) = NULL;
+
+    // A plain new-expression normally reports failure by throwing; the null
+    // test is kept for builds where allocation failure returns null instead.
+    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
+    if (!BC15optionsDefault) return CGU_CORE_ERR_NEWMEM;
+
+    SetDefaultBC15Options(BC15optionsDefault);
+    (*options) = BC15optionsDefault;
+    return CGU_CORE_OK;
+}
+
+// Releases an options object that is deleted here as a CMP_BC15Options.
+// Returns CGU_CORE_ERR_INVALIDPTR for a null pointer, otherwise CGU_CORE_OK.
+int CMP_CDECL DestroyOptionsBC1(void *options)
+{
+    if (!options)
+    {
+        return CGU_CORE_ERR_INVALIDPTR;
+    }
+
+    delete reinterpret_cast <CMP_BC15Options *>(options);
+    return CGU_CORE_OK;
+}
+
+// Stores the encoder quality in the options object.
+//
+// options  : pointer to a CMP_BC15Options object.
+// fquality : requested quality; values below 0 are stored as 0 and values
+//            above 1 are stored as 1.
+// Returns CGU_CORE_ERR_INVALIDPTR when options is NULL (the same code the
+// other setters in this file use for that case), otherwise CGU_CORE_OK.
+int CMP_CDECL SetQualityBC1(void *options, 
+                            CGU_FLOAT fquality)
+{
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+    CMP_BC15Options *BC15optionsDefault =  reinterpret_cast <CMP_BC15Options *>(options);
+    if (fquality < 0.0f) fquality = 0.0f;
+    else
+    if (fquality > 1.0f) fquality = 1.0f;
+    BC15optionsDefault->m_fquality = fquality;
+    return CGU_CORE_OK;
+}
+
+
+// Stores alphaThreshold in the options object's m_nAlphaThreshold field.
+// Returns CGU_CORE_ERR_INVALIDPTR for null options, otherwise CGU_CORE_OK.
+int CMP_CDECL SetAlphaThresholdBC1(void *options, 
+                                   CGU_UINT8 alphaThreshold)
+{
+    if (!options)
+    {
+        return CGU_CORE_ERR_INVALIDPTR;
+    }
+
+    reinterpret_cast <CMP_BC15Options *>(options)->m_nAlphaThreshold = alphaThreshold;
+    return CGU_CORE_OK;
+}
+
+// Stores mapRGBA in the options object's m_mapDecodeRGBA field.
+// Returns CGU_CORE_ERR_INVALIDPTR for null options, otherwise CGU_CORE_OK.
+int CMP_CDECL SetDecodeChannelMapping(void *options,
+                              CGU_BOOL mapRGBA)
+{
+    if (!options)
+    {
+        return CGU_CORE_ERR_INVALIDPTR;
+    }
+
+    reinterpret_cast <CMP_BC15Options *>(options)->m_mapDecodeRGBA = mapRGBA;
+    return CGU_CORE_OK;
+}
+
+// Enables channel weighting and stores the three weights in the options.
+//
+// Each weight must lie in [0, 1]. The first weight found outside that range
+// (checked in red, green, blue order) yields CGU_CORE_ERR_RANGERED,
+// CGU_CORE_ERR_RANGEGREEN or CGU_CORE_ERR_RANGEBLUE and leaves the options
+// untouched. Null options yield CGU_CORE_ERR_INVALIDPTR.
+int CMP_CDECL SetChannelWeightsBC1(void *options,
+                              CGU_FLOAT WeightRed,
+                              CGU_FLOAT WeightGreen,
+                              CGU_FLOAT WeightBlue) {
+    if (!options)
+        return CGU_CORE_ERR_INVALIDPTR;
+
+    if ((WeightRed < 0.0f) || (WeightRed > 1.0f))
+        return CGU_CORE_ERR_RANGERED;
+    if ((WeightGreen < 0.0f) || (WeightGreen > 1.0f))
+        return CGU_CORE_ERR_RANGEGREEN;
+    if ((WeightBlue < 0.0f) || (WeightBlue > 1.0f))
+        return CGU_CORE_ERR_RANGEBLUE;
+
+    CMP_BC15Options *target = reinterpret_cast <CMP_BC15Options *>(options);
+    target->m_bUseChannelWeighting = true;
+    target->m_fChannelWeights[0] = WeightRed;
+    target->m_fChannelWeights[1] = WeightGreen;
+    target->m_fChannelWeights[2] = WeightBlue;
+    return CGU_CORE_OK;
+}
+
+// Compresses one 4x4 block of 4-byte pixels into an 8-byte BC1 block.
+//
+// srcBlock         : first pixel of the block; four bytes are read per pixel
+//                    and stored into .x, .y, .z, .w in that order.
+// srcStrideInBytes : byte distance between the starts of consecutive rows.
+// cmpBlock         : receives the 8 bytes of compressed output.
+// options          : optional CMP_BC15Options; when NULL a default-initialised
+//                    set of options is used.
+// Returns CGU_CORE_ERR_INVALIDPTR when srcBlock or cmpBlock is NULL,
+// otherwise CGU_CORE_OK.
+int CMP_CDECL CompressBlockBC1(const unsigned char *srcBlock,
+                               unsigned int srcStrideInBytes,
+                               CMP_GLOBAL unsigned char cmpBlock[8],
+                               const void *options = NULL) {
+    if (!srcBlock || !cmpBlock) return CGU_CORE_ERR_INVALIDPTR;
+
+    CMP_Vec4uc inBlock[16];
+
+    //----------------------------------
+    // Fill the inBlock with source data
+    //----------------------------------
+    CGU_INT srcpos = 0;
+    CGU_INT dstptr = 0;
+    for (CGU_UINT8 row=0; row < 4; row++)
+    {
+        // Each row restarts at its own stride offset
+        srcpos = row * srcStrideInBytes;
+        for (CGU_UINT8 col = 0; col < 4; col++)
+        {
+            inBlock[dstptr].x = CGU_UINT8(srcBlock[srcpos++]);
+            inBlock[dstptr].y = CGU_UINT8(srcBlock[srcpos++]);
+            inBlock[dstptr].z = CGU_UINT8(srcBlock[srcpos++]);
+            inBlock[dstptr].w = CGU_UINT8(srcBlock[srcpos++]);
+            dstptr++;
+        }
+    }
+
+    // The options are only read from here on, so the pointer stays const.
+    const CMP_BC15Options *BC15options = (const CMP_BC15Options *)options;
+    CMP_BC15Options BC15optionsDefault;
+    if (BC15options == NULL)
+    {
+        SetDefaultBC15Options(&BC15optionsDefault);
+        BC15options     = &BC15optionsDefault;
+    }
+
+    CompressBlockBC1_Internal(inBlock, (CMP_GLOBAL  CGU_UINT32 *)cmpBlock, BC15options);
+    return CGU_CORE_OK;
+}
+
+// Decompresses one 8-byte BC1 block into a 64-byte pixel block.
+//
+// cmpBlock : the 8 bytes of compressed input.
+// srcBlock : receives the 64 bytes of decoded output.
+// options  : optional CMP_BC15Options; when NULL a default-initialised set of
+//            options is used.
+// Returns CGU_CORE_ERR_INVALIDPTR when cmpBlock or srcBlock is NULL,
+// otherwise CGU_CORE_OK.
+int CMP_CDECL DecompressBlockBC1(const unsigned char cmpBlock[8], 
+                                 CMP_GLOBAL unsigned char srcBlock[64],
+                                 const void *options = NULL) {
+    if (!cmpBlock || !srcBlock) return CGU_CORE_ERR_INVALIDPTR;
+
+    CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
+    CMP_BC15Options BC15optionsDefault;
+    if (BC15options == NULL)
+    {
+        BC15options     = &BC15optionsDefault;
+        SetDefaultBC15Options(BC15options);
+    }
+    DecompressDXTRGB_Internal(srcBlock, ( CGU_UINT32 *)cmpBlock, BC15options);
+
+    return CGU_CORE_OK;
+}
+#endif
+
+//============================================== OpenCL USER INTERFACE ========================================================
+#ifdef ASPM_GPU
+// Kernel entry point: encodes the 4x4 block selected by the global ids
+// (id 0 = block column, id 1 = block row) and writes BC1CompBlockSize bytes
+// to the matching position in ImageDestination.
+//
+// ImageSource      : source pixels, indexed row-major by m_src_width.
+// ImageDestination : output buffer of compressed blocks.
+// SourceInfo       : supplies m_src_width, m_src_height and m_fquality.
+// BC15options      : encoder options used by the non-fast path.
+CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
+    CMP_GLOBAL  const CMP_Vec4uc*   ImageSource,
+    CMP_GLOBAL  CGU_UINT8*          ImageDestination,
+    CMP_GLOBAL  Source_Info*        SourceInfo,
+    CMP_GLOBAL  CMP_BC15Options*    BC15options
+)
+{
+    // This function is only compiled when ASPM_GPU is defined, so the ids
+    // always come from get_global_id.
+    const CGU_UINT32 blockCol = get_global_id(0);
+    const CGU_UINT32 blockRow = get_global_id(1);
+
+    // Ids beyond the last whole block have nothing to encode
+    if (blockCol >= (SourceInfo->m_src_width / BlockX)) return;
+    if (blockRow >= (SourceInfo->m_src_height / BlockX)) return;
+
+    const int imageWidth = SourceInfo->m_src_width;
+
+    CGU_UINT32 dstOffset = (blockCol*BC1CompBlockSize) + (blockRow*(imageWidth / BlockX)*BC1CompBlockSize);
+    int srcPos = 4 * (blockRow * imageWidth + blockCol);
+
+    // After reading four pixels of a row, skip ahead to the same column of
+    // the next image row.
+    const int rowSkip = imageWidth - 4;
+
+    CMP_Vec4uc texels[16];
+    int texel = 0;
+    for ( CGU_INT32 row = 0; row < 4; row++) {
+        for ( CGU_INT32 col = 0; col < 4; col++) {
+            texels[texel++] = ImageSource[srcPos++];
+        }
+        srcPos += rowSkip;
+    }
+
+    CMP_GLOBAL  CGU_UINT32 *dstBlock = (CMP_GLOBAL  CGU_UINT32 *)&ImageDestination[dstOffset];
+
+    // fast low quality mode that matches v3.1 code
+    if (SourceInfo->m_fquality <= 0.04f)
+    {
+        CompressBlockBC1_Fast(texels, dstBlock);
+    }
+    else
+    {
+        CompressBlockBC1_Internal(texels, dstBlock, BC15options);
+    }
+}
+#endif
--- a/extern/CMP_Core/shaders/BC1_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC1_Encode_kernel.h
@ -0,0 +1,48 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef BC1_ENCODE_KERNEL_H
+#define BC1_ENCODE_KERNEL_H
+
+#include "Common_Def.h"
+#include "BCn_Common_Kernel.h"
+
+// Colour-space transform used by the fast BC1 encoder: red and green pass
+// through unchanged and blue is replaced by the mean of blue and green.
+// DCS_* is the inverse transform (blue = 2*b' - g).
+// Every argument is parenthesised so that expression arguments expand with
+// the intended precedence.
+#define CS_RED(r, g, b)        (r)
+#define CS_GREEN(r, g, b)    (g)
+#define CS_BLUE(r, g, b)    (((b)+(g))*0.5f)
+#define DCS_RED(r, g, b)    (r)
+#define DCS_GREEN(r, g, b)    (g)
+#define DCS_BLUE(r, g, b)    ((2.0f*(b))-(g))
+#define BYTEPP 4
+// Size in bytes of one compressed BC1 block.
+#define BC1CompBlockSize    8
+
+
+#define ROUND_AND_CLAMP(v, shift)    \
+{\
+    if (v < 0) v = 0;\
+    else if (v > 255) v = 255;\
+    else v += (0x80>>shift) - (v>>shift);\
+}
+
+#define POS(x,y) (pos_on_axis[(x)+(y)*4])
+
+#endif
--- a/extern/CMP_Core/shaders/BC2_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC2_Encode_kernel.cpp
@ -0,0 +1,261 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#include "BC2_Encode_kernel.h"
+
+//============================================== BC2 INTERFACES =======================================================
+
+// Quantises 16 8-bit alpha values to 4 bits each and packs them into two
+// 32-bit words: values 0-7 go into block_dxtc[0] and values 8-15 into
+// block_dxtc[1], value i occupying the 4 bits starting at bit 4 * (i % 8).
+//   block_8    - the 16 input alpha values.
+//   block_dxtc - receives the 64 bits of packed output; both words are overwritten.
+void DXTCV11CompressExplicitAlphaBlock(const CGU_UINT8 block_8[16], CMP_GLOBAL CGU_UINT32 block_dxtc[2])
+{
+    CGU_UINT8 i;
+    block_dxtc[0] = block_dxtc[1] = 0;
+    for (i = 0; i < 16; i++)
+    {
+        // Work in the unsigned type of the destination words: the packed value
+        // is shifted left by up to 28 bits, which would run into the sign bit
+        // of a signed int.
+        CGU_UINT32 v = block_8[i];
+        // Round to the nearest of the 16 levels. For inputs 0..255 the sum
+        // never goes negative and the result is 0..15.
+        v = (v + 7 - (v >> 4)) >> 4;
+        // Defensive upper clamp so a value can never spill into its neighbour's bits.
+        if (v > 0xf)
+            v = 0xf;
+        if (i < 8)
+            block_dxtc[0] |= v << (4 * i);
+        else
+            block_dxtc[1] |= v << (4 * (i - 8));
+    }
+}
+
+// Mask that isolates one packed explicit-alpha value after shifting.
+#define EXPLICIT_ALPHA_PIXEL_MASK 0xf
+// Number of bits each pixel occupies in a packed explicit-alpha word.
+#define EXPLICIT_ALPHA_PIXEL_BPP  4
+
+// Packs a 4x4 block of alpha values into two 32-bit words by delegating to
+// DXTCV11CompressExplicitAlphaBlock. Always reports CGU_CORE_OK.
+CGU_INT CompressExplicitAlphaBlock(const CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4],
+    CMP_GLOBAL CGU_UINT32 compressedBlock[2])
+{
+    const CGU_INT status = CGU_CORE_OK;
+    DXTCV11CompressExplicitAlphaBlock(alphaBlock, compressedBlock);
+    return status;
+}
+
+// Encodes one 4x4 pixel block as BC2: an explicit-alpha part written at
+// compressedBlock[DXTC_OFFSET_ALPHA] and a colour part written at
+// compressedBlock[DXTC_OFFSET_RGB].
+//   srcBlockTemp    - the 16 source pixels.
+//   compressedBlock - receives the encoded block.
+//   BC15options     - encoder options; copied locally, never modified.
+void  CompressBlockBC2_Internal(const CMP_Vec4uc srcBlockTemp[16],
+                                CMP_GLOBAL CGU_UINT32 compressedBlock[4],
+                                CMP_GLOBAL const CMP_BC15Options *BC15options)
+{
+    // Re-pack every pixel as four consecutive bytes taken from .z, .y, .x, .w.
+    CGU_UINT8 rgbaBlock[64];
+    for (CGU_INT32 px = 0; px < 16; px++) {
+        const CGU_INT32 base = px * 4;
+        rgbaBlock[base + 0] = (CGU_UINT8)srcBlockTemp[px].z;  // B
+        rgbaBlock[base + 1] = (CGU_UINT8)srcBlockTemp[px].y;  // G
+        rgbaBlock[base + 2] = (CGU_UINT8)srcBlockTemp[px].x;  // R
+        rgbaBlock[base + 3] = (CGU_UINT8)srcBlockTemp[px].w;  // A
+    }
+
+    // Pull the alpha byte out of each pixel by viewing it as one 32-bit word.
+    CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
+    for (CGU_INT32 px = 0; px < 16; px++)
+        alphaBlock[px] = (CGU_UINT8)(((CGU_INT32*)rgbaBlock)[px] >> RGBA8888_OFFSET_A);
+
+    // CalculateColourWeightings writes into the options it is given, so it
+    // operates on a private copy rather than on the caller's structure.
+    CMP_BC15Options internalOptions = *BC15options;
+    CalculateColourWeightings(rgbaBlock, &internalOptions);
+
+    // The colour part is only encoded when the alpha part succeeded.
+    const CGU_INT alphaStatus = CompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
+    if (alphaStatus == 0)
+        CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], &internalOptions,FALSE,FALSE,0);
+}
+
+//============================================== USER INTERFACES ========================================================
+#ifndef ASPM_GPU
+
+// Allocates a CMP_BC15Options, initialises it with SetDefaultBC15Options and
+// hands it back through *options. The object is released with DestroyOptionsBC2.
+// Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when options itself
+// is null, and CGU_CORE_ERR_NEWMEM (with *options set to NULL) when the
+// allocation yields a null pointer.
+int CMP_CDECL CreateOptionsBC2(void **options)
+{
+    // Guard the out-parameter before it is written through.
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+
+    // NOTE(review): a standard throwing operator new never returns null, so the
+    // failure branch is only reachable with a non-throwing allocator.
+    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
+    if (!BC15optionsDefault) {
+        (*options) = NULL;
+        return CGU_CORE_ERR_NEWMEM;
+    }
+    SetDefaultBC15Options(BC15optionsDefault);
+    (*options) = BC15optionsDefault;
+    return CGU_CORE_OK;
+}
+
+// Deletes an options object, treating the pointer as a CMP_BC15Options.
+// Returns CGU_CORE_ERR_INVALIDPTR for a null pointer, CGU_CORE_OK otherwise.
+int CMP_CDECL DestroyOptionsBC2(void *options)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+    delete static_cast<CMP_BC15Options *>(options);
+    return CGU_CORE_OK;
+}
+
+// Stores the quality setting in the options object, limiting values below 0
+// to 0 and values above 1 to 1.
+// Returns CGU_CORE_ERR_INVALIDPTR for a null options pointer, CGU_CORE_OK otherwise.
+int CMP_CDECL SetQualityBC2(void *options,
+    CGU_FLOAT fquality)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+    CMP_BC15Options *bcOptions = static_cast<CMP_BC15Options *>(options);
+    bcOptions->m_fquality = (fquality < 0.0f) ? 0.0f
+                          : (fquality > 1.0f) ? 1.0f
+                          : fquality;
+    return CGU_CORE_OK;
+}
+
+// Enables channel weighting on the options object and stores the three
+// per-channel weights. Each weight must lie in [0, 1].
+// Returns CGU_CORE_ERR_INVALIDPTR for a null options pointer,
+// CGU_CORE_ERR_RANGERED / _RANGEGREEN / _RANGEBLUE for the first weight that
+// is out of range (nothing is stored in that case), CGU_CORE_OK otherwise.
+int CMP_CDECL SetChannelWeightsBC2(void *options,
+    CGU_FLOAT WeightRed,
+    CGU_FLOAT WeightGreen,
+    CGU_FLOAT WeightBlue) {
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+    CMP_BC15Options *BC15optionsDefault = (CMP_BC15Options *)options;
+
+    // Written as "not inside the range" so that NaN, which fails every
+    // comparison, is rejected instead of being stored as a weight.
+    if (!((WeightRed >= 0.0f) && (WeightRed <= 1.0f)))       return CGU_CORE_ERR_RANGERED;
+    if (!((WeightGreen >= 0.0f) && (WeightGreen <= 1.0f)))   return CGU_CORE_ERR_RANGEGREEN;
+    if (!((WeightBlue >= 0.0f) && (WeightBlue <= 1.0f)))     return CGU_CORE_ERR_RANGEBLUE;
+
+    BC15optionsDefault->m_bUseChannelWeighting = true;
+    BC15optionsDefault->m_fChannelWeights[0] = WeightRed;
+    BC15optionsDefault->m_fChannelWeights[1] = WeightGreen;
+    BC15optionsDefault->m_fChannelWeights[2] = WeightBlue;
+    return CGU_CORE_OK;
+}
+
+// Expands an explicit alpha block (DXT3): each packed 4-bit value is widened
+// to 8 bits by repeating it in the high and low halves of the byte.
+// compressedBlock[0] holds values 0-7 and compressedBlock[1] values 8-15.
+void DecompressExplicitAlphaBlock(CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4],
+    const CGU_UINT32 compressedBlock[2])
+{
+    for (int word = 0; word < 2; word++)
+    {
+        for (int slot = 0; slot < 8; slot++)
+        {
+            const int bitOffset = slot * EXPLICIT_ALPHA_PIXEL_BPP;
+            CGU_UINT8 packed = (CGU_UINT8)((compressedBlock[word] >> bitOffset) & EXPLICIT_ALPHA_PIXEL_MASK);
+            alphaBlock[word * 8 + slot] = (CGU_UINT8)((packed << EXPLICIT_ALPHA_PIXEL_BPP) | packed);
+        }
+    }
+}
+
+// Decodes one BC2 block: the colour part is decoded into rgbaBlock first and
+// then the alpha byte of each of the 16 pixels is replaced with the value
+// decoded from the explicit-alpha part.
+//   rgbaBlock       - receives the 16 decoded pixels, 4 bytes each.
+//   compressedBlock - the encoded block.
+//   BC15options     - forwarded to DecompressDXTRGB_Internal.
+void DecompressBC2_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[BLOCK_SIZE_4X4X4],
+    const CGU_UINT32 compressedBlock[4],
+    const CMP_BC15Options *BC15options)
+{
+    CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
+
+    DecompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
+    DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],BC15options);
+
+    // Treat every pixel as one 32-bit word and splice the alpha byte into it.
+    CMP_GLOBAL CGU_UINT32 *pixels = (CMP_GLOBAL CGU_UINT32*)rgbaBlock;
+    for (CGU_UINT32 px = 0; px < 16; px++)
+    {
+        const CGU_UINT32 colourBits = pixels[px] & ~(BYTE_MASK << RGBA8888_OFFSET_A);
+        pixels[px] = (alphaBlock[px] << RGBA8888_OFFSET_A) | colourBits;
+    }
+}
+
+// Compresses one 4x4 block of 4-byte pixels to BC2.
+//   srcBlock         - first byte of the block's top-left pixel.
+//   srcStrideInBytes - distance in bytes between the starts of consecutive rows.
+//   cmpBlock         - receives the 16 bytes of compressed output.
+//   options          - CMP_BC15Options to use; defaults are used when NULL.
+// Always returns CGU_CORE_OK.
+int CMP_CDECL CompressBlockBC2(const unsigned char *srcBlock,
+                               unsigned int srcStrideInBytes,
+                               CMP_GLOBAL unsigned char cmpBlock[16],
+                               CMP_GLOBAL const void *options = NULL) {
+
+    // Gather the 16 pixels; the four bytes of each pixel land in x, y, z, w.
+    CMP_Vec4uc inBlock[16];
+    for (CGU_UINT8 row = 0; row < 4; row++)
+    {
+        const CGU_INT rowStart = row * srcStrideInBytes;
+        for (CGU_UINT8 col = 0; col < 4; col++)
+        {
+            const CGU_INT texel = rowStart + col * 4;
+            const CGU_INT pixel = row * 4 + col;
+            inBlock[pixel].x = CGU_UINT8(srcBlock[texel + 0]);
+            inBlock[pixel].y = CGU_UINT8(srcBlock[texel + 1]);
+            inBlock[pixel].z = CGU_UINT8(srcBlock[texel + 2]);
+            inBlock[pixel].w = CGU_UINT8(srcBlock[texel + 3]);
+        }
+    }
+
+    // Use the caller's options when given, otherwise a local default set.
+    CMP_BC15Options fallbackOptions;
+    CMP_BC15Options *activeOptions = (CMP_BC15Options *)options;
+    if (!activeOptions)
+    {
+        SetDefaultBC15Options(&fallbackOptions);
+        activeOptions = &fallbackOptions;
+    }
+    CompressBlockBC2_Internal(inBlock, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, activeOptions);
+    return CGU_CORE_OK;
+}
+
+// Decompresses one 16-byte BC2 block into 64 bytes (16 pixels of 4 bytes).
+//   cmpBlock - the compressed block.
+//   srcBlock - receives the decoded pixels.
+//   options  - CMP_BC15Options to use; defaults are used when NULL.
+// Always returns CGU_CORE_OK.
+int CMP_CDECL DecompressBlockBC2(const unsigned char cmpBlock[16],
+                                 CMP_GLOBAL unsigned char srcBlock[64],
+                                 const void *options = NULL) {
+    CMP_BC15Options fallbackOptions;
+    CMP_BC15Options *activeOptions = (CMP_BC15Options *)options;
+    if (!activeOptions)
+    {
+        SetDefaultBC15Options(&fallbackOptions);
+        activeOptions = &fallbackOptions;
+    }
+
+    DecompressBC2_Internal(srcBlock, (CGU_UINT32 *)cmpBlock, activeOptions);
+    return CGU_CORE_OK;
+}
+#endif
+
+//============================================== OpenCL USER INTERFACE ========================================================
+#ifdef ASPM_GPU
+// Kernel entry point: encodes the 4x4 pixel block selected by the two global
+// IDs to BC2 and writes it to its slot in ImageDestination.
+//   ImageSource      - source pixels, indexed per pixel with a row length of m_src_width.
+//   ImageDestination - output bytes; block (xID, yID) starts at
+//                      (xID + yID * blocksPerRow) * BC2CompBlockSize.
+//   SourceInfo       - supplies m_src_width and m_src_height.
+//   BC15options      - encoder options forwarded to CompressBlockBC2_Internal.
+CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
+    CMP_GLOBAL  const CMP_Vec4uc*   ImageSource,
+    CMP_GLOBAL  CGU_UINT8*          ImageDestination,
+    CMP_GLOBAL  Source_Info*        SourceInfo,
+    CMP_GLOBAL  CMP_BC15Options*    BC15options
+)
+{
+    CGU_UINT32 xID;
+    CGU_UINT32 yID;
+
+#ifdef ASPM_GPU
+    xID = get_global_id(0);
+    yID = get_global_id(1);
+#else
+    xID = 0;
+    yID = 0;
+#endif
+
+    // IDs that fall outside the grid of whole blocks have nothing to encode.
+    if ((xID >= (SourceInfo->m_src_width / BlockX)) ||
+        (yID >= (SourceInfo->m_src_height / BlockX))) return;
+
+    const int imageWidth = SourceInfo->m_src_width;
+    const CGU_UINT32 destI = (xID*BC2CompBlockSize) + (yID*(imageWidth / BlockX)*BC2CompBlockSize);
+
+    // Copy the block row by row; consecutive rows are imageWidth pixels apart.
+    CMP_Vec4uc srcData[16];
+    int rowStart = 4 * (yID * imageWidth + xID);
+    for ( CGU_INT32 row = 0; row < 4; row++) {
+        for ( CGU_INT32 col = 0; col < 4; col++) {
+            srcData[row * 4 + col] = ImageSource[rowStart + col];
+        }
+        rowStart += imageWidth;
+    }
+
+    CompressBlockBC2_Internal(srcData,(CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
+}
+#endif
+
--- a/extern/CMP_Core/shaders/BC2_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC2_Encode_kernel.h
@ -0,0 +1,34 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef BC2_ENCODE_KERNEL_H
+#define BC2_ENCODE_KERNEL_H
+
+#include "Common_Def.h"
+#include "BCn_Common_Kernel.h"
+
+// Size in bytes of one compressed BC2 block (used as the per-block stride of the encoder output).
+#define BC2CompBlockSize    16
+// NOTE(review): presumably the number of colour channels per pixel - confirm against users.
+#define NUM_CHANNELS        4
+// NOTE(review): presumably the number of endpoints per block - confirm against users.
+#define NUM_ENDPOINTS       2
+
+
+#endif
--- a/extern/CMP_Core/shaders/BC3_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC3_Encode_kernel.cpp
@ -0,0 +1,218 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#include "BC3_Encode_kernel.h"
+
+//============================================== BC3 INTERFACES =======================================================
+
+// Encodes one 4x4 pixel block as BC3: an alpha part written at
+// compressedBlock[DXTC_OFFSET_ALPHA] (via CompressAlphaBlock) and a colour
+// part written at compressedBlock[DXTC_OFFSET_RGB].
+//   srcBlockTemp    - the 16 source pixels.
+//   compressedBlock - receives the encoded block.
+//   BC15options     - encoder options; copied locally, never modified.
+void CompressBlockBC3_Internal(const CMP_Vec4uc srcBlockTemp[16],
+                               CMP_GLOBAL CGU_UINT32 compressedBlock[4],
+                               CMP_GLOBAL const CMP_BC15Options *BC15options) {
+  // Re-pack every pixel as four consecutive bytes taken from .z, .y, .x, .w.
+  CGU_UINT8 rgbaBlock[64];
+  for (CGU_INT32 px = 0; px < 16; px++) {
+    const CGU_INT32 base = px * 4;
+    rgbaBlock[base + 0] = (CGU_UINT8)srcBlockTemp[px].z;  // B
+    rgbaBlock[base + 1] = (CGU_UINT8)srcBlockTemp[px].y;  // G
+    rgbaBlock[base + 2] = (CGU_UINT8)srcBlockTemp[px].x;  // R
+    rgbaBlock[base + 3] = (CGU_UINT8)srcBlockTemp[px].w;  // A
+  }
+
+  // The weighting step is handed a private copy of the options.
+  CMP_BC15Options internalOptions = *BC15options;
+  CalculateColourWeightings(rgbaBlock, &internalOptions);
+
+  // Pull the alpha byte out of each pixel by viewing it as one 32-bit word.
+  CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
+  for (CGU_INT32 px = 0; px < 16; px++)
+    alphaBlock[px] =
+        (CGU_UINT8)(((CGU_INT32 *)rgbaBlock)[px] >> RGBA8888_OFFSET_A);
+
+  // The colour part is only encoded when the alpha part succeeded.
+  const CGU_INT alphaStatus =
+      CompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
+  if (alphaStatus == 0)
+    CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],
+                     &internalOptions, FALSE, FALSE, 0);
+}
+
+//============================================== USER INTERFACES ========================================================
+#ifndef ASPM_GPU
+
+// Allocates a CMP_BC15Options, initialises it with SetDefaultBC15Options and
+// hands it back through *options. The object is released with DestroyOptionsBC3.
+// Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when options itself
+// is null, and CGU_CORE_ERR_NEWMEM (with *options set to NULL) when the
+// allocation yields a null pointer.
+int CMP_CDECL CreateOptionsBC3(void **options)
+{
+    // Guard the out-parameter before it is written through.
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+
+    // NOTE(review): a standard throwing operator new never returns null, so the
+    // failure branch is only reachable with a non-throwing allocator.
+    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
+    if (!BC15optionsDefault) {
+        (*options) = NULL;
+        return CGU_CORE_ERR_NEWMEM;
+    }
+    SetDefaultBC15Options(BC15optionsDefault);
+    (*options) = BC15optionsDefault;
+    return CGU_CORE_OK;
+}
+
+
+// Deletes an options object, treating the pointer as a CMP_BC15Options.
+// Returns CGU_CORE_ERR_INVALIDPTR for a null pointer, CGU_CORE_OK otherwise.
+int CMP_CDECL DestroyOptionsBC3(void *options)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+    delete static_cast<CMP_BC15Options *>(options);
+    return CGU_CORE_OK;
+}
+
+// Stores the quality setting in the options object, limiting values below 0
+// to 0 and values above 1 to 1.
+// Returns CGU_CORE_ERR_INVALIDPTR for a null options pointer, CGU_CORE_OK otherwise.
+int CMP_CDECL SetQualityBC3(void *options,
+    CGU_FLOAT fquality)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+    CMP_BC15Options *bcOptions = static_cast<CMP_BC15Options *>(options);
+    bcOptions->m_fquality = (fquality < 0.0f) ? 0.0f
+                          : (fquality > 1.0f) ? 1.0f
+                          : fquality;
+    return CGU_CORE_OK;
+}
+
+// Enables channel weighting on the options object and stores the three
+// per-channel weights. Each weight must lie in [0, 1].
+// Returns CGU_CORE_ERR_INVALIDPTR for a null options pointer,
+// CGU_CORE_ERR_RANGERED / _RANGEGREEN / _RANGEBLUE for the first weight that
+// is out of range (nothing is stored in that case), CGU_CORE_OK otherwise.
+int CMP_CDECL SetChannelWeightsBC3(void *options,
+    CGU_FLOAT WeightRed,
+    CGU_FLOAT WeightGreen,
+    CGU_FLOAT WeightBlue) {
+    // Report a null pointer with the named error code, as the other option
+    // setters in this API do, rather than with a bare literal.
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+    CMP_BC15Options *BC15optionsDefault = (CMP_BC15Options *)options;
+
+    // Written as "not inside the range" so that NaN, which fails every
+    // comparison, is rejected instead of being stored as a weight.
+    if (!((WeightRed >= 0.0f) && (WeightRed <= 1.0f)))       return CGU_CORE_ERR_RANGERED;
+    if (!((WeightGreen >= 0.0f) && (WeightGreen <= 1.0f)))   return CGU_CORE_ERR_RANGEGREEN;
+    if (!((WeightBlue >= 0.0f) && (WeightBlue <= 1.0f)))     return CGU_CORE_ERR_RANGEBLUE;
+
+    BC15optionsDefault->m_bUseChannelWeighting = true;
+    BC15optionsDefault->m_fChannelWeights[0] = WeightRed;
+    BC15optionsDefault->m_fChannelWeights[1] = WeightGreen;
+    BC15optionsDefault->m_fChannelWeights[2] = WeightBlue;
+    return CGU_CORE_OK;
+}
+
+
+// Decodes one BC3 block: the colour part is decoded into rgbaBlock first and
+// then the alpha byte of each of the 16 pixels is replaced with the value
+// decoded by DecompressAlphaBlock.
+//   rgbaBlock       - receives the 16 decoded pixels, 4 bytes each.
+//   compressedBlock - the encoded block.
+//   BC15options     - forwarded to DecompressDXTRGB_Internal.
+void DecompressBC3_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
+                            const CGU_UINT32 compressedBlock[4],
+                            const CMP_BC15Options *BC15options) {
+  CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
+
+  DecompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
+  DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],BC15options);
+
+  // Treat every pixel as one 32-bit word and splice the alpha byte into it.
+  CMP_GLOBAL CGU_UINT32 *pixels = (CMP_GLOBAL CGU_UINT32 *)rgbaBlock;
+  for (CGU_UINT32 px = 0; px < 16; px++) {
+    const CGU_UINT32 colourBits =
+        pixels[px] & ~(BYTE_MASK << RGBA8888_OFFSET_A);
+    pixels[px] = (alphaBlock[px] << RGBA8888_OFFSET_A) | colourBits;
+  }
+}
+
+// Compresses one 4x4 block of 4-byte pixels to BC3.
+//   srcBlock         - first byte of the block's top-left pixel.
+//   srcStrideInBytes - distance in bytes between the starts of consecutive rows.
+//   cmpBlock         - receives the 16 bytes of compressed output.
+//   options          - CMP_BC15Options to use; defaults are used when NULL.
+// Always returns CGU_CORE_OK.
+int CMP_CDECL CompressBlockBC3( const unsigned char *srcBlock,
+                                unsigned int srcStrideInBytes,
+                                CMP_GLOBAL unsigned char cmpBlock[16],
+                                const void *options = NULL) {
+    // Gather the 16 pixels; the four bytes of each pixel land in x, y, z, w.
+    CMP_Vec4uc inBlock[16];
+    for (CGU_UINT8 row = 0; row < 4; row++)
+    {
+        const CGU_INT rowStart = row * srcStrideInBytes;
+        for (CGU_UINT8 col = 0; col < 4; col++)
+        {
+            const CGU_INT texel = rowStart + col * 4;
+            const CGU_INT pixel = row * 4 + col;
+            inBlock[pixel].x = CGU_UINT8(srcBlock[texel + 0]);
+            inBlock[pixel].y = CGU_UINT8(srcBlock[texel + 1]);
+            inBlock[pixel].z = CGU_UINT8(srcBlock[texel + 2]);
+            inBlock[pixel].w = CGU_UINT8(srcBlock[texel + 3]);
+        }
+    }
+
+    // Use the caller's options when given, otherwise a local default set.
+    CMP_BC15Options fallbackOptions;
+    CMP_BC15Options *activeOptions = (CMP_BC15Options *)options;
+    if (!activeOptions) {
+      SetDefaultBC15Options(&fallbackOptions);
+      activeOptions = &fallbackOptions;
+    }
+
+    CompressBlockBC3_Internal(inBlock,(CMP_GLOBAL CGU_UINT32 *)cmpBlock, activeOptions);
+    return CGU_CORE_OK;
+}
+
+// Decompresses one 16-byte BC3 block into 64 bytes (16 pixels of 4 bytes).
+//   cmpBlock - the compressed block.
+//   srcBlock - receives the decoded pixels.
+//   options  - CMP_BC15Options to use; defaults are used when NULL.
+// Always returns CGU_CORE_OK.
+int CMP_CDECL DecompressBlockBC3(const unsigned char cmpBlock[16],
+                                 CMP_GLOBAL unsigned char srcBlock[64],
+                                 const void *options = NULL) {
+    CMP_BC15Options fallbackOptions;
+    CMP_BC15Options *activeOptions = (CMP_BC15Options *)options;
+    if (!activeOptions)
+    {
+        SetDefaultBC15Options(&fallbackOptions);
+        activeOptions = &fallbackOptions;
+    }
+    DecompressBC3_Internal(srcBlock, (CGU_UINT32 *)cmpBlock, activeOptions);
+    return CGU_CORE_OK;
+}
+#endif
+
+//============================================== OpenCL USER INTERFACE ====================================================
+#ifdef ASPM_GPU
+// Kernel entry point: encodes the 4x4 pixel block selected by the two global
+// IDs to BC3 and writes it to its slot in ImageDestination.
+//   ImageSource      - source pixels, indexed per pixel with a row length of m_src_width.
+//   ImageDestination - output bytes; block (xID, yID) starts at
+//                      (xID + yID * blocksPerRow) * BC3CompBlockSize.
+//   SourceInfo       - supplies m_src_width and m_src_height.
+//   BC15options      - encoder options forwarded to CompressBlockBC3_Internal.
+CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
+    CMP_GLOBAL const CMP_Vec4uc *ImageSource,
+    CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo,
+    CMP_GLOBAL CMP_BC15Options *BC15options) {
+  CGU_UINT32 xID;
+  CGU_UINT32 yID;
+
+#ifdef ASPM_GPU
+  xID = get_global_id(0);
+  yID = get_global_id(1);
+#else
+  xID = 0;
+  yID = 0;
+#endif
+
+  // IDs that fall outside the grid of whole blocks have nothing to encode.
+  if ((xID >= (SourceInfo->m_src_width / BlockX)) ||
+      (yID >= (SourceInfo->m_src_height / BlockX))) return;
+
+  const int imageWidth = SourceInfo->m_src_width;
+  const CGU_UINT32 destI =
+      (xID * BC3CompBlockSize) + (yID * (imageWidth / BlockX) * BC3CompBlockSize);
+
+  // Copy the block row by row; consecutive rows are imageWidth pixels apart.
+  CMP_Vec4uc srcData[16];
+  int rowStart = 4 * (yID * imageWidth + xID);
+  for (CGU_INT32 row = 0; row < 4; row++) {
+    for (CGU_INT32 col = 0; col < 4; col++) {
+      srcData[row * 4 + col] = ImageSource[rowStart + col];
+    }
+    rowStart += imageWidth;
+  }
+
+  CompressBlockBC3_Internal(
+      srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
+}
+#endif
--- a/extern/CMP_Core/shaders/BC3_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC3_Encode_kernel.h
@ -0,0 +1,31 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef BC3_ENCODE_KERNEL_H
+#define BC3_ENCODE_KERNEL_H
+
+#include "Common_Def.h"
+#include "BCn_Common_Kernel.h"
+
+// Size in bytes of one compressed BC3 block (used as the per-block stride of the encoder output).
+#define BC3CompBlockSize 16
+
+#endif
--- a/extern/CMP_Core/shaders/BC4_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC4_Encode_kernel.cpp
@ -0,0 +1,200 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#include "BC4_Encode_kernel.h"
+
+//============================================== BC4 INTERFACES =======================================================
+
+// Encodes one 4x4 block as BC4 by compressing the .x component of each of the
+// 16 source pixels with CompressAlphaBlock.
+//   srcBlockTemp    - the 16 source pixels; only .x is read.
+//   compressedBlock - receives the encoded block.
+//   BC15options     - only m_fquality is read, and it is not acted on.
+void CompressBlockBC4_Internal(const CMP_Vec4uc srcBlockTemp[16],
+                               CMP_GLOBAL CGU_UINT32 compressedBlock[2],
+                               CMP_GLOBAL const CMP_BC15Options *BC15options) {
+  if (BC15options->m_fquality) {
+    // Placeholder: the quality setting is reserved and currently has no effect.
+  }
+
+  CGU_UINT8 alphaBlock[16];
+  for (CGU_INT32 px = 0; px < 16; px++)
+    alphaBlock[px] = (CGU_UINT8)srcBlockTemp[px].x;  // Red channel
+
+  CompressAlphaBlock(alphaBlock, (CMP_GLOBAL CGU_UINT32 *)compressedBlock);
+}
+
+// Decodes one BC4 block with DecompressAlphaBlock and writes each of the 16
+// decoded values to all four bytes (R, G, B and A) of the matching output pixel.
+//   rgbaBlock       - receives 16 pixels of 4 bytes.
+//   compressedBlock - the encoded block.
+//   BC15options     - unused.
+void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
+                            const CGU_UINT32 compressedBlock[2],
+                            const CMP_BC15Options *BC15options) {
+  (void)BC15options;  // not needed for decoding
+
+  CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
+  DecompressAlphaBlock(alphaBlock, compressedBlock);
+
+  for (CGU_INT32 px = 0; px < 16; px++) {
+    const CGU_UINT8 value = (CGU_UINT8)alphaBlock[px];
+    for (CGU_INT32 channel = 0; channel < 4; channel++)
+      rgbaBlock[px * 4 + channel] = value;
+  }
+}
+
+// Encodes 16 single-channel values as one BC4 block by passing them straight
+// to CompressAlphaBlock. BC15options is accepted but unused.
+void CompressBlockBC4_SingleChannel(const CGU_UINT8 srcBlockTemp[16],
+                               CMP_GLOBAL CGU_UINT32 compressedBlock[2],
+                               CMP_GLOBAL const CMP_BC15Options *BC15options) {
+  (void)BC15options;  // not needed for encoding
+  CompressAlphaBlock(srcBlockTemp, (CMP_GLOBAL CGU_UINT32 *)compressedBlock);
+}
+
+// Decodes one BC4 block into 16 single-channel values by passing it straight
+// to DecompressAlphaBlock. BC15options is accepted but unused.
+void DecompressBlockBC4_SingleChannel(CGU_UINT8 srcBlockTemp[16],
+                            const CGU_UINT32 compressedBlock[2],
+                            const CMP_BC15Options *BC15options) {
+  (void)BC15options;  // not needed for decoding
+  DecompressAlphaBlock(srcBlockTemp, compressedBlock);
+}
+
+//============================================== USER INTERFACES ========================================================
+#ifndef ASPM_GPU
+
+// Allocates a CMP_BC15Options, initialises it with SetDefaultBC15Options and
+// hands it back through *options. The object is released with DestroyOptionsBC4.
+// Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when options itself
+// is null, and CGU_CORE_ERR_NEWMEM (with *options set to NULL) when the
+// allocation yields a null pointer.
+int CMP_CDECL CreateOptionsBC4(void **options)
+{
+    // Guard the out-parameter before it is written through.
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+
+    // NOTE(review): a standard throwing operator new never returns null, so the
+    // failure branch is only reachable with a non-throwing allocator.
+    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
+    if (!BC15optionsDefault) {
+        (*options) = NULL;
+        return CGU_CORE_ERR_NEWMEM;
+    }
+    SetDefaultBC15Options(BC15optionsDefault);
+    (*options) = BC15optionsDefault;
+    return CGU_CORE_OK;
+}
+
+// Deletes an options object, treating the pointer as a CMP_BC15Options.
+// Returns CGU_CORE_ERR_INVALIDPTR for a null pointer, CGU_CORE_OK otherwise.
+int CMP_CDECL DestroyOptionsBC4(void *options)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+    delete static_cast<CMP_BC15Options *>(options);
+    return CGU_CORE_OK;
+}
+
+// Stores the quality setting in the options object, limiting values below 0
+// to 0 and values above 1 to 1.
+// Returns CGU_CORE_ERR_INVALIDPTR for a null options pointer, CGU_CORE_OK otherwise.
+int CMP_CDECL SetQualityBC4(void *options,
+    CGU_FLOAT fquality)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+    CMP_BC15Options *bcOptions = static_cast<CMP_BC15Options *>(options);
+    bcOptions->m_fquality = (fquality < 0.0f) ? 0.0f
+                          : (fquality > 1.0f) ? 1.0f
+                          : fquality;
+    return CGU_CORE_OK;
+}
+
+// Compresses one 4x4 block of single-byte values to BC4.
+//   srcBlock         - first byte of the block's top-left value; 4 consecutive
+//                      bytes are read from each of 4 rows.
+//   srcStrideInBytes - distance in bytes between the starts of consecutive rows.
+//   cmpBlock         - receives the 8 bytes of compressed output.
+//   options          - CMP_BC15Options to use; defaults are used when NULL.
+// Always returns CGU_CORE_OK.
+int CMP_CDECL CompressBlockBC4(const unsigned char *srcBlock,
+                               unsigned int srcStrideInBytes,
+                               CMP_GLOBAL unsigned char cmpBlock[8],
+                               const void *options = NULL) {
+
+    unsigned char inBlock[16];
+    //----------------------------------
+    // Fill the inBlock with source data
+    //----------------------------------
+    CGU_INT srcpos = 0;
+    CGU_INT dstptr = 0;
+    for (CGU_UINT8 row = 0; row < 4; row++)
+    {
+        srcpos = row * srcStrideInBytes;
+        for (CGU_UINT8 col = 0; col < 4; col++)
+        {
+            inBlock[dstptr++] = CGU_UINT8(srcBlock[srcpos++]);
+        }
+    }
+
+    CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
+    // The fallback lives at function scope: BC15options may point at it, and
+    // that pointer is used by the compression call after the if-block ends.
+    CMP_BC15Options BC15optionsDefault;
+    if (BC15options == NULL) {
+      BC15options = &BC15optionsDefault;
+      SetDefaultBC15Options(BC15options);
+    }
+
+    CompressBlockBC4_SingleChannel(inBlock,(CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options);
+    return CGU_CORE_OK;
+}
+
+// Decompresses one 8-byte BC4 block into 16 single-byte values.
+//   cmpBlock - the compressed block.
+//   srcBlock - receives the 16 decoded values.
+//   options  - CMP_BC15Options to use; defaults are used when NULL.
+// Always returns CGU_CORE_OK.
+int CMP_CDECL DecompressBlockBC4(const unsigned char cmpBlock[8],
+                            CMP_GLOBAL unsigned char srcBlock[16],
+                            const void *options = NULL) {
+    CMP_BC15Options fallbackOptions;
+    CMP_BC15Options *activeOptions = (CMP_BC15Options *)options;
+    if (!activeOptions)
+    {
+        SetDefaultBC15Options(&fallbackOptions);
+        activeOptions = &fallbackOptions;
+    }
+    DecompressBlockBC4_SingleChannel(srcBlock, (CGU_UINT32 *)cmpBlock, activeOptions);
+    return CGU_CORE_OK;
+}
+#endif
+
+//============================================== OpenCL USER INTERFACE ====================================================
+#ifdef ASPM_GPU
+// Kernel entry point: encodes the 4x4 pixel block selected by the two global
+// IDs to BC4 and writes it to its slot in ImageDestination.
+//   ImageSource      - source pixels, indexed per pixel with a row length of m_src_width.
+//   ImageDestination - output bytes; block (xID, yID) starts at
+//                      (xID + yID * blocksPerRow) * BC4CompBlockSize.
+//   SourceInfo       - supplies m_src_width and m_src_height.
+//   BC15options      - encoder options forwarded to CompressBlockBC4_Internal.
+CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
+    CMP_GLOBAL const CMP_Vec4uc *ImageSource,
+    CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo,
+    CMP_GLOBAL CMP_BC15Options *BC15options) {
+  CGU_UINT32 xID;
+  CGU_UINT32 yID;
+
+#ifdef ASPM_GPU
+  xID = get_global_id(0);
+  yID = get_global_id(1);
+#else
+  xID = 0;
+  yID = 0;
+#endif
+
+  // IDs that fall outside the grid of whole blocks have nothing to encode.
+  if ((xID >= (SourceInfo->m_src_width / BlockX)) ||
+      (yID >= (SourceInfo->m_src_height / BlockX))) return;
+
+  const int imageWidth = SourceInfo->m_src_width;
+  const CGU_UINT32 destI =
+      (xID * BC4CompBlockSize) + (yID * (imageWidth / BlockX) * BC4CompBlockSize);
+
+  // Copy the block row by row; consecutive rows are imageWidth pixels apart.
+  CMP_Vec4uc srcData[16];
+  int rowStart = 4 * (yID * imageWidth + xID);
+  for (CGU_INT32 row = 0; row < 4; row++) {
+    for (CGU_INT32 col = 0; col < 4; col++) {
+      srcData[row * 4 + col] = ImageSource[rowStart + col];
+    }
+    rowStart += imageWidth;
+  }
+
+  CompressBlockBC4_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
+}
+#endif
--- a/extern/CMP_Core/shaders/BC4_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC4_Encode_kernel.h
@ -0,0 +1,31 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef BC4_ENCODE_KERNEL_H
+#define BC4_ENCODE_KERNEL_H
+
+#include "Common_Def.h"
+#include "BCn_Common_Kernel.h"
+
+// Size in bytes of one compressed BC4 block (used as the per-block stride of the encoder output).
+#define BC4CompBlockSize 8
+
+#endif
--- a/extern/CMP_Core/shaders/BC5_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC5_Encode_kernel.cpp
@ -0,0 +1,264 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#include "BC5_Encode_kernel.h"
+
+//============================================== BC5 INTERFACES =======================================================
+
+// Encodes one 4x4 block as BC5 from 16 source texels.
+// The x (red) component of every texel is gathered and passed to
+// CompressAlphaBlock with the output starting at compressedBlock[0]; the
+// y (green) component is then handled the same way with the output
+// starting at compressedBlock[2].
+void  CompressBlockBC5_Internal(CMP_Vec4uc srcBlockTemp[16],
+                                CMP_GLOBAL CGU_UINT32 compressedBlock[4],
+                                CMP_GLOBAL  CMP_BC15Options *BC15options)
+{
+    // The quality setting is read but nothing acts on it yet.
+    if (BC15options->m_fquality) {
+        // Reserved
+    }
+
+    CGU_UINT8 channel[16];
+
+    // Red channel
+    for (CGU_INT32 texel = 0; texel < 16; texel++) {
+        channel[texel] = (CGU_UINT8)srcBlockTemp[texel].x;
+    }
+    CompressAlphaBlock(channel, &compressedBlock[0]);
+
+    // Green channel (the scratch buffer is reused)
+    for (CGU_INT32 texel = 0; texel < 16; texel++) {
+        channel[texel] = (CGU_UINT8)srcBlockTemp[texel].y;
+    }
+    CompressAlphaBlock(channel, &compressedBlock[2]);
+}
+
+// Decodes one BC5 block into 16 four-byte texels written to rgbaBlock.
+// DecompressAlphaBlock expands the data starting at compressedBlock[0] into
+// the red plane and the data starting at compressedBlock[2] into the green
+// plane. When BC15options->m_mapDecodeRGBA is set, each texel is written as
+// (R, G, 0, 255); otherwise it is written as (0, G, R, 255).
+void  DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
+                             CGU_UINT32 compressedBlock[4],
+                             CMP_BC15Options *BC15options)
+{
+    CGU_UINT8 redPlane[BLOCK_SIZE_4X4];
+    CGU_UINT8 greenPlane[BLOCK_SIZE_4X4];
+
+    DecompressAlphaBlock(redPlane, &compressedBlock[0]);
+    DecompressAlphaBlock(greenPlane, &compressedBlock[2]);
+
+    // Byte slots inside each 4-byte texel: red and the zero filler swap
+    // places between the two output layouts; green and 255 never move.
+    CGU_INT32 redSlot;
+    CGU_INT32 zeroSlot;
+    if (BC15options->m_mapDecodeRGBA)
+    {
+        redSlot  = 0;
+        zeroSlot = 2;
+    }
+    else
+    {
+        redSlot  = 2;
+        zeroSlot = 0;
+    }
+
+    for (CGU_INT32 texel = 0; texel < 16; texel++) {
+        CGU_INT32 base = texel * 4;
+        rgbaBlock[base + redSlot]  = (CGU_UINT8)redPlane[texel];
+        rgbaBlock[base + 1]        = (CGU_UINT8)greenPlane[texel];
+        rgbaBlock[base + zeroSlot] = 0;
+        rgbaBlock[base + 3]        = 255;
+    }
+}
+
+
+// Encodes two already separated 16-entry channels as one BC5 block.
+// srcBlockR is passed to CompressAlphaBlock with the output starting at
+// compressedBlock[0], srcBlockG with the output starting at compressedBlock[2].
+// BC15options is not used by this function.
+void  CompressBlockBC5_DualChannel_Internal(const CGU_UINT8 srcBlockR[16],
+                                            const CGU_UINT8 srcBlockG[16],
+                                            CMP_GLOBAL  CGU_UINT32 compressedBlock[4],
+                                            CMP_GLOBAL  const CMP_BC15Options *BC15options)
+{
+    (void)BC15options;  // intentionally unused
+
+    CompressAlphaBlock(srcBlockR, compressedBlock);
+    CompressAlphaBlock(srcBlockG, compressedBlock + 2);
+}
+
+// Decodes one BC5 block into two separate 16-entry channels.
+// DecompressAlphaBlock fills srcBlockR from the data starting at
+// compressedBlock[0] and srcBlockG from the data starting at compressedBlock[2].
+// BC15options is not used by this function.
+void  DecompressBC5_DualChannel_Internal(CMP_GLOBAL CGU_UINT8 srcBlockR[16],
+                                         CMP_GLOBAL CGU_UINT8 srcBlockG[16],
+                                         const CGU_UINT32 compressedBlock[4],
+                                         const CMP_BC15Options *BC15options)
+{
+    (void)BC15options;  // intentionally unused
+
+    DecompressAlphaBlock(srcBlockR, compressedBlock);
+    DecompressAlphaBlock(srcBlockG, compressedBlock + 2);
+}
+
+
+//============================================== USER INTERFACES ========================================================
+#ifndef ASPM_GPU
+
+// Allocates a CMP_BC15Options object, initialises it with
+// SetDefaultBC15Options and hands it back through *options.
+//   options : receives the new object; must not be NULL.
+// Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when options is
+// NULL, or CGU_CORE_ERR_NEWMEM (with *options set to NULL) when no object
+// was obtained. The object is released with DestroyOptionsBC5.
+int CMP_CDECL CreateOptionsBC5(void **options)
+{
+    // Reject a NULL out-parameter instead of dereferencing it, in line with
+    // the pointer validation done by DestroyOptionsBC5 and SetQualityBC5.
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+
+    // NOTE(review): a plain new-expression normally reports failure by
+    // throwing rather than returning NULL, so the check below only matters
+    // in builds where allocation failure yields a null pointer.
+    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
+    if (!BC15optionsDefault) {
+        (*options) = NULL;
+        return CGU_CORE_ERR_NEWMEM;
+    }
+
+    SetDefaultBC15Options(BC15optionsDefault);
+    (*options) = BC15optionsDefault;
+    return CGU_CORE_OK;
+}
+
+// Deletes the CMP_BC15Options object that options points to.
+// Returns CGU_CORE_ERR_INVALIDPTR when options is NULL, CGU_CORE_OK otherwise.
+int CMP_CDECL DestroyOptionsBC5(void *options)
+{
+    if (options == NULL)
+    {
+        return CGU_CORE_ERR_INVALIDPTR;
+    }
+
+    delete reinterpret_cast <CMP_BC15Options *>(options);
+    return CGU_CORE_OK;
+}
+
+// Stores a quality value in the CMP_BC15Options object behind options.
+// Values below 0.0 are stored as 0.0 and values above 1.0 as 1.0.
+// Returns CGU_CORE_ERR_INVALIDPTR when options is NULL, CGU_CORE_OK otherwise.
+int CMP_CDECL SetQualityBC5(void *options,
+    CGU_FLOAT fquality)
+{
+    if (options == NULL)
+    {
+        return CGU_CORE_ERR_INVALIDPTR;
+    }
+
+    // Clamp to the [0, 1] range.
+    CGU_FLOAT clamped = fquality;
+    if (clamped > 1.0f)
+    {
+        clamped = 1.0f;
+    }
+    else if (clamped < 0.0f)
+    {
+        clamped = 0.0f;
+    }
+
+    reinterpret_cast <CMP_BC15Options *>(options)->m_fquality = clamped;
+    return CGU_CORE_OK;
+}
+
+
+// Compresses one 4x4 two-channel block to BC5.
+//   srcBlockR         : first (red) channel, one byte per texel.
+//   srcStrideInBytes1 : byte distance between consecutive rows of srcBlockR.
+//   srcBlockG         : second (green) channel, one byte per texel.
+//   srcStrideInBytes2 : byte distance between consecutive rows of srcBlockG.
+//   cmpBlock          : receives the compressed block.
+//   options           : CMP_BC15Options to use, or NULL to use the defaults
+//                       produced by SetDefaultBC15Options.
+// Returns CGU_CORE_OK, or CGU_CORE_ERR_INVALIDPTR when a source or
+// destination pointer is NULL.
+int CMP_CDECL CompressBlockBC5(const CGU_UINT8 *srcBlockR,
+                               unsigned int srcStrideInBytes1,
+                               const CGU_UINT8 *srcBlockG,
+                               unsigned int srcStrideInBytes2,
+                               CMP_GLOBAL CGU_UINT8 cmpBlock[16],
+                               const void *options = NULL) {
+    // Validate the data pointers up front, as the other BC5 entry points do
+    // for their options pointer, rather than faulting inside the copy loop.
+    if (!srcBlockR || !srcBlockG || !cmpBlock) return CGU_CORE_ERR_INVALIDPTR;
+
+    CGU_UINT8 inBlockR[16];
+    CGU_UINT8 inBlockG[16];
+
+    //----------------------------------------------------
+    // Gather both channels into packed 16-entry blocks.
+    // Each channel keeps its own stride; the row offsets
+    // are computed exactly as before (row * stride).
+    //----------------------------------------------------
+    CGU_INT dstptr = 0;
+    for (CGU_UINT8 row = 0; row < 4; row++)
+    {
+        CGU_INT srcposR = row * srcStrideInBytes1;
+        CGU_INT srcposG = row * srcStrideInBytes2;
+        for (CGU_UINT8 col = 0; col < 4; col++)
+        {
+            inBlockR[dstptr] = CGU_UINT8(srcBlockR[srcposR++]);
+            inBlockG[dstptr] = CGU_UINT8(srcBlockG[srcposG++]);
+            dstptr++;
+        }
+    }
+
+    // The caller's options are only read, so they stay const; a local
+    // default-initialised copy is used when none were supplied.
+    CMP_BC15Options BC15optionsDefault;
+    const CMP_BC15Options *BC15options = static_cast<const CMP_BC15Options *>(options);
+    if (BC15options == NULL)
+    {
+        SetDefaultBC15Options(&BC15optionsDefault);
+        BC15options = &BC15optionsDefault;
+    }
+
+    CompressBlockBC5_DualChannel_Internal(inBlockR, inBlockG, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options);
+    return CGU_CORE_OK;
+}
+
+// Decompresses one BC5 block into two separate 16-entry channels.
+//   cmpBlock  : the compressed block to decode.
+//   srcBlockR : receives the 16 values of the first (red) channel.
+//   srcBlockG : receives the 16 values of the second (green) channel.
+//   options   : CMP_BC15Options to use, or NULL to use the defaults
+//               produced by SetDefaultBC15Options.
+// Returns CGU_CORE_OK, or CGU_CORE_ERR_INVALIDPTR when a source or
+// destination pointer is NULL.
+int  CMP_CDECL DecompressBlockBC5(const CGU_UINT8 cmpBlock[16],
+                              CMP_GLOBAL CGU_UINT8 srcBlockR[16],
+                              CMP_GLOBAL CGU_UINT8 srcBlockG[16],
+                              const void *options = NULL) {
+    // Validate the data pointers up front instead of faulting in the decoder.
+    if (!cmpBlock || !srcBlockR || !srcBlockG) return CGU_CORE_ERR_INVALIDPTR;
+
+    // The caller's options are only read, so they stay const; a local
+    // default-initialised copy is used when none were supplied.
+    CMP_BC15Options BC15optionsDefault;
+    const CMP_BC15Options *BC15options = static_cast<const CMP_BC15Options *>(options);
+    if (BC15options == NULL)
+    {
+        SetDefaultBC15Options(&BC15optionsDefault);
+        BC15options = &BC15optionsDefault;
+    }
+
+    // The compressed input is only read, so its constness is kept in the cast.
+    DecompressBC5_DualChannel_Internal(srcBlockR, srcBlockG, reinterpret_cast<const CGU_UINT32 *>(cmpBlock), BC15options);
+
+    return CGU_CORE_OK;
+}
+
+#endif
+
+//============================================== OpenCL USER INTERFACE ====================================================
+#ifdef ASPM_GPU
+// Kernel entry point: each invocation encodes one 4x4 block of ImageSource
+// to BC5. The block column and row come from get_global_id(0) and
+// get_global_id(1). Invocations whose block lies at or beyond
+// m_src_width / BlockX columns or m_src_height / BlockX rows return without
+// writing. The encoded block is stored at
+// ImageDestination[(column + row * blocksPerRow) * BC5CompBlockSize].
+CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(CMP_GLOBAL  const CMP_Vec4uc*   ImageSource,
+                                          CMP_GLOBAL  CGU_UINT8*          ImageDestination,
+                                          CMP_GLOBAL  Source_Info*        SourceInfo,
+                                          CMP_GLOBAL  CMP_BC15Options*    BC15options
+)
+{
+    CGU_UINT32 blockCol;
+    CGU_UINT32 blockRow;
+
+#ifdef ASPM_GPU
+    blockCol = get_global_id(0);
+    blockRow = get_global_id(1);
+#else
+    blockCol = 0;
+    blockRow = 0;
+#endif
+
+    // Nothing to do for invocations outside the block grid.
+    if ((blockCol >= (SourceInfo->m_src_width / BlockX)) ||
+        (blockRow >= (SourceInfo->m_src_height / BlockX))) return;
+
+    int  imageWidth = SourceInfo->m_src_width;
+
+    // Offset of this block in the destination buffer.
+    CGU_UINT32 destI = (blockCol*BC5CompBlockSize) + (blockRow*(imageWidth / BlockX)*BC5CompBlockSize);
+
+    // Index of the block's first texel; advanced by one image row per pass.
+    int rowStart = 4 * (blockRow * imageWidth + blockCol);
+    CMP_Vec4uc srcData[16];
+
+    for ( CGU_INT32 row = 0; row < 4; row++) {
+        for ( CGU_INT32 col = 0; col < 4; col++) {
+            srcData[row * 4 + col] = ImageSource[rowStart + col];
+        }
+        rowStart += imageWidth;
+    }
+
+    CompressBlockBC5_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
+}
+#endif
--- a/extern/CMP_Core/shaders/BC5_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC5_Encode_kernel.h
@ -0,0 +1,31 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef BC5_ENCODE_KERNEL_H
+#define BC5_ENCODE_KERNEL_H
+
+#include "Common_Def.h"
+#include "BCn_Common_Kernel.h"
+
+// Size of one compressed BC5 block: the GPU encoder multiplies block
+// coordinates by this value to index its byte-addressed destination buffer.
+#define BC5CompBlockSize 16
+
+#endif
--- a/extern/CMP_Core/shaders/BC6_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC6_Encode_kernel.cpp
--- a/extern/CMP_Core/shaders/BC6_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC6_Encode_kernel.h
@ -0,0 +1,480 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef BC6_ENCODE_KERNEL_H
+#define BC6_ENCODE_KERNEL_H
+
+#include "Common_Def.h"
+
+#define MAX_TRACE                       10
+#define MAX_ENTRIES_QUANT_TRACE         16
+#define BlockX                          4
+#define BlockY                          4
+#define BYTEPP                          4
+#define COMPRESSED_BLOCK_SIZE           16             // Size of a compressed block in bytes
+#define MAX_DIMENSION_BIG               4
+#define MAX_SUBSET_SIZE                 16              // Largest possible size for an individual subset
+#define NUM_BLOCK_TYPES                 8               // Number of block types in the format
+#define MAX_SUBSETS                     3               // Maximum number of possible subsets
+#define MAX_PARTITIONS                  64              // Maximum number of partition types
+#define MAX_ENTRIES                     64
+#define MAX_TRY                         20
+
+#define MAX_PARTITIONS_TABLE            (1+64+64)
+#define DIMENSION                       4
+#define MAX_CLUSTERS_BIG                16
+#define EPSILON                         0.000001
+#define MAX_CLUSTERS_QUANT_TRACE        8
+
+//# Image Quality will increase as this number gets larger and end-to-end performance time will reduce
+#define MAX_INDEX_BITS                  4
+#define HIGHQULITY_THRESHOLD            0.7F
+#define qFAST_THRESHOLD                 0.5F
+
+#define F16NEGPREC_LIMIT_VAL            -2048.0f //f16 negative precision limit value
+
+#define LOG_CL_RANGE                    5
+#define LOG_CL_BASE                     2
+#define BIT_BASE                        5
+#define BIT_RANGE                       9
+#define MAX_CLUSTERS                    8
+// BTT(bits) offsets a bit count by BIT_BASE and CLT(cl) offsets a value by
+// LOG_CL_BASE. The arguments are parenthesised so that an expression
+// argument (for example a | b) is evaluated before the subtraction.
+#define BTT(bits)                       ((bits)-BIT_BASE)
+#define CLT(cl)                         ((cl)-LOG_CL_BASE)
+// MASK(n) yields a value with the low n bits set.
+#define MASK(n)                         ((1<<(n))-1)
+// SIGN_EXTEND_TYPELESS(x,nb) ORs ones into every bit from position nb upwards
+// when bit (nb-1) of x is set; otherwise it yields x unchanged.
+#define SIGN_EXTEND_TYPELESS(x,nb)      ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x))
+#define CMP_HALF_MAX                    65504.0f // positive half max
+
+#ifndef ASPM_GPU
+#include <bitset>
+#include <assert.h>
+//typedef uint8_t        byte;
+#else
+//typedef bitset       uint8_t;
+//typedef uint8          byte;
+#endif
+
+// Compressed block size and block dimensions for BC6.
+// NOTE(review): the size is presumably in bytes (COMPRESSED_BLOCK_SIZE in this
+// header is also 16 and is documented as bytes) - confirm against its use.
+#define BC6CompBlockSize 16
+#define BC6BlockX   4
+#define BC6BlockY   4
+
+// Trace record pairing an integer k with a float d.
+// NOTE(review): the meaning of the two fields is not visible in this header -
+// confirm against the code that fills it in.
+typedef struct
+{
+    CGU_INT  k;
+    CGU_FLOAT d;
+} BC6H_TRACE;
+
+#define NCHANNELS                        3
+#define MAX_END_POINTS                   2
+#define MAX_BC6H_MODES                  14
+#define MAX_BC6H_PARTITIONS             32
+#define MAX_TWOREGION_MODES             10
+#define COMPRESSED_BLOCK_SIZE           16        // Size of a compressed block in bytes
+#define ONE_REGION_INDEX_OFFSET         65        // bit location to start saving color index values for single region shape
+#define TWO_REGION_INDEX_OFFSET         82        // bit location to start saving color index values for two region shapes
+#define MIN_MODE_FOR_ONE_REGION         11        // Two region shapes use modes 1..10 and single region shapes use 11..14
+// R_0..R_3 select element [i] of the four [0..1][0..1] slots of ep.
+// They expand to an expression that uses a variable named i, which must be
+// in scope wherever the macro is used.
+#define R_0(ep)                         (ep)[0][0][i]
+#define R_1(ep)                         (ep)[0][1][i]
+#define R_2(ep)                         (ep)[1][0][i]
+#define R_3(ep)                         (ep)[1][1][i]
+#define FLT16_MAX                       0x7bff
+
+// USE_SHAKERHD is only defined for non-GPU (host) builds.
+#ifndef ASPM_GPU
+#define USE_SHAKERHD
+#endif
+
+#define USE_NEWRAMP
+
+// One pair of endpoints, A and B, each holding NCHANNELS float components.
+typedef struct
+{
+    CGU_FLOAT A[NCHANNELS];
+    CGU_FLOAT B[NCHANNELS];
+} END_Points;
+
+// Three-component float vector.
+typedef struct
+{
+    CGU_FLOAT x, y, z;
+} BC6H_Vec3f;
+
+// Per-mode description used by the ModePartition table (one entry per BC6H mode).
+typedef struct
+{
+    CGU_INT nbits;              // Number of bits
+    CGU_INT prec[3];            // Precision of the quantized RGB endpoints
+    CGU_INT transformed;        // if 0, deltas are unsigned and no transform; otherwise, signed and transformed
+    CGU_INT modebits;           // number of mode bits
+    CGU_INT IndexPrec;          // Index Precision
+    CGU_INT mode;               // Mode value to save
+    CGU_INT lowestPrec;         // Step size of each precision increment
+}  ModePartitions;
+
+// Mode table indexed by mode number (entry 0 is a placeholder for "invalid").
+// Column order follows the ModePartitions fields:
+//   nbits, prec[0],prec[1],prec[2], transformed, modebits, IndexPrec, mode, lowestPrec
+__constant ModePartitions ModePartition[MAX_BC6H_MODES + 1] =
+{
+   0,    0,0,0,        0,    0,    0,    0,     0,   // Mode = Invalid
+
+   // Two region Partition
+   10,   5,5,5,        1,    2,    3,    0x00,  31,    // Mode = 1
+   7,    6,6,6,        1,    2,    3,    0x01,  248,   // Mode = 2
+   11,   5,4,4,        1,    5,    3,    0x02,  15,    // Mode = 3
+   11,   4,5,4,        1,    5,    3,    0x06,  15,    // Mode = 4
+   11,   4,4,5,        1,    5,    3,    0x0a,  15,    // Mode = 5
+   9,    5,5,5,        1,    5,    3,    0x0e,  62,    // Mode = 6
+   8,    6,5,5,        1,    5,    3,    0x12,  124,   // Mode = 7
+   8,    5,6,5,        1,    5,    3,    0x16,  124,   // Mode = 8
+   8,    5,5,6,        1,    5,    3,    0x1a,  124,   // Mode = 9
+   6,    6,6,6,        0,    5,    3,    0x1e,  496,   // Mode = 10
+
+   // One region Partition
+   10,   10,10,10,     0,    5,    4,    0x03,  31,    // Mode = 11
+   11,   9,9,9,        1,    5,    4,    0x07,  15,    // Mode = 12
+   12,   8,8,8,        1,    5,    4,    0x0b,  7,     // Mode = 13
+   16,   4,4,4,        1,    5,    4,    0x0f,  1,     // Mode = 14
+};
+
+//================================================
+// Mode pattern order to try on endpoints.
+// The order can be rearranged to set which modes get processed first;
+// for now it is set in order.
+// The trailing comments give nbits and prec[0..2] of the mode as listed
+// in the ModePartition table.
+//================================================
+__constant CGU_INT8 ModeFitOrder[MAX_BC6H_MODES + 1] =
+{
+   0,                //0: N/A
+    // ----  2 region lower bits ---
+    1,                // 10 5 5 5
+    2,                // 7  6 6 6
+    3,                // 11 5 4 4
+    4,                // 11 4 5 4
+    5,                // 11 4 4 5
+    6,                // 9  5 5 5
+    7,                // 8  6 5 5
+    8,                // 8  5 6 5
+    9,                // 8  5 5 6
+    10,               // 6  6 6 6
+    //------ 1 region high bits ---
+    11,               // 10 10 10 10
+    12,               // 11 9  9  9
+    13,               // 12 8  8  8
+    14                // 16 4  4  4
+};
+
+// The Region2FixUps are for our index[subset = 2][16][3] locations,
+// indexed by shape region 2 (32 entries).
+// NOTE(review): how these values are consumed is not visible in this header - confirm.
+__constant CGU_INT g_Region2FixUp[32] =
+{
+   7 , 3 , 11, 7,
+   3 , 11, 9 , 5,
+   2 , 12, 7 , 3,
+   11, 7 , 11, 3,
+   7 , 1 , 0 , 1,
+   0 , 1 , 0 , 7,
+   0 , 1 , 1 , 0,
+   4 , 4 , 1 , 0,
+};
+
+// Indexed by all shape regions (32 entries).
+// Partition set fix-ups for region 1; note that the fix-up for region 0 is
+// always at index 0.
+// Normally 3 bits are used to define an index value; an index at a fix-up
+// location is stored with one bit less.
+__constant CGU_INT g_indexfixups[32] =
+{
+   15,15,15,15,
+   15,15,15,15,
+   15,15,15,15,
+   15,15,15,15,
+   15, 2, 8, 2,
+   2, 8, 8,15,
+   2, 8, 2, 2,
+   8, 8, 2, 2,
+};
+
+// Per-block working state of the BC6H encoder: the header fields to be
+// written (mode, shape index, endpoint values named after the endpoint slot
+// they belong to), the block indices, the input texels and the current /
+// best-so-far search results.
+typedef struct
+{
+    CGU_INT8 region;                // one or two
+    CGU_INT8 m_mode;                // m
+    CGU_INT8 d_shape_index;         // d
+    CGU_INT rw;                            // endpt[0].A[0]
+    CGU_INT rx;                            // endpt[0].B[0]
+    CGU_INT ry;                            // endpt[1].A[0]
+    CGU_INT rz;                            // endpt[1].B[0]
+    CGU_INT gw;                            // endpt[0].A[1]
+    CGU_INT gx;                            // endpt[0].B[1]
+    CGU_INT gy;                            // endpt[1].A[1]
+    CGU_INT gz;                            // endpt[1].B[1]
+    CGU_INT bw;                            // endpt[0].A[2]
+    CGU_INT bx;                            // endpt[0].B[2]
+    CGU_INT by;                            // endpt[1].A[2]
+    CGU_INT bz;                            // endpt[1].B[2]
+
+    // The same 16 index bytes, viewed either as a 4x4 grid or as a flat array.
+    union
+    {
+        CGU_UINT8 indices[4][4];            // Indices data after header block
+        CGU_UINT8 indices16[16];
+    };
+
+    // Input texels, viewed either as floats or as raw bytes.
+    // NOTE(review): cdin[256] matches din only if CGU_FLOAT is 4 bytes (16*4*4) - confirm.
+    union
+    {
+        CGU_FLOAT         din[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];   // Original data input as floats
+        unsigned char     cdin[256];                                 // as uchar to match float
+    };
+
+    END_Points    EC[MAX_END_POINTS];    // compressed endpoints expressed as endpt[0].A[] and endpt[1].B[]
+    END_Points    E[MAX_END_POINTS];     // decompressed endpoints
+    CGU_BOOL      issigned;            // Format is 16 bit signed floating point
+    CGU_BOOL      istransformed;       // region two: all modes = true except mode=10
+    short         wBits;               // number of bits for the root endpoint
+    short         tBits[NCHANNELS];    // number of bits used for the transformed endpoints
+    CGU_INT           format;              // which floating point format is used for decompression
+    BC6H_Vec3f     Paletef[2][16];
+
+    CGU_INT           index;               // for debugging
+    CGU_FLOAT     fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
+    CGU_FLOAT     cur_best_fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
+    CGU_INT           shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE];
+    CGU_INT           cur_best_shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE];
+    CGU_INT           entryCount[MAX_SUBSETS];
+    CGU_INT           cur_best_entryCount[MAX_SUBSETS];
+    CGU_FLOAT     partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
+    CGU_FLOAT     cur_best_partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
+    CGU_BOOL      optimized;           // were end points optimized during final encoding
+
+} BC6H_Encode_local;
+
+#ifndef ASPM_GPU
+using namespace std;
+// Bit-addressable view of a block of at most 16 bytes (128 bits).
+// Byte i of the external buffer occupies bits [8*i, 8*i+7], with the least
+// significant bit of each byte at the lowest bit position.
+class BitHeader
+{
+public:
+    // Clears all 128 bits, records sizeinbytes, and, when in is non-NULL and
+    // sizeinbytes is at most 16, loads sizeinbytes bytes from in.
+    BitHeader(const CGU_UINT8 in[], CGU_INT sizeinbytes)
+    {
+        m_bits.reset();
+        m_sizeinbytes = sizeinbytes;
+
+        if ((in != NULL) && (sizeinbytes <= 16))
+        {
+            // Init bits set with given data
+            CGU_INT bitpos = 0;
+            for (CGU_INT i = 0; i < sizeinbytes; i++)
+            {
+                for (CGU_INT j = 0; j < 8; j++)
+                {
+                    m_bits[bitpos] = ((in[i] >> j) & 1) ? 1 : 0;
+                    bitpos++;
+                }
+            }
+        }
+    }
+
+    ~BitHeader()
+    {
+    }
+
+    // Writes the first sizeinbytes bytes of the bit set to in.
+    // Nothing is written when in is NULL, when sizeinbytes exceeds the size
+    // given to the constructor, or when it exceeds the 16-byte capacity.
+    void transferbits(CGU_UINT8 in[], CGU_INT sizeinbytes)
+    {
+        // The capacity check is needed because the constructor records its
+        // size argument unchecked, and indexing m_bits past 128 bits is
+        // undefined. Each output byte is assembled locally and stored once,
+        // so no separate clearing pass over the buffer is required.
+        if ((in != NULL) && (sizeinbytes <= m_sizeinbytes) && (sizeinbytes <= 16))
+        {
+            CGU_INT bitpos = 0;
+            for (CGU_INT i = 0; i < sizeinbytes; i++)
+            {
+                CGU_UINT8 packed = 0;
+                for (CGU_INT j = 0; j < 8; j++)
+                {
+                    if (m_bits[bitpos]) packed |= (CGU_UINT8)(1 << j);
+                    bitpos++;
+                }
+                in[i] = packed;
+            }
+        }
+    }
+
+    // Returns the bitsize bits starting at bit position start as an integer;
+    // the bit at start becomes the least significant bit of the result.
+    // Returns 0 when bitsize is not positive.
+    CGU_INT getvalue(CGU_INT start, CGU_INT bitsize)
+    {
+        assert((bitsize <= 0) || ((start >= 0) && (start + bitsize <= 128)));
+
+        // Accumulate unsigned so that shifting never moves a bit into the
+        // sign position of a signed type.
+        CGU_UINT32 value = 0;
+        for (CGU_INT pos = start + bitsize - 1; pos >= start; pos--)
+        {
+            value = (value << 1) | (m_bits[pos] ? 1u : 0u);
+        }
+
+        return (CGU_INT)value;
+    }
+
+    // Stores bitsize bits of value at bit position start. Bit maskshift of
+    // value goes to position start, bit maskshift+1 to start+1, and so on.
+    void setvalue(CGU_INT start, CGU_INT bitsize, CGU_INT value, CGU_INT maskshift = 0)
+    {
+        assert((bitsize <= 0) || ((start >= 0) && (start + bitsize <= 128)));
+
+        // The mask is unsigned so that walking it up to the top bit is well defined.
+        const CGU_UINT32 source = (CGU_UINT32)value;
+        CGU_UINT32 mask = ((CGU_UINT32)0x1) << maskshift;
+        const CGU_INT end = start + bitsize - 1;
+        for (; start <= end; start++)
+        {
+            m_bits[start] = (source & mask) ? 1 : 0;
+            mask <<= 1;
+        }
+    }
+
+    std::bitset<128> m_bits;   // 16 bytes max
+    CGU_INT     m_sizeinbytes; // size passed to the constructor (not range-checked)
+};
+
+//==================== DECODER CODE ======================
+#define MAXENDPOINTS                    2
+#define U16MAX                          0xffff
+#define S16MAX                          0x7fff
+// SIGN_EXTEND(w,tbits) treats w as signed and ORs ones into every bit from
+// position tbits upwards when bit (tbits-1) of w is set; otherwise it yields
+// signed(w) unchanged.
+#define SIGN_EXTEND(w,tbits)            ((((signed(w))&(1<<((tbits)-1)))?((~0)<<(tbits)):0)|(signed(w)))
+
+// 16-bit float flavour.
+// NOTE(review): presumably the values stored in AMD_BC6H_Format::format - confirm.
+enum
+{
+    UNSIGNED_F16 = 1,
+    SIGNED_F16   = 2
+};
+
+// NOTE(review): presumably selects the one-region or two-region case - confirm against users.
+enum
+{
+    BC6_ONE = 0,
+    BC6_TWO
+};
+
+// Colour channel indices (red = 0, green = 1, blue = 2).
+enum
+{
+    C_RED = 0,
+    C_GREEN,
+    C_BLUE
+};
+
+// Three-component integer vector.
+struct BC6H_Vec3
+{
+    int x,y,z;
+};
+
+// Host-only (non-GPU) description of one BC6H block as used by the decoder
+// section of this header: header fields, block indices, endpoints, palettes
+// and search state. Its layout parallels BC6H_Encode_local, using plain C++
+// types and adding the integer palette Palete.
+struct AMD_BC6H_Format
+{
+    unsigned short region;             // one or two
+    unsigned short m_mode;             // m
+    int d_shape_index;                 // d
+    int rw;                            // endpt[0].A[0]
+    int rx;                            // endpt[0].B[0]
+    int ry;                            // endpt[1].A[0]
+    int rz;                            // endpt[1].B[0]
+    int gw;                            // endpt[0].A[1]
+    int gx;                            // endpt[0].B[1]
+    int gy;                            // endpt[1].A[1]
+    int gz;                            // endpt[1].B[1]
+    int bw;                            // endpt[0].A[2]
+    int bx;                            // endpt[0].B[2]
+    int by;                            // endpt[1].A[2]
+    int bz;                            // endpt[1].B[2]
+
+    // The same 16 index bytes, viewed either as a 4x4 grid or as a flat array.
+    union
+    {
+        CGU_UINT8 indices[4][4];            // Indices data after header block
+        CGU_UINT8 indices16[16];
+    };
+
+    float         din[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];   // Original data input
+    END_Points    EC[MAXENDPOINTS];    // compressed endpoints expressed as endpt[0].A[] and endpt[1].B[]
+    END_Points    E[MAXENDPOINTS];     // decompressed endpoints
+    bool          issigned;            // Format is 16 bit signed floating point
+    bool          istransformed;       // region two: all modes = true except mode=10
+    short         wBits;               // number of bits for the root endpoint
+    short         tBits[NCHANNELS];    // number of bits used for the transformed endpoints
+    int           format;              // which floating point format is used for decompression
+    BC6H_Vec3      Palete[2][16];
+    BC6H_Vec3f     Paletef[2][16];
+
+    int           index;               // for debugging
+    float         fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
+    float         cur_best_fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
+    int           shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE];
+    int           cur_best_shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE];
+    int           entryCount[MAX_SUBSETS];
+    int           cur_best_entryCount[MAX_SUBSETS];
+    float         partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
+    float         cur_best_partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
+    bool          optimized;           // were end points optimized during final encoding
+};
+
+// ===================================  END OF DECODER CODE ========================================================
+#endif
+
+//-------------------------------------------------
+// Set by Host : Read only in kernel
+//-------------------------------------------------
+// BC6H option block. Its members mirror the "setup at initialization time"
+// members of BC6H_Encode.
+typedef struct
+{
+    // Setup at initialization time
+    CGU_FLOAT  m_quality;
+    CGU_FLOAT  m_performance;
+    CGU_FLOAT  m_errorThreshold;
+    CGU_DWORD  m_validModeMask;
+    CGU_BOOL   m_imageNeedsAlpha;
+    CGU_BOOL   m_colourRestrict;
+    CGU_BOOL   m_alphaRestrict;
+    CGU_BOOL   m_isSigned;
+} CMP_BC6HOptions;
+
+// Encoder settings for BC6H: quality thresholds, the option values set up at
+// initialization time, and the dimensions of the source image.
+// SetDefaultBC6Options fills in a default for every member.
+typedef struct
+{
+    // These are quality parameters used to select when to use the high precision quantizer
+    // and shaker paths
+    CGU_FLOAT m_quantizerRangeThreshold;
+    CGU_FLOAT m_shakerRangeThreshold;
+    CGU_FLOAT m_partitionSearchSize;
+
+    // Setup at initialization time
+    CGU_FLOAT  m_quality;
+    CGU_FLOAT  m_performance;
+    CGU_FLOAT  m_errorThreshold;
+    CGU_DWORD  m_validModeMask;
+    CGU_BOOL   m_imageNeedsAlpha;
+    CGU_BOOL   m_colourRestrict;
+    CGU_BOOL   m_alphaRestrict;
+    CGU_BOOL   m_isSigned;
+
+    // Source image info : must be set prior to use in kernel
+    CGU_UINT32   m_src_width;
+    CGU_UINT32   m_src_height;
+    CGU_UINT32   m_src_stride;
+
+} BC6H_Encode;
+
+// Resets every member of *BC6Encode to its default value.
+// Does nothing when BC6Encode is NULL.
+CMP_STATIC void SetDefaultBC6Options(BC6H_Encode *BC6Encode)
+{
+    if (!BC6Encode)
+    {
+        return;
+    }
+
+    // Quantizer / shaker selection thresholds
+    BC6Encode->m_quantizerRangeThreshold = 0.0f;
+    BC6Encode->m_shakerRangeThreshold    = 0.0f;
+    BC6Encode->m_partitionSearchSize     = 0.20f;
+
+    // Options set up at initialization time
+    BC6Encode->m_quality         = 1.0f;
+    BC6Encode->m_performance     = 0.0f;
+    BC6Encode->m_errorThreshold  = 0.0f;
+    BC6Encode->m_validModeMask   = 0;
+    BC6Encode->m_imageNeedsAlpha = 0;
+    BC6Encode->m_colourRestrict  = 0;
+    BC6Encode->m_alphaRestrict   = 0;
+    BC6Encode->m_isSigned        = 0;
+
+    // Source image info: a single 4x4 block with no stride
+    BC6Encode->m_src_width  = 4;
+    BC6Encode->m_src_height = 4;
+    BC6Encode->m_src_stride = 0;
+}
+
+#endif
--- a/extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp
+++ b/extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp
--- a/extern/CMP_Core/shaders/BC7_Encode_Kernel.h
+++ b/extern/CMP_Core/shaders/BC7_Encode_Kernel.h
--- a/extern/CMP_Core/shaders/BCn_Common_Kernel.h
+++ b/extern/CMP_Core/shaders/BCn_Common_Kernel.h
--- a/extern/CMP_Core/shaders/Common_Def.h
+++ b/extern/CMP_Core/shaders/Common_Def.h
@ -0,0 +1,300 @@
+#ifndef _COMMON_DEFINITIONS_H
+#define _COMMON_DEFINITIONS_H
+
+//===============================================================================
+// Copyright (c) 2007-2019 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2004-2006 ATI Technologies Inc.
+//===============================================================================
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//
+//  File Name:   Common_Def.h
+//  Description: common definitions used for CPU/HPC/GPU
+//
+//////////////////////////////////////////////////////////////////////////////
+
+
+// Features
+#ifdef _WIN32
+//#define USE_ASPM_CODE
+#endif
+
+// Proxy ISPC compiler (Warning! Not all ASPM features will be available: expect build errors for specialized ASPM code!)
+#ifdef ISPC
+#define ASPM
+#endif
+
+// Using OpenCL Compiler
+#ifdef __OPENCL_VERSION__
+#define  ASPM_GPU
+#endif
+
+
+#ifdef _LINUX
+#undef ASPM_GPU
+#include <cstring>
+#include <cmath>
+#include <stdio.h>
+#include "cmp_math_vec4.h"
+#endif
+
+// CMP_MAX(x, y): yields the larger of x and y.
+// Function-like macro, so the selected argument is evaluated twice; do not pass
+// expressions with side effects (e.g. i++). Left alone if already defined.
+#ifndef CMP_MAX
+#define CMP_MAX(x, y) (((x) > (y)) ? (x) : (y))
+#endif
+
+// CMP_MIN(x, y): yields the smaller of x and y.
+// Function-like macro, so the selected argument is evaluated twice; do not pass
+// expressions with side effects. Left alone if already defined.
+#ifndef CMP_MIN
+#define CMP_MIN(x, y) (((x) < (y)) ? (x) : (y))
+#endif
+
+#define CMP_SET_BC13_DECODER_RGBA       //  Sets mapping BC1, BC2 & BC3 to decode Red,Green,Blue and Alpha 
+                                        //       RGBA to channels [0,1,2,3] else BGRA maps to [0,1,2,3]
+                                        //  BC4 alpha always maps as AAAA to channels [0,1,2,3] 
+                                        //  BC5 decoded (Red&Green) maps R,G,B=0,A=255 to [0,1,2,3] else  maps [B=0,G,R,A=255] to [0,1,2,3]
+
+//#define USE_BLOCK_LINEAR
+
+// Numeric limits and block-geometry constants used by the codecs.
+#define CMP_FLOAT_MAX       3.402823466e+38F // max value used to detect an Error in processing
+#define CMP_FLOAT_MAX_EXP   38               // decimal exponent of CMP_FLOAT_MAX
+#define USE_PROCESS_SEPERATE_ALPHA          // Enable this to use higher quality code using CompressDualIndexBlock
+#define COMPRESSED_BLOCK_SIZE           16  // Size of a compressed block in bytes
+#define MAX_DIMENSION_BIG               4   // Max number of channels  (RGBA)
+#define MAX_SUBSETS                     3   // Maximum number of possible subsets
+#define MAX_SUBSET_SIZE                 16  // Largest possible size for an individual subset
+#define BLOCK_SIZE_4X4X4                64  // 4 * 4 * 4 elements
+#define BLOCK_SIZE_4X4                  16  // 4 * 4 elements
+#define BlockX                          4   // presumably block width in texels - TODO confirm
+#define BlockY                          4   // presumably block height in texels - TODO confirm
+//#define USE_BLOCK_LINEAR    // Source Data is organized in linear form for each block : Experimental Code not fully developed 
+//#define USE_DOUBLE          // Default is to use float, enable to use double data types only for float definitions
+
+// Status codes for the core API. Enumerators are numbered consecutively from 0,
+// so CGU_CORE_OK is 0 and every error value is greater than 0.
+// NOTE: the spelling of CGU_CORE_ERR_UNKOWN is part of the public name and is kept as is.
+typedef enum {
+    CGU_CORE_OK = 0,                          // No errors, call was successful
+    CGU_CORE_ERR_UNKOWN,                      // An unknown error occurred
+    CGU_CORE_ERR_NEWMEM,                      // New memory allocation failed
+    CGU_CORE_ERR_INVALIDPTR,                  // The pointer value used is invalid or null
+    CGU_CORE_ERR_RANGERED,                    // Value for the Red   channel is out of range (too high or too low)
+    CGU_CORE_ERR_RANGEGREEN,                  // Value for the Green channel is out of range (too high or too low)
+    CGU_CORE_ERR_RANGEBLUE,                   // Value for the Blue  channel is out of range (too high or too low)
+} CGU_ERROR_CODES;
+
+
+//---------------------------------------------
+// Predefinitions for GPU and CPU compiled code
+//---------------------------------------------
+
+// GPU branch: active when ASPM_GPU is defined (set above when __OPENCL_VERSION__ is
+// present, and removed again under _LINUX). Vector and scalar aliases map onto the
+// built-in types of the GPU language.
+#ifdef ASPM_GPU  // GPU Based code
+        // ==== Vectors ====
+        typedef float2  CGU_Vec2f;
+        typedef float2  CGV_Vec2f;
+        typedef float3  CMP_Vec3f;
+        typedef float3  CGU_Vec3f;
+        typedef float3  CGV_Vec3f;
+        typedef uchar3  CGU_Vec3uc;
+        typedef uchar3  CGV_Vec3uc;
+        typedef uchar4  CMP_Vec4uc;
+        typedef uchar4  CGU_Vec4uc;
+        typedef uchar4  CGV_Vec4uc;
+
+        #define USE_BC7_SP_ERR_IDX
+        // Call as ASPM_PRINT(("fmt", args)): the extra parentheses become printf's argument list
+        #define ASPM_PRINT(args)      printf args
+        #define BC7_ENCODECLASS
+
+        // Qualifiers used by the shared sources; 'uniform' and 'varying' expand to nothing here
+        #define CMP_EXPORT
+        #define INLINE
+        #define uniform
+        #define varying
+        #define CMP_GLOBAL          __global
+        #define CMP_KERNEL          __kernel
+        #define CMP_CONSTANT        __constant
+        #define CMP_STATIC
+
+
+        // ==== Scalars: CGU_* and CGV_* resolve to the same underlying types in this branch ====
+        typedef unsigned int        CGU_DWORD;      //32bits
+        typedef int                 CGU_INT;        //32bits
+        typedef int                 CGU_BOOL;
+        typedef unsigned short      CGU_SHORT;      //16bits
+        typedef float               CGU_FLOAT;
+        typedef unsigned int        uint32;     // need to remove this def
+
+        typedef int                 CGV_INT;
+        typedef unsigned int        CGU_UINT;
+        typedef int                 CGUV_INT;
+        typedef int                 CGV_BOOL;
+
+        typedef char                CGU_INT8;
+        typedef unsigned char       CGU_UINT8;
+        typedef short               CGU_INT16;
+        typedef unsigned short      CGU_UINT16;
+        typedef int                 CGU_INT32;
+        typedef unsigned int        CGU_UINT32;
+        typedef unsigned long       CGU_UINT64;     // assumes 'long' is 64 bits in the GPU language (true for OpenCL C)
+
+        typedef char                CGV_INT8;
+        typedef unsigned char       CGV_UINT8;
+        typedef short               CGV_INT16;
+        typedef unsigned short      CGV_UINT16;
+        typedef int                 CGV_INT32;
+        typedef unsigned int        CGV_UINT32;
+        typedef unsigned long       CGV_UINT64;     // assumes 'long' is 64 bits in the GPU language (true for OpenCL C)
+
+        typedef float               CGV_FLOAT;
+
+        #define TRUE  1
+        #define FALSE 0
+        #define CMP_CDECL           // no calling-convention keyword in this branch
+
+#else
+    // CPU & ASPM definitions
+
+    // ASPM branch: active when ASPM is defined (set above when ISPC is defined).
+    // 'uniform' / 'varying' are used directly as type qualifiers here rather than
+    // being defined away as macros.
+    #ifdef ASPM // SPMD ,SIMD CPU code
+        // using hybrid (CPU/GPU) aspm compiler 
+        #define ASPM_PRINT(args)       print args   // presumably the ISPC built-in print - TODO confirm
+        #define CMP_USE_FOREACH_ASPM
+        #define __ASPM__
+        #define BC7_ENCODECLASS
+
+        #define USE_BC7_SP_ERR_IDX
+        //#define USE_BC7_RAMP
+
+        #define CMP_EXPORT          export
+        #define TRUE            true
+        #define FALSE           false
+        typedef uniform bool    CGU_BOOL;
+        typedef bool            CGV_BOOL;
+
+        // Unsigned fixed-width aliases built on the compiler's sized integer keywords
+        typedef unsigned int8   uint8;
+        typedef unsigned int16  uint16;
+        typedef unsigned int32  uint32;
+        typedef unsigned int64  uint64;
+        typedef uniform float   CGU_FLOAT;
+        typedef varying float   CGV_FLOAT;
+        typedef uniform uint8   CGU_UINT8;
+        typedef varying uint8   CGV_UINT8;
+
+
+        // Short vectors, written with the compiler's type<N> syntax
+        typedef CGV_UINT8<4> CGV_Vec4uc;
+        typedef CGU_UINT8<4> CGU_Vec4uc;
+
+        typedef CGU_FLOAT<3> CGU_Vec3f;
+        typedef CGV_FLOAT<3> CGV_Vec3f;
+
+        typedef CGU_FLOAT<2> CGU_Vec2f;
+        typedef CGV_FLOAT<2> CGV_Vec2f;
+
+        #define CMP_CDECL           // no calling-convention keyword in this branch
+
+    #else   // standard CPU code
+        #include <stdio.h>
+        #include <string>
+        #include "cmp_math_vec4.h"
+
+        // using CPU compiler
+        #define ASPM_PRINT(args)  printf args
+        #define USE_BC7_RAMP
+        #define USE_BC7_SP_ERR_IDX
+
+        #define CMP_EXPORT
+        #define BC7_ENCODECLASS BC7_EncodeClass::
+        #define TRUE            1
+        #define FALSE           0
+        #define uniform
+        #define varying
+
+        // Fixed-width aliases for the host compiler.
+        // 'long' is only 32 bits wide on Windows (LLP64) and on 32-bit targets, so the
+        // 64-bit aliases are built on 'long long', which is at least 64 bits everywhere.
+        // NOTE: int8 is plain 'char', whose signedness is implementation-defined.
+        typedef char                int8;
+        typedef short               int16;
+        typedef int                 int32;
+        typedef long long           int64;
+        typedef unsigned char       uint8;
+        typedef unsigned short      uint16;
+        typedef unsigned int        uint32;
+        typedef unsigned long long  uint64;
+
+        typedef int8            CGV_BOOL;
+        typedef int8            CGU_BOOL;
+        typedef int16           CGU_WORD;
+        // NOTE(review): the GPU branch declares CGU_SHORT as 'unsigned short' (16 bits) while
+        // this branch maps it to uint8 - confirm which width is intended before relying on it.
+        typedef uint8           CGU_SHORT;
+        typedef int64           CGU_LONG;
+        typedef uint64          CGU_ULONG;
+
+        typedef uniform float   CGU_FLOAT;
+        typedef varying float   CGV_FLOAT;
+        typedef uniform uint8   CGU_UINT8;
+        typedef varying uint8   CGV_UINT8;
+        // Calling-convention keyword for the host build.
+        // _WIN32 is predefined by the compiler for both 32- and 64-bit Windows targets,
+        // whereas plain WIN32 only exists when an SDK header or the project defines it.
+        // Testing _WIN32 also matches the check CMP_Core.h uses for the same macro.
+        #if defined(_WIN32)
+        #define CMP_CDECL __cdecl
+        #else
+        #define CMP_CDECL
+        #endif
+    #endif
+
+    // Common CPU & ASPM definitions
+    // Everything below is shared by the ASPM and the standard CPU branches.
+    #define CMP_ASSERT(arg)                 // expands to nothing: assertions are compiled out
+
+    #define CMP_GLOBAL
+
+    #define CMP_KERNEL
+    // Address-space keywords of the GPU language are mapped onto 'const' here.
+    // NOTE(review): these redefine identifiers starting with a double underscore,
+    // which are reserved for the implementation in C and C++.
+    #define __local                 const
+    #define __constant              const
+    #define CMP_CONSTANT            const
+    #define INLINE                  inline
+    #define CMP_STATIC              static
+
+
+    // NOTE(review): CGU_DWORD is signed (int32) here but 'unsigned int' in the GPU branch - confirm intent.
+    typedef uniform int32           CGU_DWORD;
+    typedef uniform uint8           CGU_UBYTE;
+    typedef uniform int             CGU_INT;
+    typedef uniform int8            CGU_INT8;
+
+    typedef uniform int16           CGU_INT16;
+    typedef uniform uint16          CGU_UINT16;
+    typedef uniform int32           CGU_INT32;
+    typedef uniform uint32          CGU_UINT32;
+    typedef uniform uint64          CGU_UINT64;
+
+    // CGV_* aliases carry no explicit 'uniform' qualifier
+    typedef int                     CGV_INT;
+    typedef int8                    CGV_INT8;
+    typedef int16                   CGV_INT16;
+    typedef int32                   CGV_INT32;
+    typedef uint16                  CGV_UINT16;
+    typedef uint32                  CGV_UINT32;
+    typedef uint64                  CGV_UINT64;
+#endif // ASPM_GPU
+
+
+// Describes a source image and the quality requested for it.
+// NOTE(review): units are not stated in this header; the *_in_blocks fields
+// presumably count 4x4 blocks - confirm against the code that fills them in.
+typedef struct 
+{
+    CGU_UINT32     m_src_width;         // source image width
+    CGU_UINT32     m_src_height;        // source image height
+    CGU_UINT32     m_width_in_blocks;   // image width expressed in blocks
+    CGU_UINT32     m_height_in_blocks;  // image height expressed in blocks
+    CGU_FLOAT      m_fquality;          // requested quality
+} Source_Info;
+
+// Ref Compute_CPU_HPC
+// Descriptor for a block of byte-addressed image data plus its dimensions.
+struct texture_surface
+{
+    CGU_UINT8*  ptr;        // presumably the first byte of the surface data - TODO confirm
+    CGU_INT     width;
+    CGU_INT     height;
+    CGU_INT     stride;
+    CGU_INT     channels;
+};
+
+#endif
--- a/extern/CMP_Core/shaders/CopyFiles.bat
+++ b/extern/CMP_Core/shaders/CopyFiles.bat
@ -0,0 +1,50 @@
+REM ====================================
+REM Hybrid Codecs: Full support in v4.0
+REM ====================================
+
+REM gets the output dir
+set BUILD_OUTDIR=%1
+
+REM get the batch files dir 
+SET mypath=%~dp0
+echo %mypath:~0,-1%
+
+REM Create the destination folders when they are missing.
+REM BUILD_OUTDIR is joined directly with "Plugins" (it is expected to end with a
+REM backslash), exactly as the XCopy targets further down use it.
+REM The check previously read %outpath%, which this script never sets.
+IF NOT EXIST %BUILD_OUTDIR%Plugins mkdir %BUILD_OUTDIR%Plugins
+IF NOT EXIST %BUILD_OUTDIR%Plugins\Compute mkdir %BUILD_OUTDIR%Plugins\Compute
+
+REM Build Vulkan Shader Binary
+REM "%VULKAN_SDK%"\bin\glslangvalidator -V %mypath:~0,-1%\BC1.comp -o %BUILD_OUTDIR%\Plugins\Compute\BC1.spv
+REM IF %ERRORLEVEL% GTR 0 exit 123
+
+REM Enabled in v4.0
+REM 
+REM del %BUILD_OUTDIR%Plugins\Compute\BC1_Encode_Kernel.cpp.cmp
+REM del %BUILD_OUTDIR%Plugins\Compute\BC2_Encode_Kernel.cpp.cmp
+REM del %BUILD_OUTDIR%Plugins\Compute\BC3_Encode_Kernel.cpp.cmp
+REM del %BUILD_OUTDIR%Plugins\Compute\BC4_Encode_Kernel.cpp.cmp
+REM del %BUILD_OUTDIR%Plugins\Compute\BC5_Encode_Kernel.cpp.cmp
+REM del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_Kernel.cpp.cmp
+REM del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.cpp.cmp
+
+XCopy /r /d /y "%mypath:~0,-1%\Common_Def.h"        %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BCn_Common_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.h"     %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.cpp"   %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
+
+echo "Dependencies copied done"
+
+
+
+
--- a/extern/CMP_Core/source/CMP_Core.h
+++ b/extern/CMP_Core/source/CMP_Core.h
@ -0,0 +1,153 @@
+//=====================================================================
+// Copyright (c) 2019   Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+/// \file CMP_Core.h
+//
+//=====================================================================
+
+#ifndef CMP_CORE_H
+#define CMP_CORE_H
+
+#include <stddef.h>     // NULL, used by the CMP_DEFAULTNULL default argument below
+#include <stdint.h>
+#ifndef __cplusplus
+#include <stdbool.h>    // 'bool' parameters in the Set*() prototypes when built as C
+#endif
+
+// Calling convention applied to every entry point declared in this header:
+// __cdecl on Windows, nothing elsewhere.
+#ifdef _WIN32
+#define CMP_CDECL __cdecl
+#else
+#define CMP_CDECL
+#endif
+
+//====================================================================================
+// API Definitions for Core API
+//------------------------------------------------------------------------------------
+// All APIs return 0 on success, otherwise an error code > 0
+// See Common_Def.h CGU_CORE_ values for the error codes
+//=====================================================================================
+
+//======================================================================================================
+// Block level setting option: Create and Destroy Reference Pointers
+//======================================================================================================
+// Context create and destroy to use for BCn codec settings, where n is the set [1,2,3,4,5,6,7]
+// All codecs will use default max quality settings, users can create multiple contexts to 
+// set quality levels, masks , channel mapping, etc...
+
+int CMP_CDECL CreateOptionsBC1(void **optionsBC1);
+int CMP_CDECL CreateOptionsBC2(void **optionsBC2);
+int CMP_CDECL CreateOptionsBC3(void **optionsBC3);
+int CMP_CDECL CreateOptionsBC4(void **optionsBC4);
+int CMP_CDECL CreateOptionsBC5(void **optionsBC5);
+int CMP_CDECL CreateOptionsBC6(void **optionsBC6);
+int CMP_CDECL CreateOptionsBC7(void **optionsBC7);
+
+int CMP_CDECL DestroyOptionsBC1(void *optionsBC1);
+int CMP_CDECL DestroyOptionsBC2(void *optionsBC2);
+int CMP_CDECL DestroyOptionsBC3(void *optionsBC3);
+int CMP_CDECL DestroyOptionsBC4(void *optionsBC4);
+int CMP_CDECL DestroyOptionsBC5(void *optionsBC5);
+int CMP_CDECL DestroyOptionsBC6(void *optionsBC6);
+int CMP_CDECL DestroyOptionsBC7(void *optionsBC7);
+
+
+//======================================================================================================
+// Block level settings using the options Reference Pointers
+//======================================================================================================
+
+// Setting channel Weights : Applies to BC1, BC2 and BC3 valid ranges are [0..1.0f] Default is {1.0f, 1.0f , 1.0f}
+// Use channel weightings. With swizzled formats the weighting applies to the data within the specified channel not the channel itself.
+int CMP_CDECL SetChannelWeightsBC1(void *options, float WeightRed, float WeightGreen, float WeightBlue);
+int CMP_CDECL SetChannelWeightsBC2(void *options, float WeightRed, float WeightGreen, float WeightBlue);
+int CMP_CDECL SetChannelWeightsBC3(void *options, float WeightRed, float WeightGreen, float WeightBlue);
+
+
+//  True sets mapping CMP_Core BC1, BC2 & BC3 to decode Red,Green,Blue and Alpha as
+//       RGBA to channels [0,1,2,3] else BGRA maps to [0,1,2,3]
+//  Default is set to true.
+int CMP_CDECL SetDecodeChannelMapping(void *options, bool mapRGBA);
+
+int CMP_CDECL SetQualityBC1(void *options, float fquality);
+int CMP_CDECL SetQualityBC2(void *options, float fquality);
+int CMP_CDECL SetQualityBC3(void *options, float fquality);
+int CMP_CDECL SetQualityBC4(void *options, float fquality);
+int CMP_CDECL SetQualityBC5(void *options, float fquality);
+int CMP_CDECL SetQualityBC6(void *options, float fquality);
+int CMP_CDECL SetQualityBC7(void *options, float fquality);
+
+
+int CMP_CDECL SetAlphaThresholdBC1(void *options, unsigned char alphaThreshold);
+
+int CMP_CDECL SetMaskBC6(void *options, unsigned int  mask);
+int CMP_CDECL SetMaskBC7(void *options, unsigned char mask);
+
+int CMP_CDECL SetAlphaOptionsBC7(void *options, bool imageNeedsAlpha, bool colourRestrict, bool alphaRestrict);
+int CMP_CDECL SetErrorThresholdBC7(void *options, float minThreshold, float maxThreshold);
+
+//======================================================================================================
+// (4x4) Block level 4 channel source CompressBlock and DecompressBlock API for BCn Codecs
+//======================================================================================================
+// The options parameter for these APIs can be set to null in the calls if the default settings are sufficient
+// Example: CompressBlockBC1(srcBlock,16,cmpBlock,NULL);   For "C" call
+//          CompressBlockBC1(srcBlock,16,cmpBlock);        For "C++" calls
+//
+// To use this parameter first create the options context using the CreateOptions call
+// then use the Set Options to set various codec settings and pass them to the appropriate 
+// Compress or Decompress API.
+// The source (srcBlock) channel format is expected to be RGBA:8888 by default for LDR Codecs
+// for BC6H the format is RGBA Half float (16 bits per channel)
+//------------------------------------------------------------------------------------------------------
+// CMP_DEFAULTNULL is appended to the trailing 'options' parameter of the block APIs.
+// In C++ it supplies a default argument of NULL, so the parameter may be omitted;
+// in C it expands to nothing and the argument must always be passed.
+#ifdef __cplusplus
+#define CMP_DEFAULTNULL  =NULL
+#else
+#define CMP_DEFAULTNULL
+#endif
+
+//=========================================================================================================
+// 4 channel Sources, default format RGBA:8888 is processed as a 4x4 block starting at srcBlock location
+// where each row of the block is calculated from srcStride
+//=========================================================================================================
+int CMP_CDECL CompressBlockBC1(const unsigned char *srcBlock, unsigned int  srcStrideInBytes, unsigned char cmpBlock[8 ], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL CompressBlockBC2(const unsigned char *srcBlock, unsigned int  srcStrideInBytes, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL CompressBlockBC3(const unsigned char *srcBlock, unsigned int  srcStrideInBytes, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL CompressBlockBC7(const unsigned char *srcBlock, unsigned int  srcStrideInBytes, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL);
+
+int CMP_CDECL DecompressBlockBC1(const unsigned char cmpBlock[8 ], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL DecompressBlockBC2(const unsigned char cmpBlock[16], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL DecompressBlockBC3(const unsigned char cmpBlock[16], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL DecompressBlockBC7(const unsigned char cmpBlock[16], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL);
+
+//================================================
+// 1 channel Source 4x4 8 bits per block
+//================================================
+int CMP_CDECL CompressBlockBC4(const unsigned char *srcBlock, unsigned int  srcStrideInBytes, unsigned char cmpBlock[8], const void *options  CMP_DEFAULTNULL);
+int CMP_CDECL DecompressBlockBC4(const unsigned char cmpBlock[8], unsigned char srcBlock[16], const void *options  CMP_DEFAULTNULL);
+
+//================================================
+// 2 channel Source 2x(4x4 8 bits)
+//================================================
+int CMP_CDECL CompressBlockBC5(const unsigned char *srcBlock1, unsigned int srcStrideInBytes1,
+                               const unsigned char *srcBlock2, unsigned int srcStrideInBytes2,
+                               unsigned char cmpBlock[16], const void *options  CMP_DEFAULTNULL);
+int CMP_CDECL DecompressBlockBC5(const unsigned char cmpBlock[16], unsigned char srcBlock1[16], unsigned char srcBlock2[16], const void *options  CMP_DEFAULTNULL);
+
+//========================================================================================
+// For 3 channel Source  RGB_16, Note srcStride is in unsigned short steps (2 bytes each)
+//========================================================================================
+int CMP_CDECL CompressBlockBC6(const unsigned short *srcBlock, unsigned int srcStrideInShorts, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL DecompressBlockBC6(const unsigned char cmpBlock[16], unsigned short srcBlock[48], const void *options CMP_DEFAULTNULL);
+
+#endif  // CMP_CORE_H
--- a/extern/CMP_Core/source/cmp_math_vec4.h
+++ b/extern/CMP_Core/source/cmp_math_vec4.h
@ -0,0 +1,417 @@
+//=====================================================================
+// Copyright 2019 (c), Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef CMP_MATH_VEC4_H
+#define CMP_MATH_VEC4_H
+
+//====================================================
+// Vector Class definitions for CPU & Intrinsics
+//====================================================
+
+#if defined (_LINUX) || defined (_WIN32)
+
+//============================================= VEC2 ==================================================
+/// Two-component vector with public members x and y.
+template<typename T>
+class Vec2
+{
+public:
+
+    T x;
+    T y;
+
+    // ---------------------------------------------------------------
+    //  Construction
+    // ---------------------------------------------------------------
+
+    /// Builds a vector with both components set to zero
+    Vec2()
+        : x(static_cast<T>(0))
+        , y(static_cast<T>(0))
+    {
+    }
+
+    /// Builds a vector from individual component values
+    Vec2(const T& vx, const T& vy)
+        : x(vx)
+        , y(vy)
+    {
+    }
+
+    /// Builds a component-wise copy of another vector
+    Vec2(const Vec2& val)
+        : x(val.x)
+        , y(val.y)
+    {
+    }
+
+    /// Builds a vector with every component equal to v (usable as an implicit conversion)
+    Vec2(const T& v)
+        : x(v)
+        , y(v)
+    {
+    }
+
+    // ---------------------------------------------------------------
+    //  Conversion, indexing and assignment
+    // ---------------------------------------------------------------
+
+    /// Read-only view of the object as a pointer to T
+    operator const T* () const
+    {
+        return reinterpret_cast<const T*>(this);
+    }
+
+    /// Writable view of the object as a pointer to T
+    operator T* ()
+    {
+        return reinterpret_cast<T*>(this);
+    }
+
+    /// Component access by position; the index is not range-checked
+    const T& operator[](int i) const
+    {
+        return *(reinterpret_cast<const T*>(this) + i);
+    }
+
+    T& operator[](int i)
+    {
+        return *(reinterpret_cast<T*>(this) + i);
+    }
+
+    /// Copies both components from rhs and returns this object by const reference
+    const Vec2& operator=(const Vec2& rhs)
+    {
+        x = rhs.x;
+        y = rhs.y;
+        return *this;
+    }
+
+    // ---------------------------------------------------------------
+    //  Comparison
+    // ---------------------------------------------------------------
+
+    /// True when every component matches
+    bool operator==(const Vec2& rhs) const
+    {
+        return x == rhs.x && y == rhs.y;
+    }
+
+    /// True when at least one component differs
+    bool operator!=(const Vec2& rhs) const
+    {
+        return x != rhs.x || y != rhs.y;
+    }
+
+    // ---------------------------------------------------------------
+    //  Arithmetic
+    // ---------------------------------------------------------------
+
+    /// Component-wise sum
+    const Vec2 operator+(const Vec2& rhs) const
+    {
+        return Vec2(x + rhs.x, y + rhs.y);
+    }
+
+    /// Component-wise difference
+    const Vec2 operator-(const Vec2& rhs) const
+    {
+        return Vec2(x - rhs.x, y - rhs.y);
+    }
+
+    /// Every component multiplied by the scalar v
+    const Vec2 operator*(const T& v) const
+    {
+        return Vec2(x * v, y * v);
+    }
+
+    /// Every component divided by the scalar v
+    const Vec2 operator/(const T& v) const
+    {
+        return Vec2(x / v, y / v);
+    }
+
+    /// Adds rhs to this vector
+    Vec2& operator+=(const Vec2& rhs)
+    {
+        x += rhs.x;
+        y += rhs.y;
+        return *this;
+    }
+
+    /// Subtracts rhs from this vector
+    Vec2& operator-=(const Vec2& rhs)
+    {
+        x -= rhs.x;
+        y -= rhs.y;
+        return *this;
+    }
+
+    /// Multiplies this vector by the scalar v
+    Vec2& operator*=(const T& v)
+    {
+        x *= v;
+        y *= v;
+        return *this;
+    }
+
+    /// Divides this vector by the scalar v
+    Vec2& operator/=(const T& v)
+    {
+        x /= v;
+        y /= v;
+        return *this;
+    }
+};
+
+typedef Vec2<float>  CMP_Vec2f;
+typedef Vec2<float>  CGU_Vec2f;
+typedef Vec2<float>  CGV_Vec2f;
+typedef Vec2<double> CMP_Vec2d;
+typedef Vec2<int>    CMP_Vec2i;
+
+//}
+
+
+
+
+//============================================= VEC3 ==================================================
+/// Three-component vector with public members x, y and z.
+template<typename T>
+class Vec3
+{
+public:
+
+    T x;
+    T y;
+    T z;
+
+    // ---------------------------------------------------------------
+    //  Construction
+    // ---------------------------------------------------------------
+
+    /// Builds a vector with all components set to zero
+    Vec3()
+        : x(static_cast<T>(0))
+        , y(static_cast<T>(0))
+        , z(static_cast<T>(0))
+    {
+    }
+
+    /// Builds a vector from individual component values
+    Vec3(const T& vx, const T& vy, const T& vz)
+        : x(vx)
+        , y(vy)
+        , z(vz)
+    {
+    }
+
+    /// Builds a component-wise copy of another vector
+    Vec3(const Vec3& val)
+        : x(val.x)
+        , y(val.y)
+        , z(val.z)
+    {
+    }
+
+    /// Builds a vector with every component equal to v (usable as an implicit conversion)
+    Vec3(const T& v)
+        : x(v)
+        , y(v)
+        , z(v)
+    {
+    }
+
+    /// Builds a vector from the first three elements of the array v
+    Vec3(const T* v)
+        : x(v[0])
+        , y(v[1])
+        , z(v[2])
+    {
+    }
+
+    // ---------------------------------------------------------------
+    //  Conversion and assignment
+    // ---------------------------------------------------------------
+
+    /// Read-only view of the object as a pointer to T
+    operator const T* () const
+    {
+        return reinterpret_cast<const T*>(this);
+    }
+
+    /// Writable view of the object as a pointer to T
+    operator T* ()
+    {
+        return reinterpret_cast<T*>(this);
+    }
+
+    /// Copies all components from rhs and returns this object by const reference
+    const Vec3& operator=(const Vec3& rhs)
+    {
+        x = rhs.x;
+        y = rhs.y;
+        z = rhs.z;
+        return *this;
+    }
+
+    // ---------------------------------------------------------------
+    //  Comparison
+    // ---------------------------------------------------------------
+
+    /// True when every component matches
+    bool operator==(const Vec3& rhs) const
+    {
+        return x == rhs.x && y == rhs.y && z == rhs.z;
+    }
+
+    /// True when at least one component differs
+    bool operator!=(const Vec3& rhs) const
+    {
+        return x != rhs.x || y != rhs.y || z != rhs.z;
+    }
+
+    // ---------------------------------------------------------------
+    //  Arithmetic
+    // ---------------------------------------------------------------
+
+    /// Component-wise sum
+    const Vec3 operator+(const Vec3& rhs) const
+    {
+        return Vec3(x + rhs.x, y + rhs.y, z + rhs.z);
+    }
+
+    /// Component-wise difference
+    const Vec3 operator-(const Vec3& rhs) const
+    {
+        return Vec3(x - rhs.x, y - rhs.y, z - rhs.z);
+    }
+
+    /// Every component multiplied by the scalar v
+    const Vec3 operator*(const T& v) const
+    {
+        return Vec3(x * v, y * v, z * v);
+    }
+
+    /// Every component divided by the scalar v
+    const Vec3 operator/(const T& v) const
+    {
+        return Vec3(x / v, y / v, z / v);
+    }
+
+    /// Component-wise quotient of two vectors
+    const Vec3 operator/(const Vec3& rhs) const
+    {
+        return Vec3(x / rhs.x, y / rhs.y, z / rhs.z);
+    }
+
+    /// Adds rhs to this vector
+    Vec3& operator+=(const Vec3& rhs)
+    {
+        x += rhs.x;
+        y += rhs.y;
+        z += rhs.z;
+        return *this;
+    }
+
+    /// Subtracts rhs from this vector
+    Vec3& operator-=(const Vec3& rhs)
+    {
+        x -= rhs.x;
+        y -= rhs.y;
+        z -= rhs.z;
+        return *this;
+    }
+
+    /// Multiplies this vector by the scalar v
+    Vec3& operator*=(const T& v)
+    {
+        x *= v;
+        y *= v;
+        z *= v;
+        return *this;
+    }
+
+    /// Divides this vector by the scalar v
+    Vec3& operator/=(const T& v)
+    {
+        x /= v;
+        y /= v;
+        z /= v;
+        return *this;
+    }
+};
+
+typedef Vec3<float>             CGU_Vec3f;
+typedef Vec3<float>             CGV_Vec3f;
+typedef Vec3<unsigned char>     CGU_Vec3uc;
+typedef Vec3<unsigned char>     CGV_Vec3uc;
+
+typedef Vec3<float>             CMP_Vec3f;
+typedef Vec3<double>            CMP_Vec3d;
+typedef Vec3<int>               CMP_Vec3i;
+typedef Vec3<unsigned char>     CMP_Vec3uc;
+
+//============================================= VEC4 ==================================================
+/// Four-component vector with public members x, y, z and w.
+template<typename T>
+class Vec4
+{
+public:
+
+    T x;
+    T y;
+    T z;
+    T w;
+
+    // ---------------------------------------------------------------
+    //  Construction
+    // ---------------------------------------------------------------
+
+    /// Builds a vector with all components set to zero
+    Vec4()
+        : x(static_cast<T>(0))
+        , y(static_cast<T>(0))
+        , z(static_cast<T>(0))
+        , w(static_cast<T>(0))
+    {
+    }
+
+    /// Builds a vector from individual component values
+    Vec4(const T& vx, const T& vy, const T& vz, const T& vw)
+        : x(vx)
+        , y(vy)
+        , z(vz)
+        , w(vw)
+    {
+    }
+
+    /// Builds a component-wise copy of another vector
+    Vec4(const Vec4& val)
+        : x(val.x)
+        , y(val.y)
+        , z(val.z)
+        , w(val.w)
+    {
+    }
+
+    /// Builds a vector with every component equal to v (usable as an implicit conversion)
+    Vec4(const T& v)
+        : x(v)
+        , y(v)
+        , z(v)
+        , w(v)
+    {
+    }
+
+    /// Builds a vector from the first four elements of the array v
+    Vec4(const T* v)
+        : x(v[0])
+        , y(v[1])
+        , z(v[2])
+        , w(v[3])
+    {
+    }
+
+    // ---------------------------------------------------------------
+    //  Conversion and assignment
+    // ---------------------------------------------------------------
+
+    /// Read-only view of the object as a pointer to T
+    operator const T* () const
+    {
+        return reinterpret_cast<const T*>(this);
+    }
+
+    /// Writable view of the object as a pointer to T
+    operator T* ()
+    {
+        return reinterpret_cast<T*>(this);
+    }
+
+    /// Copies all components from rhs and returns this object by const reference
+    const Vec4& operator=(const Vec4& rhs)
+    {
+        x = rhs.x;
+        y = rhs.y;
+        z = rhs.z;
+        w = rhs.w;
+        return *this;
+    }
+
+    // ---------------------------------------------------------------
+    //  Comparison
+    // ---------------------------------------------------------------
+
+    /// True when every component matches
+    bool operator==(const Vec4& rhs) const
+    {
+        return x == rhs.x && y == rhs.y && z == rhs.z && w == rhs.w;
+    }
+
+    /// True when at least one component differs
+    bool operator!=(const Vec4& rhs) const
+    {
+        return x != rhs.x || y != rhs.y || z != rhs.z || w != rhs.w;
+    }
+
+    // ---------------------------------------------------------------
+    //  Arithmetic
+    // ---------------------------------------------------------------
+
+    /// Component-wise sum
+    const Vec4 operator+(const Vec4& rhs) const
+    {
+        return Vec4(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w);
+    }
+
+    /// Component-wise difference
+    const Vec4 operator-(const Vec4& rhs) const
+    {
+        return Vec4(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w);
+    }
+
+    /// Every component multiplied by the scalar v
+    const Vec4 operator*(const T& v) const
+    {
+        return Vec4(x * v, y * v, z * v, w * v);
+    }
+
+    /// Every component divided by the scalar v
+    const Vec4 operator/(const T& v) const
+    {
+        return Vec4(x / v, y / v, z / v, w / v);
+    }
+
+    /// Component-wise quotient of two vectors
+    const Vec4 operator/(const Vec4& rhs) const
+    {
+        return Vec4(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w);
+    }
+
+    /// Adds rhs to this vector
+    Vec4& operator+=(const Vec4& rhs)
+    {
+        x += rhs.x;
+        y += rhs.y;
+        z += rhs.z;
+        w += rhs.w;
+        return *this;
+    }
+
+    /// Subtracts rhs from this vector
+    Vec4& operator-=(const Vec4& rhs)
+    {
+        x -= rhs.x;
+        y -= rhs.y;
+        z -= rhs.z;
+        w -= rhs.w;
+        return *this;
+    }
+
+    /// Multiplies this vector by the scalar v
+    Vec4& operator*=(const T& v)
+    {
+        x *= v;
+        y *= v;
+        z *= v;
+        w *= v;
+        return *this;
+    }
+
+    /// Divides this vector by the scalar v
+    Vec4& operator/=(const T& v)
+    {
+        x /= v;
+        y /= v;
+        z /= v;
+        w /= v;
+        return *this;
+    }
+};
+
+#include <stdio.h>
+#include "xmmintrin.h"
+#include <math.h>
+#include <float.h> 
+
+// SSE Vec4
+//
+// Thin wrapper around one __m128 (four packed floats) that exposes the SSE
+// packed-float intrinsics as C++ operators. Arithmetic, comparison and bitwise
+// operators all act on the four lanes independently.
+#ifdef _LINUX
+class CMP_SSEVec4f
+#else
+#include "intrin.h"
+class   __declspec(align(16)) CMP_SSEVec4f
+#endif
+{
+public:
+
+    union
+    {
+        __m128 vec128;          // float Vector 128 bits in total (16 Bytes) = array of 4 floats
+#ifdef _LINUX
+        float f32[4];           // per-lane view used by operator[] on _LINUX builds
+#endif
+    };
+
+    // constructors
+    // NOTE: the default constructor leaves vec128 uninitialized.
+    inline CMP_SSEVec4f() {};
+    // Lanes are set in argument order: lane 0 = x ... lane 3 = w.
+    inline CMP_SSEVec4f(float x, float y, float z, float w) : vec128(_mm_setr_ps(x, y, z, w)) {};
+    inline CMP_SSEVec4f(__m128 vec) : vec128(vec) {}
+    // Loads data[0]..data[3]. Uses the unaligned load so that any float pointer is
+    // accepted; _mm_load_ps would fault when data is not 16-byte aligned.
+    inline CMP_SSEVec4f(const float* data) : vec128(_mm_loadu_ps(data)) {};
+    // Broadcasts scalar into all four lanes.
+    inline CMP_SSEVec4f(float scalar) : vec128(_mm_load1_ps(&scalar)) {};
+
+    // copy and assignment
+    inline CMP_SSEVec4f(const CMP_SSEVec4f& init) : vec128(init.vec128) {};
+    inline const CMP_SSEVec4f& operator=(const CMP_SSEVec4f& lhs) { vec128 = lhs.vec128; return *this; };
+
+    // conversion to m128 type for direct use in _mm intrinsics
+    inline operator __m128() { return vec128; };
+    inline operator const __m128() const { return vec128; };
+
+    // indexing (lane i of the vector; i is not range-checked)
+#ifdef _LINUX
+    inline const float& operator[](int i) const { return f32[i]; };
+    inline float& operator[](int i) { return f32[i]; };
+#else
+    inline const float& operator[](int i) const { return vec128.m128_f32[i]; };
+    inline float& operator[](int i) { return vec128.m128_f32[i]; };
+#endif
+
+    // addition
+    inline CMP_SSEVec4f operator+(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_add_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f& operator+=(const CMP_SSEVec4f& rhs) { vec128 = _mm_add_ps(vec128, rhs.vec128); return *this; };
+
+    // multiplication
+    inline CMP_SSEVec4f operator*(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_mul_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f& operator*=(const CMP_SSEVec4f& rhs) { vec128 = _mm_mul_ps(vec128, rhs.vec128); return *this; };
+
+    // scalar multiplication
+    //inline CMP_SSEVec4f operator*( float rhs ) const { return CMP_SSEVec4f( _mm_mul_ps(vec128, _mm_load1_ps(&rhs)) ); };
+    //inline CMP_SSEVec4f& operator*=( float rhs )  { vec128 = _mm_mul_ps(vec128, _mm_load1_ps(&rhs)); return *this; };
+
+
+    // subtraction
+    inline CMP_SSEVec4f operator-(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_sub_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f& operator-= (const CMP_SSEVec4f& rhs) { vec128 = _mm_sub_ps(vec128, rhs.vec128); return *this; };
+
+    // division
+    inline CMP_SSEVec4f operator/(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_div_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f& operator/= (const CMP_SSEVec4f& rhs) { vec128 = _mm_div_ps(vec128, rhs.vec128); return *this; };
+
+    // scalar division (rhs is broadcast to all four lanes first)
+    inline CMP_SSEVec4f operator/(float rhs)   const { return CMP_SSEVec4f(_mm_div_ps(vec128, _mm_load1_ps(&rhs))); };
+    inline CMP_SSEVec4f& operator/=(float rhs) { vec128 = _mm_div_ps(vec128, _mm_load1_ps(&rhs)); return *this; };
+
+    // comparison
+    // these return 0 or 0xffffffff in each component (a per-lane mask, not a bool)
+    inline CMP_SSEVec4f operator< (const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmplt_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f operator> (const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmpgt_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f operator<=(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmple_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f operator>=(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmpge_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f operator==(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmpeq_ps(vec128, rhs.vec128)); };
+
+    // bitwise operators
+    inline CMP_SSEVec4f operator|(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_or_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f operator&(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_and_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f operator^(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_xor_ps(vec128, rhs.vec128)); };
+    inline const CMP_SSEVec4f& operator|=(const CMP_SSEVec4f& rhs) { vec128 = _mm_or_ps(vec128, rhs.vec128); return *this; };
+    inline const CMP_SSEVec4f& operator&=(const CMP_SSEVec4f& rhs) { vec128 = _mm_and_ps(vec128, rhs.vec128); return *this; };
+
+    // SSE has no bitwise NOT instruction for packed floats, so NOT is done by
+    // XOR-ing with an all-ones mask (0xffffffff in every lane).
+    // To get that mask we compare zero with itself (0 == 0 is true in every lane).
+    inline CMP_SSEVec4f operator~() const
+    {
+        __m128 zero = _mm_setzero_ps();
+        __m128 is_true = _mm_cmpeq_ps(zero, zero);
+        return _mm_xor_ps(is_true, vec128);
+    };
+
+};
+
+typedef Vec4<float>             CMP_Vec4f;          // float x,y,z,w
+typedef Vec4<double>            CMP_Vec4d;          // double x,y,z,w
+typedef Vec4<int>               CMP_Vec4i;          // signed int x,y,z,w
+typedef Vec4<unsigned int>      CMP_Vec4ui;         // unsigned int x,y,z,w (width is platform-defined, commonly 32 bit -- not 16)
+typedef Vec4<unsigned char>     CMP_Vec4uc;         // unsigned 8  bit x,y,z,w
+
+typedef Vec4<unsigned char>     CGU_Vec4uc;         // unsigned 8  bit x,y,z,w
+typedef Vec4<unsigned char>     CGV_Vec4uc;         // unsigned 8  bit x,y,z,w
+
+#endif // not ASPM_GPU
+
+#endif // Header Guard
+
--- a/extern/CMP_Core/test/BlockConstants.h
+++ b/extern/CMP_Core/test/BlockConstants.h
@ -0,0 +1,228 @@
+#ifndef BLOCKCONSTANTS_H
+#define BLOCKCONSTANTS_H
+#include <string>
+#include <unordered_map>
+// One test-fixture entry: a pair of read-only byte pointers.
+// Member order matters: the *_Block objects in this header are brace-initialized as {data, color}.
+struct Block
+{
+    const unsigned char* data;   // the *_Block objects in this header point this at one of the BCn byte arrays
+    const unsigned char* color;  // the *_Block objects in this header initialize this to nullptr
+};
+// Reference byte arrays named <format>_<color>_<alpha mode>: BC1 arrays hold 8 bytes, BC2 and BC3 arrays hold 16 bytes (presumably the expected compressed blocks -- TODO confirm against the tests).
+static const unsigned char BC1_Red_Ignore_Alpha [] {0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Blue_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_White_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Black_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Red_Blue_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Red_Green_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Green_Blue_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Red_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Green_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Blue_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_White_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Green_Ignore_Alpha [] {0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Black_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Red_Blue_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Red_Green_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Green_Blue_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Blue_Ignore_Alpha [] {0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_White_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Black_Ignore_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Red_Blue_Ignore_Alpha [] {0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Red_Green_Ignore_Alpha [] {0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Green_Blue_Ignore_Alpha [] {0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Red_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Green_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC2_Red_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Blue_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_White_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Black_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Blue_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Green_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Green_Blue_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Green_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Blue_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_White_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Green_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Black_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Blue_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Green_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Green_Blue_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Blue_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_White_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Black_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Blue_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Green_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Green_Blue_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Green_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Blue_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_White_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Black_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Blue_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Green_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Green_Blue_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Green_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_White_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Green_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Black_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Green_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Green_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Blue_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_White_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Black_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Blue_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Green_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Green_Blue_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Green_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+
+Block BC1_Red_Ignore_Alpha_Block = {BC1_Red_Ignore_Alpha, nullptr};
+Block BC1_Blue_Half_Alpha_Block = {BC1_Blue_Half_Alpha, nullptr};
+Block BC1_White_Half_Alpha_Block = {BC1_White_Half_Alpha, nullptr};
+Block BC1_Black_Half_Alpha_Block = {BC1_Black_Half_Alpha, nullptr};
+Block BC1_Red_Blue_Half_Alpha_Block = {BC1_Red_Blue_Half_Alpha, nullptr};
+Block BC1_Red_Green_Half_Alpha_Block = {BC1_Red_Green_Half_Alpha, nullptr};
+Block BC1_Green_Blue_Half_Alpha_Block = {BC1_Green_Blue_Half_Alpha, nullptr};
+Block BC1_Red_Full_Alpha_Block = {BC1_Red_Full_Alpha, nullptr};
+Block BC1_Green_Full_Alpha_Block = {BC1_Green_Full_Alpha, nullptr};
+Block BC1_Blue_Full_Alpha_Block = {BC1_Blue_Full_Alpha, nullptr};
+Block BC1_White_Full_Alpha_Block = {BC1_White_Full_Alpha, nullptr};
+Block BC1_Green_Ignore_Alpha_Block = {BC1_Green_Ignore_Alpha, nullptr};
+Block BC1_Black_Full_Alpha_Block = {BC1_Black_Full_Alpha, nullptr};
+Block BC1_Red_Blue_Full_Alpha_Block = {BC1_Red_Blue_Full_Alpha, nullptr};
+Block BC1_Red_Green_Full_Alpha_Block = {BC1_Red_Green_Full_Alpha, nullptr};
+Block BC1_Green_Blue_Full_Alpha_Block = {BC1_Green_Blue_Full_Alpha, nullptr};
+Block BC1_Blue_Ignore_Alpha_Block = {BC1_Blue_Ignore_Alpha, nullptr};
+Block BC1_White_Ignore_Alpha_Block = {BC1_White_Ignore_Alpha, nullptr};
+Block BC1_Black_Ignore_Alpha_Block = {BC1_Black_Ignore_Alpha, nullptr};
+Block BC1_Red_Blue_Ignore_Alpha_Block = {BC1_Red_Blue_Ignore_Alpha, nullptr};
+Block BC1_Red_Green_Ignore_Alpha_Block = {BC1_Red_Green_Ignore_Alpha, nullptr};
+Block BC1_Green_Blue_Ignore_Alpha_Block = {BC1_Green_Blue_Ignore_Alpha, nullptr};
+Block BC1_Red_Half_Alpha_Block = {BC1_Red_Half_Alpha, nullptr};
+Block BC1_Green_Half_Alpha_Block = {BC1_Green_Half_Alpha, nullptr};
+Block BC2_Red_Ignore_Alpha_Block = {BC2_Red_Ignore_Alpha, nullptr};
+Block BC2_Blue_Half_Alpha_Block = {BC2_Blue_Half_Alpha, nullptr};
+Block BC2_White_Half_Alpha_Block = {BC2_White_Half_Alpha, nullptr};
+Block BC2_Black_Half_Alpha_Block = {BC2_Black_Half_Alpha, nullptr};
+Block BC2_Red_Blue_Half_Alpha_Block = {BC2_Red_Blue_Half_Alpha, nullptr};
+Block BC2_Red_Green_Half_Alpha_Block = {BC2_Red_Green_Half_Alpha, nullptr};
+Block BC2_Green_Blue_Half_Alpha_Block = {BC2_Green_Blue_Half_Alpha, nullptr};
+Block BC2_Red_Full_Alpha_Block = {BC2_Red_Full_Alpha, nullptr};
+Block BC2_Green_Full_Alpha_Block = {BC2_Green_Full_Alpha, nullptr};
+Block BC2_Blue_Full_Alpha_Block = {BC2_Blue_Full_Alpha, nullptr};
+Block BC2_White_Full_Alpha_Block = {BC2_White_Full_Alpha, nullptr};
+Block BC2_Green_Ignore_Alpha_Block = {BC2_Green_Ignore_Alpha, nullptr};
+Block BC2_Black_Full_Alpha_Block = {BC2_Black_Full_Alpha, nullptr};
+Block BC2_Red_Blue_Full_Alpha_Block = {BC2_Red_Blue_Full_Alpha, nullptr};
+Block BC2_Red_Green_Full_Alpha_Block = {BC2_Red_Green_Full_Alpha, nullptr};
+Block BC2_Green_Blue_Full_Alpha_Block = {BC2_Green_Blue_Full_Alpha, nullptr};
+Block BC2_Blue_Ignore_Alpha_Block = {BC2_Blue_Ignore_Alpha, nullptr};
+Block BC2_White_Ignore_Alpha_Block = {BC2_White_Ignore_Alpha, nullptr};
+Block BC2_Black_Ignore_Alpha_Block = {BC2_Black_Ignore_Alpha, nullptr};
+Block BC2_Red_Blue_Ignore_Alpha_Block = {BC2_Red_Blue_Ignore_Alpha, nullptr};
+Block BC2_Red_Green_Ignore_Alpha_Block = {BC2_Red_Green_Ignore_Alpha, nullptr};
+Block BC2_Green_Blue_Ignore_Alpha_Block = {BC2_Green_Blue_Ignore_Alpha, nullptr};
+Block BC2_Red_Half_Alpha_Block = {BC2_Red_Half_Alpha, nullptr};
+Block BC2_Green_Half_Alpha_Block = {BC2_Green_Half_Alpha, nullptr};
+Block BC3_Red_Ignore_Alpha_Block = {BC3_Red_Ignore_Alpha, nullptr};
+Block BC3_Blue_Half_Alpha_Block = {BC3_Blue_Half_Alpha, nullptr};
+Block BC3_White_Half_Alpha_Block = {BC3_White_Half_Alpha, nullptr};
+Block BC3_Black_Half_Alpha_Block = {BC3_Black_Half_Alpha, nullptr};
+Block BC3_Red_Blue_Half_Alpha_Block = {BC3_Red_Blue_Half_Alpha, nullptr};
+Block BC3_Red_Green_Half_Alpha_Block = {BC3_Red_Green_Half_Alpha, nullptr};
+Block BC3_Green_Blue_Half_Alpha_Block = {BC3_Green_Blue_Half_Alpha, nullptr};
+Block BC3_Red_Full_Alpha_Block = {BC3_Red_Full_Alpha, nullptr};
+Block BC3_Green_Full_Alpha_Block = {BC3_Green_Full_Alpha, nullptr};
+Block BC3_Blue_Full_Alpha_Block = {BC3_Blue_Full_Alpha, nullptr};
+Block BC3_White_Full_Alpha_Block = {BC3_White_Full_Alpha, nullptr};
+Block BC3_Green_Ignore_Alpha_Block = {BC3_Green_Ignore_Alpha, nullptr};
+Block BC3_Black_Full_Alpha_Block = {BC3_Black_Full_Alpha, nullptr};
+Block BC3_Red_Blue_Full_Alpha_Block = {BC3_Red_Blue_Full_Alpha, nullptr};
+Block BC3_Red_Green_Full_Alpha_Block = {BC3_Red_Green_Full_Alpha, nullptr};
+Block BC3_Green_Blue_Full_Alpha_Block = {BC3_Green_Blue_Full_Alpha, nullptr};
+Block BC3_Blue_Ignore_Alpha_Block = {BC3_Blue_Ignore_Alpha, nullptr};
+Block BC3_White_Ignore_Alpha_Block = {BC3_White_Ignore_Alpha, nullptr};
+Block BC3_Black_Ignore_Alpha_Block = {BC3_Black_Ignore_Alpha, nullptr};
+Block BC3_Red_Blue_Ignore_Alpha_Block = {BC3_Red_Blue_Ignore_Alpha, nullptr};
+Block BC3_Red_Green_Ignore_Alpha_Block = {BC3_Red_Green_Ignore_Alpha, nullptr};
+Block BC3_Green_Blue_Ignore_Alpha_Block = {BC3_Green_Blue_Ignore_Alpha, nullptr};
+Block BC3_Red_Half_Alpha_Block = {BC3_Red_Half_Alpha, nullptr};
+Block BC3_Green_Half_Alpha_Block = {BC3_Green_Half_Alpha, nullptr};
+// Name -> Block lookup table keyed by the byte-array name; the map stores Block by value, so each entry is a copy of the matching *_Block object.
+static std::unordered_map<std::string, Block> blocks {
+	{ "BC1_Red_Ignore_Alpha", BC1_Red_Ignore_Alpha_Block},
+	{ "BC1_Blue_Half_Alpha", BC1_Blue_Half_Alpha_Block},
+	{ "BC1_White_Half_Alpha", BC1_White_Half_Alpha_Block},
+	{ "BC1_Black_Half_Alpha", BC1_Black_Half_Alpha_Block},
+	{ "BC1_Red_Blue_Half_Alpha", BC1_Red_Blue_Half_Alpha_Block},
+	{ "BC1_Red_Green_Half_Alpha", BC1_Red_Green_Half_Alpha_Block},
+	{ "BC1_Green_Blue_Half_Alpha", BC1_Green_Blue_Half_Alpha_Block},
+	{ "BC1_Red_Full_Alpha", BC1_Red_Full_Alpha_Block},
+	{ "BC1_Green_Full_Alpha", BC1_Green_Full_Alpha_Block},
+	{ "BC1_Blue_Full_Alpha", BC1_Blue_Full_Alpha_Block},
+	{ "BC1_White_Full_Alpha", BC1_White_Full_Alpha_Block},
+	{ "BC1_Green_Ignore_Alpha", BC1_Green_Ignore_Alpha_Block},
+	{ "BC1_Black_Full_Alpha", BC1_Black_Full_Alpha_Block},
+	{ "BC1_Red_Blue_Full_Alpha", BC1_Red_Blue_Full_Alpha_Block},
+	{ "BC1_Red_Green_Full_Alpha", BC1_Red_Green_Full_Alpha_Block},
+	{ "BC1_Green_Blue_Full_Alpha", BC1_Green_Blue_Full_Alpha_Block},
+	{ "BC1_Blue_Ignore_Alpha", BC1_Blue_Ignore_Alpha_Block},
+	{ "BC1_White_Ignore_Alpha", BC1_White_Ignore_Alpha_Block},
+	{ "BC1_Black_Ignore_Alpha", BC1_Black_Ignore_Alpha_Block},
+	{ "BC1_Red_Blue_Ignore_Alpha", BC1_Red_Blue_Ignore_Alpha_Block},
+	{ "BC1_Red_Green_Ignore_Alpha", BC1_Red_Green_Ignore_Alpha_Block},
+	{ "BC1_Green_Blue_Ignore_Alpha", BC1_Green_Blue_Ignore_Alpha_Block},
+	{ "BC1_Red_Half_Alpha", BC1_Red_Half_Alpha_Block},
+	{ "BC1_Green_Half_Alpha", BC1_Green_Half_Alpha_Block},
+	{ "BC2_Red_Ignore_Alpha", BC2_Red_Ignore_Alpha_Block},
+	{ "BC2_Blue_Half_Alpha", BC2_Blue_Half_Alpha_Block},
+	{ "BC2_White_Half_Alpha", BC2_White_Half_Alpha_Block},
+	{ "BC2_Black_Half_Alpha", BC2_Black_Half_Alpha_Block},
+	{ "BC2_Red_Blue_Half_Alpha", BC2_Red_Blue_Half_Alpha_Block},
+	{ "BC2_Red_Green_Half_Alpha", BC2_Red_Green_Half_Alpha_Block},
+	{ "BC2_Green_Blue_Half_Alpha", BC2_Green_Blue_Half_Alpha_Block},
+	{ "BC2_Red_Full_Alpha", BC2_Red_Full_Alpha_Block},
+	{ "BC2_Green_Full_Alpha", BC2_Green_Full_Alpha_Block},
+	{ "BC2_Blue_Full_Alpha", BC2_Blue_Full_Alpha_Block},
+	{ "BC2_White_Full_Alpha", BC2_White_Full_Alpha_Block},
+	{ "BC2_Green_Ignore_Alpha", BC2_Green_Ignore_Alpha_Block},
+	{ "BC2_Black_Full_Alpha", BC2_Black_Full_Alpha_Block},
+	{ "BC2_Red_Blue_Full_Alpha", BC2_Red_Blue_Full_Alpha_Block},
+	{ "BC2_Red_Green_Full_Alpha", BC2_Red_Green_Full_Alpha_Block},
+	{ "BC2_Green_Blue_Full_Alpha", BC2_Green_Blue_Full_Alpha_Block},
+	{ "BC2_Blue_Ignore_Alpha", BC2_Blue_Ignore_Alpha_Block},
+	{ "BC2_White_Ignore_Alpha", BC2_White_Ignore_Alpha_Block},
+	{ "BC2_Black_Ignore_Alpha", BC2_Black_Ignore_Alpha_Block},
+	{ "BC2_Red_Blue_Ignore_Alpha", BC2_Red_Blue_Ignore_Alpha_Block},
+	{ "BC2_Red_Green_Ignore_Alpha", BC2_Red_Green_Ignore_Alpha_Block},
+	{ "BC2_Green_Blue_Ignore_Alpha", BC2_Green_Blue_Ignore_Alpha_Block},
+	{ "BC2_Red_Half_Alpha", BC2_Red_Half_Alpha_Block},
+	{ "BC2_Green_Half_Alpha", BC2_Green_Half_Alpha_Block},
+	{ "BC3_Red_Ignore_Alpha", BC3_Red_Ignore_Alpha_Block},
+	{ "BC3_Blue_Half_Alpha", BC3_Blue_Half_Alpha_Block},
+	{ "BC3_White_Half_Alpha", BC3_White_Half_Alpha_Block},
+	{ "BC3_Black_Half_Alpha", BC3_Black_Half_Alpha_Block},
+	{ "BC3_Red_Blue_Half_Alpha", BC3_Red_Blue_Half_Alpha_Block},
+	{ "BC3_Red_Green_Half_Alpha", BC3_Red_Green_Half_Alpha_Block},
+	{ "BC3_Green_Blue_Half_Alpha", BC3_Green_Blue_Half_Alpha_Block},
+	{ "BC3_Red_Full_Alpha", BC3_Red_Full_Alpha_Block},
+	{ "BC3_Green_Full_Alpha", BC3_Green_Full_Alpha_Block},
+	{ "BC3_Blue_Full_Alpha", BC3_Blue_Full_Alpha_Block},
+	{ "BC3_White_Full_Alpha", BC3_White_Full_Alpha_Block},
+	{ "BC3_Green_Ignore_Alpha", BC3_Green_Ignore_Alpha_Block},
+	{ "BC3_Black_Full_Alpha", BC3_Black_Full_Alpha_Block},
+	{ "BC3_Red_Blue_Full_Alpha", BC3_Red_Blue_Full_Alpha_Block},
+	{ "BC3_Red_Green_Full_Alpha", BC3_Red_Green_Full_Alpha_Block},
+	{ "BC3_Green_Blue_Full_Alpha", BC3_Green_Blue_Full_Alpha_Block},
+	{ "BC3_Blue_Ignore_Alpha", BC3_Blue_Ignore_Alpha_Block},
+	{ "BC3_White_Ignore_Alpha", BC3_White_Ignore_Alpha_Block},
+	{ "BC3_Black_Ignore_Alpha", BC3_Black_Ignore_Alpha_Block},
+	{ "BC3_Red_Blue_Ignore_Alpha", BC3_Red_Blue_Ignore_Alpha_Block},
+	{ "BC3_Red_Green_Ignore_Alpha", BC3_Red_Green_Ignore_Alpha_Block},
+	{ "BC3_Green_Blue_Ignore_Alpha", BC3_Green_Blue_Ignore_Alpha_Block},
+	{ "BC3_Red_Half_Alpha", BC3_Red_Half_Alpha_Block},
+	{ "BC3_Green_Half_Alpha", BC3_Green_Half_Alpha_Block}
+};
+
+#endif
--- a/extern/CMP_Core/test/CMakeLists.txt
+++ b/extern/CMP_Core/test/CMakeLists.txt
@ -0,0 +1,13 @@
+cmake_minimum_required(VERSION 3.5)
+project(CMP_Core_Tests)
+# Builds the "Tests" executable from the sources in this directory and links it against Catch2 and CMP_Core.
+add_executable(Tests TestsMain.cpp)
+add_subdirectory(../../../Common/Lib/Ext/Catch2  # Catch2 source dir, relative to this file (outside this source tree)
+                Common/Lib/Ext/Catch2/bin)  # explicit binary dir; CMake requires one for an out-of-tree source dir
+target_sources(Tests 
+                PRIVATE
+                CompressonatorTests.cpp
+                CompressonatorTests.h
+                BlockConstants.h
+                )
+target_link_libraries(Tests Catch2::Catch2 CMP_Core)  # NOTE(review): the CMP_Core target is not created here; it must already exist when this file is processed
--- a/extern/CMP_Core/test/CompressonatorTests.cpp
+++ b/extern/CMP_Core/test/CompressonatorTests.cpp
--- a/extern/CMP_Core/test/CompressonatorTests.h
+++ b/extern/CMP_Core/test/CompressonatorTests.h
@ -0,0 +1,6 @@
+#ifndef COMPRESSONATOR_TESTS_H
+#define COMPRESSONATOR_TESTS_H
+// Test setup hook that TestsMain.cpp calls once before starting the Catch2 session. Defined outside this header; presumably fills in the expected colors of the test blocks -- TODO confirm.
+void AssignExpectedColorsToBlocks();
+
+#endif
--- a/extern/CMP_Core/test/TestsMain.cpp
+++ b/extern/CMP_Core/test/TestsMain.cpp
@ -0,0 +1,10 @@
+#define CATCH_CONFIG_RUNNER
+#include "../../../Common/Lib/Ext/Catch2/catch.hpp"
+#include "CompressonatorTests.h"
+
+// Test runner entry point: runs the fixture setup, then hands the command line to Catch2.
+int main(int argc, char* argv[]) {
+	// Fixture setup runs first, before the Catch2 session is created.
+	AssignExpectedColorsToBlocks();
+
+	// The value returned by the Catch2 session becomes the process exit code.
+	return Catch::Session().run(argc, argv);
+}