Add external libs for comparisons and benchmarks.

2020-03-23 10:07:38 -07:00
parent 4a33d1ac75
commit 9a16bebf8f
67 changed files with 24230 additions and 1 deletions
--- a/extern/CMP_Core/CMP_Core.def
+++ b/extern/CMP_Core/CMP_Core.def
@ -0,0 +1,56 @@
+; CMP_Core.def : Declares the functions exported by the CMP_Core DLL.
+
+EXPORTS
+CreateOptionsBC1
+CreateOptionsBC2
+CreateOptionsBC3
+CreateOptionsBC4
+CreateOptionsBC5
+CreateOptionsBC6
+CreateOptionsBC7
+
+DestroyOptionsBC1
+DestroyOptionsBC2
+DestroyOptionsBC3
+DestroyOptionsBC4
+DestroyOptionsBC5
+DestroyOptionsBC6
+DestroyOptionsBC7
+
+SetDecodeChannelMapping
+
+SetChannelWeightsBC1
+SetChannelWeightsBC2
+SetChannelWeightsBC3
+
+SetQualityBC1
+SetQualityBC2
+SetQualityBC3
+SetQualityBC4
+SetQualityBC5
+SetQualityBC6
+SetQualityBC7
+
+SetAlphaThresholdBC1
+
+SetMaskBC6
+SetMaskBC7
+
+SetErrorThresholdBC7
+SetAlphaOptionsBC7
+
+CompressBlockBC1
+CompressBlockBC2
+CompressBlockBC3
+CompressBlockBC4
+CompressBlockBC5
+CompressBlockBC6
+CompressBlockBC7
+
+DecompressBlockBC1
+DecompressBlockBC2
+DecompressBlockBC3
+DecompressBlockBC4
+DecompressBlockBC5
+DecompressBlockBC6
+DecompressBlockBC7
--- a/extern/CMP_Core/CMakeLists.txt
+++ b/extern/CMP_Core/CMakeLists.txt
@ -0,0 +1,33 @@
+cmake_minimum_required(VERSION 3.10)
+# CMP_Core is created as a static library with an empty source placeholder; the real sources are attached below.
+add_library(CMP_Core STATIC "")
+# Encoder kernels (BC1..BC7) and their shared headers, all private to this target.
+target_sources(CMP_Core
+               PRIVATE
+                   shaders/BC1_Encode_kernel.h
+                   shaders/BC1_Encode_kernel.cpp
+                   shaders/BC2_Encode_kernel.h
+                   shaders/BC2_Encode_kernel.cpp
+                   shaders/BC3_Encode_kernel.h
+                   shaders/BC3_Encode_kernel.cpp
+                   shaders/BC4_Encode_kernel.h
+                   shaders/BC4_Encode_kernel.cpp
+                   shaders/BC5_Encode_kernel.h
+                   shaders/BC5_Encode_kernel.cpp
+                   shaders/BC6_Encode_kernel.h
+                   shaders/BC6_Encode_kernel.cpp
+                   shaders/BC7_Encode_Kernel.h
+                   shaders/BC7_Encode_Kernel.cpp
+                   shaders/BCn_Common_Kernel.h
+                   shaders/Common_Def.h
+                   )
+# shaders/ and source/ are added to the include path for this target only (PRIVATE).
+target_include_directories(CMP_Core
+                           PRIVATE
+                           shaders
+                           source)
+#add_subdirectory(test)
+# NOTE(review): ASPM_GPU also gates source code (BC1_Encode_kernel.cpp compiles its C API only under #ifndef ASPM_GPU) - confirm it is meant to be defined for UNIX host builds.
+if (UNIX)
+target_compile_definitions(CMP_Core PRIVATE _LINUX ASPM_GPU)
+endif()
--- a/extern/CMP_Core/shaders/BC1_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC1_Encode_kernel.cpp
@ -0,0 +1,582 @@
+//=====================================================================
+// Copyright (c) 2019    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#include "BC1_Encode_kernel.h"
+#ifndef ASPM_GPU
+#include <new>      // std::nothrow, used by the host-only option allocation
+#endif
+/*
+ * CompressBlockBC1_Fast: low-quality, single-pass BC1 encoder for one block of
+ * 16 pixels (indexed as a 4x4 grid: column = i & 3, row = i >> 2).
+ *
+ * The colours are transformed with CS_RED/CS_GREEN/CS_BLUE, a single axis is
+ * fitted through them, the two extremes along that axis become the endpoint
+ * colours and every pixel is assigned a 2-bit selector by its position on
+ * the axis.
+ *
+ * srcBlockTemp     16 source pixels; only .x, .y and .z are read (.w is ignored).
+ * compressedBlock  output. [0] = c0 | (c1 << 16), the two packed 5:6:5 endpoint
+ *                  colours, swapped if necessary so that c0 >= c1.
+ *                  [1] = sixteen 2-bit selectors, pixel i at bits 2*i and 2*i+1.
+ *
+ * NOTE(review): several intermediate results (duplicate detection, firstdiff,
+ * axis_mapping_error, the cluster_* statistics and the final corner search)
+ * are computed but never influence compressedBlock; see the inline notes.
+ */
+//============================================== BC1 INTERFACES  =======================================================
+void CompressBlockBC1_Fast(
+    CMP_Vec4uc  srcBlockTemp[16],
+    CMP_GLOBAL CGU_UINT32 compressedBlock[2])
+{
+    int i, k;
+
+    CMP_Vec3f rgb;
+    CMP_Vec3f average_rgb;                  // The centrepoint of the axis
+    CMP_Vec3f v_rgb;                        // The axis
+    CMP_Vec3f uniques[16];                  // The list of unique colours
+    int unique_pixels;                     // The number of unique pixels
+    CGU_FLOAT unique_recip;                    // Reciprocal of the above for fast multiplication
+    int index_map[16];                     // The map of source pixels to unique indices
+                                    
+    CGU_FLOAT pos_on_axis[16];                 // The distance each unique falls along the compression axis
+    CGU_FLOAT dist_from_axis[16];              // The distance each unique falls from the compression axis
+    CGU_FLOAT left = 0, right = 0, centre = 0; // The extremities and centre (average of left/right) of uniques along the compression axis
+    CGU_FLOAT axis_mapping_error = 0;          // The total computed error in mapping pixels to the axis
+
+    int swap;                              // Indicator if the RGB values need swapping to generate an opaque result
+
+    // -------------------------------------------------------------------------------------
+    // (3) Find the array of unique pixel values and sum them to find their average position
+    // -------------------------------------------------------------------------------------
+    {
+        // Despite the heading, no de-duplication takes effect here: every pixel is stored in uniques[] and summed
+        int current_pixel, firstdiff;   // firstdiff is written below but never read
+        current_pixel = unique_pixels = 0;
+        average_rgb = 0.0f;
+        firstdiff = -1;
+        for (i = 0; i<16; i++)
+        {
+                for (k = 0; k<i; k++)   // afterwards k == i only if no earlier pixel matches in the top 5/6/5 bits of x/y/z
+                    if ((((srcBlockTemp[k].x ^ srcBlockTemp[i].x) & 0xf8) == 0) && (((srcBlockTemp[k].y ^ srcBlockTemp[i].y) & 0xfc) == 0) && (((srcBlockTemp[k].z ^ srcBlockTemp[i].z) & 0xf8) == 0))
+                        break;
+                index_map[i] = current_pixel++;   // not part of the search loop: runs for every i, so index_map[i] == i
+                //pixel_count[i] = 1;
+                CMP_Vec3f trgb;
+                rgb.x = (CGU_FLOAT)((srcBlockTemp[i].x) & 0xff);
+                rgb.y = (CGU_FLOAT)((srcBlockTemp[i].y) & 0xff);
+                rgb.z = (CGU_FLOAT)((srcBlockTemp[i].z) & 0xff);
+
+                trgb.x = CS_RED(rgb.x, rgb.y, rgb.z);
+                trgb.y = CS_GREEN(rgb.x, rgb.y, rgb.z);
+                trgb.z = CS_BLUE(rgb.x, rgb.y, rgb.z);
+                uniques[i] = trgb;   // stored for every pixel, duplicates included
+
+                if (k == i)
+                {
+                    unique_pixels++;
+                    if ((i != 0) && (firstdiff < 0)) firstdiff = i;
+                }
+                average_rgb = average_rgb + trgb;
+        }
+
+        unique_pixels = 16;   // overrides the count gathered above: all 16 entries are processed from here on
+        // Compute average of the uniques
+        unique_recip = 1.0f / (CGU_FLOAT)unique_pixels;
+        average_rgb = average_rgb * unique_recip;
+    }
+
+    // -------------------------------------------------------------------------------------
+    // (4) For each component, reflect points about the average so all lie on the same side
+    // of the average, and compute the new average - this gives a second point that defines the axis
+    // To compute the sign of the axis sum the positive differences of G for each of R and B (the
+    // G axis is always positive in this implementation
+    // -------------------------------------------------------------------------------------
+    // An interesting situation occurs if the G axis contains no information, in which case the RB
+    // axis is also compared. I am not entirely sure if this is the correct implementation - should
+    // the priority axis be determined by magnitude?
+    {
+
+        CGU_FLOAT rg_pos, bg_pos, rb_pos;
+        v_rgb = 0.0f;
+        rg_pos = bg_pos = rb_pos = 0;
+
+        for (i = 0; i < unique_pixels; i++)
+        {
+            rgb = uniques[i] - average_rgb;
+
+#ifndef ASPM_GPU
+            v_rgb.x += (CGU_FLOAT)fabs(rgb.x);
+            v_rgb.y += (CGU_FLOAT)fabs(rgb.y);
+            v_rgb.z += (CGU_FLOAT)fabs(rgb.z);
+#else
+            v_rgb = v_rgb + fabs(rgb);
+#endif
+
+            if (rgb.x > 0) { rg_pos += rgb.y; rb_pos += rgb.z; }
+            if (rgb.z > 0) bg_pos += rgb.y;
+        }
+        v_rgb = v_rgb*unique_recip;
+        if (rg_pos < 0) v_rgb.x = -v_rgb.x;
+        if (bg_pos < 0) v_rgb.z = -v_rgb.z;
+        if ((rg_pos == bg_pos) && (rg_pos == 0))
+            if (rb_pos < 0) v_rgb.z = -v_rgb.z;
+    }
+
+    // -------------------------------------------------------------------------------------
+    // (5) Axis projection and remapping
+    // -------------------------------------------------------------------------------------
+    {
+        CGU_FLOAT v2_recip;
+        // Normalise the axis for simplicity of future calculation
+        v2_recip = (v_rgb.x*v_rgb.x + v_rgb.y*v_rgb.y + v_rgb.z*v_rgb.z);
+        if (v2_recip > 0)
+            v2_recip = 1.0f / (CGU_FLOAT)sqrt(v2_recip);
+        else
+            v2_recip = 1.0f;   // zero-length axis (all pixels identical after the transform): leave it unscaled
+        v_rgb = v_rgb*v2_recip;
+    }
+
+    // -------------------------------------------------------------------------------------
+    // (6) Map the axis
+    // -------------------------------------------------------------------------------------
+    // the line joining (and extended on either side of) average and axis
+    // defines the axis onto which the points will be projected
+    // Project all the points onto the axis, calculate the distance along
+    // the axis from the centre of the axis (average)
+    // From Foley & Van Dam: Closest point of approach of a line (P + v) to a point (R) is
+    //                            P + ((R-P).v) / (v.v))v
+    // The distance along v is therefore (R-P).v / (v.v)
+    // (v.v) is 1 if v is a unit vector.
+    //
+    // Calculate the extremities at the same time - these need to be reasonably accurately
+    // represented in all cases
+    //
+    // In this first calculation, also find the error of mapping the points to the axis - this
+    // is our major indicator of whether or not the block has compressed well - if the points
+    // map well onto the axis then most of the noise introduced is high-frequency noise
+    {
+        left = 10000.0f;
+        right = -10000.0f;
+        axis_mapping_error = 0;
+        for (i = 0; i < unique_pixels; i++)
+        {
+            // Compute the distance along the axis of the point of closest approach
+            CMP_Vec3f temp = (uniques[i] - average_rgb);
+            pos_on_axis[i] = (temp.x * v_rgb.x) + (temp.y * v_rgb.y) + (temp.z * v_rgb.z);
+
+            // Compute the actual point and thence the mapping error
+            rgb = uniques[i] - (average_rgb + (v_rgb * pos_on_axis[i]));
+            dist_from_axis[i] = rgb.x*rgb.x + rgb.y*rgb.y + rgb.z*rgb.z;
+            axis_mapping_error += dist_from_axis[i];
+
+            // Work out the extremities
+            if (pos_on_axis[i] < left)
+                left = pos_on_axis[i];
+            if (pos_on_axis[i] > right)
+                right = pos_on_axis[i];
+        }
+    }
+
+    // -------------------------------------------------------------------------------------
+    // (7) Now we have a good axis and the basic information about how the points are mapped
+    // to it
+    // Our initial guess is to represent the endpoints accurately, by moving the average
+    // to the centre and recalculating the point positions along the line
+    // -------------------------------------------------------------------------------------
+    {
+        centre = (left + right) / 2;
+        average_rgb = average_rgb + (v_rgb*centre);
+        for (i = 0; i<unique_pixels; i++)
+            pos_on_axis[i] -= centre;
+        right -= centre;
+        left -= centre;
+
+        // Accumulate our final resultant error
+        axis_mapping_error *= unique_recip * (1 / 255.0f);   // not read again in this function
+
+    }
+
+    // -------------------------------------------------------------------------------------
+    // (8) Calculate the high and low output colour values
+    // Involved in this is a rounding procedure which is undoubtedly slightly twitchy. A
+    // straight rounded average is not correct, as the decompressor 'unrounds' by replicating
+    // the top bits to the bottom.
+    // In order to take account of this process, we don't just apply a straight rounding correction,
+    // but base our rounding on the input value (a straight rounding is actually pretty good in terms of
+    // error measure, but creates a visual colour and/or brightness shift relative to the original image)
+    // The method used here is to apply a centre-biased rounding dependent on the input value, which was
+    // (mostly by experiment) found to give minimum MSE while preserving the visual characteristics of
+    // the image.
+    // rgb = (average_rgb + (left|right)*v_rgb);
+    // -------------------------------------------------------------------------------------
+    {
+        CGU_UINT32 c0, c1, t;
+        int rd, gd, bd;
+        rgb = (average_rgb + (v_rgb * left));
+        rd = ( CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z);
+        gd = ( CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z);
+        bd = ( CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z);
+        ROUND_AND_CLAMP(rd, 5);
+        ROUND_AND_CLAMP(gd, 6);
+        ROUND_AND_CLAMP(bd, 5);
+        c0 = ((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3);   // pack top 5/6/5 bits into one 16-bit colour
+
+        rgb = average_rgb + (v_rgb * right);
+        rd = ( CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z);
+        gd = ( CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z);
+        bd = ( CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z);
+        ROUND_AND_CLAMP(rd, 5);
+        ROUND_AND_CLAMP(gd, 6);
+        ROUND_AND_CLAMP(bd, 5);
+        c1 = (((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3));
+
+        // Force to be a 4-colour opaque block - in which case, c0 is greater than c1
+        // blocktype == 4
+        {
+            if (c0 < c1)
+            {
+                t = c0;
+                c0 = c1;
+                c1 = t;
+                swap = 1;   // selectors are XORed with this in step (9) so they follow the exchanged endpoints
+            }
+            else if (c0 == c1)
+            {
+                // This block will always be encoded in 3-colour mode
+                // Need to ensure that only one of the two points gets used,
+                // avoiding accidentally setting some transparent pixels into the block
+                for (i = 0; i<unique_pixels; i++)
+                    pos_on_axis[i] = left;
+                swap = 0;
+            }
+            else
+                swap = 0;
+        }
+
+        compressedBlock[0] = c0 | (c1 << 16);   // c0 in the low 16 bits, c1 in the high 16 bits
+    }
+
+    // -------------------------------------------------------------------------------------
+    // (9) Final clustering, creating the 2-bit values that define the output
+    // -------------------------------------------------------------------------------------
+    {
+        CGU_UINT32 bit;
+        CGU_FLOAT division;
+        CGU_FLOAT cluster_x[4];
+        CGU_FLOAT cluster_y[4];
+        int cluster_count[4];
+
+        // (blocktype == 4)
+        {
+            compressedBlock[1] = 0;
+            division = right*2.0f / 3.0f;
+            centre = (left + right) / 2;        // Actually, this code only works if centre is 0 or approximately so
+
+            for (i = 0; i<4; i++)
+            {
+                cluster_x[i] = cluster_y[i] = 0.0f;
+                cluster_count[i] = 0;
+            }
+
+
+            for (i = 0; i<16; i++)
+            {
+                rgb.z = pos_on_axis[index_map[i]];   // rgb.z is reused as scratch for this pixel's position on the axis
+                // Endpoints (indicated by block > average) are 0 and 1, while
+                // interpolants are 2 and 3
+                if (fabs(rgb.z) >= division)
+                    bit = 0;
+                else
+                    bit = 2;
+                // Positive is in the latter half of the block
+                if (rgb.z >= centre)
+                    bit += 1;
+                // Set the output, taking swapping into account
+                compressedBlock[1] |= ((bit^swap) << (2 * i));
+
+                // Average the X and Y locations for each cluster
+                cluster_x[bit] += (CGU_FLOAT)(i & 3);
+                cluster_y[bit] += (CGU_FLOAT)(i >> 2);
+                cluster_count[bit]++;
+            }
+
+            for (i = 0; i<4; i++)
+            {
+                CGU_FLOAT cr;
+                if (cluster_count[i])
+                {
+                    cr = 1.0f / cluster_count[i];
+                    cluster_x[i] *= cr;
+                    cluster_y[i] *= cr;
+                }
+                else
+                {
+                    cluster_x[i] = cluster_y[i] = -1;
+                }
+            }
+            // NOTE(review): cluster_x, cluster_y and cluster_count are not read after this point.
+            // patterns in axis position detection
+            // (same algorithm as used in the SSE version)
+            // NOTE(review): the search below only updates its own locals (x, y, xstep, ystep, i1, k1); compressedBlock is not modified by it.
+            if ((compressedBlock[0] & 0xffff) != (compressedBlock[0] >> 16))
+            {
+                CGU_UINT32 i1, k1;
+                CGU_UINT32 x = 0, y = 0;
+                int xstep = 0, ystep = 0;
+
+                // Find a corner to search from
+                for (k1 = 0; k1<4; k1++)
+                {
+                    switch (k1)
+                    {
+                    case 0:
+                        x = 0; y = 0; xstep = 1; ystep = 1;
+                        break;
+                    case 1:
+                        x = 0; y = 3; xstep = 1; ystep = -1;
+                        break;
+                    case 2:
+                        x = 3; y = 0; xstep = -1; ystep = 1;
+                        break;
+                    case 3:
+                        x = 3; y = 3; xstep = -1; ystep = -1;
+                        break;
+                    }
+
+                    for (i1 = 0; i1<4; i1++)
+                    {
+                        if ((POS(x, y + ystep*i1)                < POS(x + xstep, y + ystep*i1)) ||
+                            (POS(x + xstep, y + ystep*i1)        < POS(x + 2 * xstep, y + ystep*i1)) ||
+                            (POS(x + 2 * xstep, y + ystep*i1)    < POS(x + 3 * xstep, y + ystep*i1))
+                            )
+                            break;
+                        if ((POS(x + xstep*i1, y)                < POS(x + xstep*i1, y + ystep)) ||
+                            (POS(x + xstep*i1, y + ystep)        < POS(x + xstep*i1, y + 2 * ystep)) ||
+                            (POS(x + xstep*i1, y + 2 * ystep)    < POS(x + xstep*i1, y + 3 * ystep))
+                            )
+                            break;
+                    }
+                    if (i1 == 4)
+                        break;
+                }
+            }
+        }
+
+    }
+    // done
+}
+
+// Writes the two 32-bit words of `data` into `u_dstptr` as eight bytes,
+// least-significant byte of each word first: bytes 0..3 come from data[0],
+// bytes 4..7 from data[1].
+INLINE void store_uint8(CMP_GLOBAL CGU_UINT8 u_dstptr[8], CGU_UINT32 data[2])
+{
+   for (CGU_INT byteIdx = 0; byteIdx < 8; byteIdx++)
+   {
+      const CGU_UINT32 word   = data[byteIdx >> 2];     // which 32-bit word this byte belongs to
+      const int        bitOfs = (byteIdx & 3) * 8;      // 0, 8, 16, 24 within that word
+      u_dstptr[byteIdx] = (word >> bitOfs)&0xFF;
+   }
+}
+
+// Encodes 16 pixels into a BC1 block through the shared CompressRGBBlock path.
+//
+// srcBlockTemp     16 source pixels (x, y, z, w components).
+// compressedBlock  receives the two 32-bit words of the encoded block.
+// BC15options      encoder settings; copied locally, the caller's struct is not modified.
+void  CompressBlockBC1_Internal(
+    const CMP_Vec4uc  srcBlockTemp[16],
+    CMP_GLOBAL  CGU_UINT32      compressedBlock[2],
+    CMP_GLOBAL  const CMP_BC15Options *BC15options)
+{
+    // Flatten the pixels into a byte array, four bytes per pixel in z, y, x, w order.
+    CGU_UINT8 rgbBlock[64];
+    for (CGU_INT32 pixel = 0; pixel < 16; pixel++)
+    {
+        const CGU_INT32 base = pixel * 4;
+        rgbBlock[base + 0] = (CGU_UINT8)srcBlockTemp[pixel].z;  // B
+        rgbBlock[base + 1] = (CGU_UINT8)srcBlockTemp[pixel].y;  // G
+        rgbBlock[base + 2] = (CGU_UINT8)srcBlockTemp[pixel].x;  // R
+        rgbBlock[base + 3] = (CGU_UINT8)srcBlockTemp[pixel].w;  // A
+    }
+
+    // The helpers below take a mutable options pointer, so they operate on a private copy.
+    CMP_BC15Options internalOptions = *BC15options;
+    CalculateColourWeightings(rgbBlock, &internalOptions);
+
+    CompressRGBBlock(rgbBlock, compressedBlock, &internalOptions, TRUE, FALSE, internalOptions.m_nAlphaThreshold);
+}
+
+//============================================== USER INTERFACES  ========================================================
+#ifndef ASPM_GPU
+// Allocates a CMP_BC15Options, fills it with the defaults and hands it back
+// through *options. The handle must be released with DestroyOptionsBC1.
+//
+// Returns CGU_CORE_OK on success,
+//         CGU_CORE_ERR_INVALIDPTR if `options` itself is null,
+//         CGU_CORE_ERR_NEWMEM if the allocation fails (*options is set to NULL).
+int CMP_CDECL CreateOptionsBC1(void **options)
+{
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+
+    // Plain `new` throws on failure and never yields null, which would make the
+    // error return below unreachable; the nothrow form reports failure as null.
+    CMP_BC15Options *BC15optionsDefault = new (std::nothrow) CMP_BC15Options;
+    if (!BC15optionsDefault) {
+        (*options) = NULL;
+        return CGU_CORE_ERR_NEWMEM;
+    }
+
+    SetDefaultBC15Options(BC15optionsDefault);
+    (*options) = BC15optionsDefault;
+    return CGU_CORE_OK;
+}
+
+// Releases an options object passed as an opaque handle.
+// Returns CGU_CORE_ERR_INVALIDPTR for a null handle, CGU_CORE_OK otherwise.
+int CMP_CDECL DestroyOptionsBC1(void *options)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+
+    // The handle is treated as a CMP_BC15Options and deleted as such.
+    delete static_cast<CMP_BC15Options *>(options);
+    return CGU_CORE_OK;
+}
+
+// Stores the encoding quality in the options object, clamped to [0, 1].
+//
+// Returns CGU_CORE_ERR_INVALIDPTR for a null handle, CGU_CORE_OK otherwise.
+int CMP_CDECL SetQualityBC1(void *options, 
+                            CGU_FLOAT fquality)
+{
+    // A null handle is an invalid pointer, not an allocation failure; report it
+    // with the same code the other option setters in this file use.
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+    CMP_BC15Options *BC15optionsDefault =  reinterpret_cast <CMP_BC15Options *>(options);
+
+    // Clamp out-of-range requests instead of rejecting them.
+    if (fquality < 0.0f) fquality = 0.0f;
+    else
+    if (fquality > 1.0f) fquality = 1.0f;
+    BC15optionsDefault->m_fquality = fquality;
+    return CGU_CORE_OK;
+}
+
+
+// Stores `alphaThreshold` in the options object's m_nAlphaThreshold field.
+// Returns CGU_CORE_ERR_INVALIDPTR for a null handle, CGU_CORE_OK otherwise.
+int CMP_CDECL SetAlphaThresholdBC1(void *options, 
+                                   CGU_UINT8 alphaThreshold)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+
+    static_cast<CMP_BC15Options *>(options)->m_nAlphaThreshold = alphaThreshold;
+    return CGU_CORE_OK;
+}
+
+// Stores `mapRGBA` in the options object's m_mapDecodeRGBA field.
+// Returns CGU_CORE_ERR_INVALIDPTR for a null handle, CGU_CORE_OK otherwise.
+int CMP_CDECL SetDecodeChannelMapping(void *options,
+                              CGU_BOOL mapRGBA)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+
+    static_cast<CMP_BC15Options *>(options)->m_mapDecodeRGBA = mapRGBA;
+    return CGU_CORE_OK;
+}
+
+// Enables channel weighting and stores the three weights in the options object.
+//
+// Each weight is rejected if it is below 0 or above 1; the channels are checked
+// in red, green, blue order and the first offender determines the error code.
+// Nothing is written to the options object when a check fails.
+//
+// Returns CGU_CORE_ERR_INVALIDPTR for a null handle,
+//         CGU_CORE_ERR_RANGERED / _RANGEGREEN / _RANGEBLUE for a bad weight,
+//         CGU_CORE_OK otherwise.
+int CMP_CDECL SetChannelWeightsBC1(void *options,
+                              CGU_FLOAT WeightRed,
+                              CGU_FLOAT WeightGreen,
+                              CGU_FLOAT WeightBlue) {
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+
+    if (WeightRed < 0.0f || WeightRed > 1.0f)
+        return CGU_CORE_ERR_RANGERED;
+    if (WeightGreen < 0.0f || WeightGreen > 1.0f)
+        return CGU_CORE_ERR_RANGEGREEN;
+    if (WeightBlue < 0.0f || WeightBlue > 1.0f)
+        return CGU_CORE_ERR_RANGEBLUE;
+
+    CMP_BC15Options *bc15 = static_cast<CMP_BC15Options *>(options);
+    bc15->m_bUseChannelWeighting = true;
+    bc15->m_fChannelWeights[0]   = WeightRed;
+    bc15->m_fChannelWeights[1]   = WeightGreen;
+    bc15->m_fChannelWeights[2]   = WeightBlue;
+    return CGU_CORE_OK;
+}
+
+// Compresses one 4x4 block of 4-byte pixels into an 8-byte BC1 block.
+//
+// srcBlock          first byte of the block's top-left pixel; each pixel is read
+//                   as four consecutive bytes into .x, .y, .z, .w.
+// srcStrideInBytes  byte distance between the starts of consecutive rows.
+// cmpBlock          receives the 8 encoded bytes.
+// options           handle from CreateOptionsBC1, or NULL to encode with defaults.
+//
+// Returns CGU_CORE_ERR_INVALIDPTR if srcBlock or cmpBlock is null,
+//         CGU_CORE_OK otherwise.
+int CMP_CDECL CompressBlockBC1(const unsigned char *srcBlock,
+                               unsigned int srcStrideInBytes,
+                               CMP_GLOBAL unsigned char cmpBlock[8],
+                               const void *options = NULL) {
+    // Reject null buffers up front instead of dereferencing them below.
+    if (!srcBlock || !cmpBlock) return CGU_CORE_ERR_INVALIDPTR;
+
+    //----------------------------------
+    // Fill the inBlock with source data
+    //----------------------------------
+    CMP_Vec4uc inBlock[16];
+    CGU_INT dstptr = 0;
+    for (CGU_UINT8 row = 0; row < 4; row++)
+    {
+        // The row offset stays unsigned so a large stride is not narrowed to a signed int.
+        const unsigned char *texel = srcBlock + row * srcStrideInBytes;
+        for (CGU_UINT8 col = 0; col < 4; col++, texel += 4, dstptr++)
+        {
+            inBlock[dstptr].x = CGU_UINT8(texel[0]);
+            inBlock[dstptr].y = CGU_UINT8(texel[1]);
+            inBlock[dstptr].z = CGU_UINT8(texel[2]);
+            inBlock[dstptr].w = CGU_UINT8(texel[3]);
+        }
+    }
+
+    // The encoder only reads the options, so the caller's const-ness is preserved.
+    const CMP_BC15Options *BC15options = static_cast<const CMP_BC15Options *>(options);
+    CMP_BC15Options BC15optionsDefault;
+    if (BC15options == NULL)
+    {
+        SetDefaultBC15Options(&BC15optionsDefault);
+        BC15options = &BC15optionsDefault;
+    }
+
+    // NOTE(review): cmpBlock is written through a CGU_UINT32 pointer, which presumes
+    // the caller's buffer is suitably aligned for 32-bit stores - confirm with callers.
+    CompressBlockBC1_Internal(inBlock, (CMP_GLOBAL  CGU_UINT32 *)cmpBlock, BC15options);
+    return CGU_CORE_OK;
+}
+
+// Decodes one 8-byte BC1 block into 64 bytes of pixel data.
+//
+// cmpBlock  the 8 encoded bytes.
+// srcBlock  receives the 64 decoded bytes.
+// options   handle from CreateOptionsBC1, or NULL to decode with defaults.
+//
+// Returns CGU_CORE_ERR_INVALIDPTR if cmpBlock or srcBlock is null,
+//         CGU_CORE_OK otherwise.
+int CMP_CDECL DecompressBlockBC1(const unsigned char cmpBlock[8], 
+                                 CMP_GLOBAL unsigned char srcBlock[64],
+                                 const void *options = NULL) {
+    // Reject null buffers up front instead of passing them to the decoder.
+    if (!cmpBlock || !srcBlock) return CGU_CORE_ERR_INVALIDPTR;
+
+    CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
+    CMP_BC15Options BC15optionsDefault;
+    if (BC15options == NULL)
+    {
+        BC15options     = &BC15optionsDefault;
+        SetDefaultBC15Options(BC15options);
+    }
+
+    // NOTE(review): cmpBlock is read through a CGU_UINT32 pointer, which presumes
+    // the caller's buffer is suitably aligned for 32-bit loads - confirm with callers.
+    DecompressDXTRGB_Internal(srcBlock, ( CGU_UINT32 *)cmpBlock, BC15options);
+
+    return CGU_CORE_OK;
+}
+#endif
+
+//============================================== OpenCL USER INTERFACE ========================================================
+#ifdef ASPM_GPU
+// Kernel entry point: encodes the 4x4 block selected by get_global_id(0)
+// (block column) and get_global_id(1) (block row).
+//
+// ImageSource       source pixels, indexed row by row with m_src_width pixels per row.
+// ImageDestination  output bytes, BC1CompBlockSize bytes per block, blocks stored row by row.
+// SourceInfo        supplies m_src_width, m_src_height and m_fquality.
+// BC15options       encoder settings, used by the non-fast path only.
+CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
+    CMP_GLOBAL  const CMP_Vec4uc*   ImageSource,
+    CMP_GLOBAL  CGU_UINT8*          ImageDestination,
+    CMP_GLOBAL  Source_Info*        SourceInfo,
+    CMP_GLOBAL  CMP_BC15Options*    BC15options
+)
+{
+//printf("SourceInfo: (H:%d,W:%d) Quality %1.2f \n", SourceInfo->m_src_height, SourceInfo->m_src_width, SourceInfo->m_fquality);
+#ifdef ASPM_GPU
+    CGU_UINT32 blockCol = get_global_id(0);
+    CGU_UINT32 blockRow = get_global_id(1);
+#else
+    CGU_UINT32 blockCol = 0;
+    CGU_UINT32 blockRow = 0;
+#endif
+
+    // Invocations that fall outside the whole-block grid have nothing to do.
+    if ((blockCol >= (SourceInfo->m_src_width / BlockX)) ||
+        (blockRow >= (SourceInfo->m_src_height / BlockX)))
+        return;
+
+    int imageWidth = SourceInfo->m_src_width;
+
+    // Byte offset of this block in the output.
+    CGU_UINT32 destI = (blockCol*BC1CompBlockSize) + (blockRow*(imageWidth / BlockX)*BC1CompBlockSize);
+
+    // Gather the block's 16 pixels; rowStart is the index of the first pixel of the current row.
+    CMP_Vec4uc srcData[16];
+    int rowStart = 4 * (blockRow * imageWidth + blockCol);
+    for ( CGU_INT32 row = 0; row < 4; row++) {
+        for ( CGU_INT32 col = 0; col < 4; col++) {
+            srcData[row * 4 + col] = ImageSource[rowStart + col];
+        }
+        rowStart += imageWidth;
+    }
+
+    CMP_GLOBAL  CGU_UINT32 *dstBlock = (CMP_GLOBAL  CGU_UINT32 *)&ImageDestination[destI];
+
+    // fast low quality mode that matches v3.1 code
+    if (SourceInfo->m_fquality <= 0.04f)
+        CompressBlockBC1_Fast(srcData, dstBlock);
+    else
+        CompressBlockBC1_Internal(srcData, dstBlock, BC15options);
+}
+#endif
--- a/extern/CMP_Core/shaders/BC1_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC1_Encode_kernel.h
@ -0,0 +1,48 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef BC1_ENCODE_KERNEL_H
+#define BC1_ENCODE_KERNEL_H
+
+#include "Common_Def.h"
+#include "BCn_Common_Kernel.h"
+
+// Colour-space transform used by the fast BC1 encoder: red and green pass
+// through unchanged, blue is replaced by the average of blue and green.
+// All macro arguments are parenthesized so that expression arguments
+// (e.g. `a + b` or `c ? x : y`) expand with the intended precedence.
+#define CS_RED(r, g, b)        (r)
+#define CS_GREEN(r, g, b)    (g)
+#define CS_BLUE(r, g, b)    (((b)+(g))*0.5f)
+// Inverse transform: with g unchanged, 2*((b+g)/2) - g recovers b.
+#define DCS_RED(r, g, b)    (r)
+#define DCS_GREEN(r, g, b)    (g)
+#define DCS_BLUE(r, g, b)    ((2.0f*(b))-(g))
+#define BYTEPP 4
+// Size in bytes of one encoded BC1 block.
+#define BC1CompBlockSize    8
+
+
+// Clamps v to [0, 255] in place; values already inside the range instead get a
+// rounding bias of (0x80 >> shift) minus their own top bits (v >> shift) added.
+// v must be a modifiable integer lvalue; it is evaluated more than once.
+#define ROUND_AND_CLAMP(v, shift)    \
+{\
+    if ((v) < 0) (v) = 0;\
+    else if ((v) > 255) (v) = 255;\
+    else (v) += (0x80>>(shift)) - ((v)>>(shift));\
+}
+
+// Element (x, y) of a 4-wide grid stored in the caller's local `pos_on_axis` array.
+#define POS(x,y) (pos_on_axis[(x)+(y)*4])
+
+#endif
--- a/extern/CMP_Core/shaders/BC2_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC2_Encode_kernel.cpp
@ -0,0 +1,261 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#include "BC2_Encode_kernel.h"
+
+//============================================== BC2 INTERFACES =======================================================
+
+// Quantizes 16 8-bit alpha values to 4 bits each and packs them into the two
+// 32-bit words of an explicit-alpha (DXT3) block.
+//   block_8    : the 16 source alpha values.
+//   block_dxtc : receives the packed result. Entries 0..7 land in word 0 and
+//                entries 8..15 in word 1; entry i occupies the 4 bits starting
+//                at bit 4 * (i % 8).
+void DXTCV11CompressExplicitAlphaBlock(const CGU_UINT8 block_8[16], CMP_GLOBAL CGU_UINT32 block_dxtc[2])
+{
+    block_dxtc[0] = 0;
+    block_dxtc[1] = 0;
+    for (CGU_UINT8 i = 0; i < 16; i++)
+    {
+        // Keep the value unsigned: the nibble is shifted left by up to 28 bits,
+        // which would run into the sign bit of a signed int.
+        CGU_UINT32 v = block_8[i];
+        // Bias, then drop the low four bits to get the 4-bit level.
+        v = (v + 7 - (v >> 4)) >> 4;
+        if (v > 0xf)
+            v = 0xf;
+        if (i < 8)
+            block_dxtc[0] |= v << (4 * i);
+        else
+            block_dxtc[1] |= v << (4 * (i - 8));
+    }
+}
+
+// Layout of one packed explicit-alpha entry: each pixel takes
+// EXPLICIT_ALPHA_PIXEL_BPP bits, selected with EXPLICIT_ALPHA_PIXEL_MASK.
+#define EXPLICIT_ALPHA_PIXEL_MASK 0xf
+#define EXPLICIT_ALPHA_PIXEL_BPP  4
+
+// Packs the 16 alpha values in alphaBlock into compressedBlock[0..1] by
+// forwarding to DXTCV11CompressExplicitAlphaBlock.
+// Always returns CGU_CORE_OK.
+CGU_INT CompressExplicitAlphaBlock(const CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4], 
+    CMP_GLOBAL CGU_UINT32 compressedBlock[2])
+{
+    DXTCV11CompressExplicitAlphaBlock(alphaBlock, compressedBlock);
+    return CGU_CORE_OK;
+}
+
+// Encodes one 4x4 block of pixels as BC2: an explicit-alpha part written at
+// compressedBlock[DXTC_OFFSET_ALPHA] and a colour part written at
+// compressedBlock[DXTC_OFFSET_RGB].
+//   srcBlockTemp    : 16 source pixels (x, y, z, w members).
+//   compressedBlock : destination for the compressed block.
+//   BC15options     : encoder options; copied locally, never modified.
+void  CompressBlockBC2_Internal(const CMP_Vec4uc srcBlockTemp[16],
+                                CMP_GLOBAL CGU_UINT32 compressedBlock[4],
+                                CMP_GLOBAL const CMP_BC15Options *BC15options)
+{
+    // Re-pack the pixels as interleaved bytes in z, y, x, w order.
+    CGU_UINT8 rgbaBlock[64];
+    CGU_UINT8 outPos = 0;
+    for (CGU_UINT8 pixel = 0; pixel < 16; pixel++) {
+        rgbaBlock[outPos++] = (CGU_UINT8)srcBlockTemp[pixel].z;  // B
+        rgbaBlock[outPos++] = (CGU_UINT8)srcBlockTemp[pixel].y;  // G
+        rgbaBlock[outPos++] = (CGU_UINT8)srcBlockTemp[pixel].x;  // R
+        rgbaBlock[outPos++] = (CGU_UINT8)srcBlockTemp[pixel].w;  // A
+    }
+
+    // Pull the alpha byte out of each packed 32-bit pixel.
+    CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
+    for (CGU_INT32 pixel = 0; pixel < 16; pixel++) {
+        alphaBlock[pixel] = (CGU_UINT8)(((CGU_INT32*)rgbaBlock)[pixel] >> RGBA8888_OFFSET_A);
+    }
+
+    // CalculateColourWeightings writes into the options it is given, so it
+    // operates on a private copy rather than on the caller's structure.
+    CMP_BC15Options internalOptions = *BC15options;
+    CalculateColourWeightings(rgbaBlock, &internalOptions);
+
+    // Alpha first; the colour part is only written when alpha succeeded.
+    if (CompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]) != 0)
+        return;
+
+    CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], &internalOptions,FALSE,FALSE,0);
+}
+
+//============================================== USER INTERFACES ========================================================
+#ifndef ASPM_GPU
+
+// Allocates a CMP_BC15Options, initializes it with SetDefaultBC15Options and
+// hands it back through *options. Release it with DestroyOptionsBC2.
+// Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when options is
+// NULL, and CGU_CORE_ERR_NEWMEM (with *options set to NULL) when the
+// allocation yields a null pointer.
+int CMP_CDECL CreateOptionsBC2(void **options)
+{
+    // Reject a null out-parameter before writing through it.
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+
+    // NOTE(review): a plain new-expression normally throws instead of returning
+    // NULL, so the failure branch below may be unreachable -- confirm against
+    // the build's exception settings.
+    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
+    if (!BC15optionsDefault) {
+        (*options) = NULL;
+        return CGU_CORE_ERR_NEWMEM;
+    }
+
+    SetDefaultBC15Options(BC15optionsDefault);
+    (*options) = BC15optionsDefault;
+    return CGU_CORE_OK;
+}
+
+// Deletes an options object, treating the pointer as a CMP_BC15Options.
+// Returns CGU_CORE_ERR_INVALIDPTR for a NULL pointer, CGU_CORE_OK otherwise.
+int CMP_CDECL DestroyOptionsBC2(void *options)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+
+    delete static_cast<CMP_BC15Options *>(options);
+    return CGU_CORE_OK;
+}
+
+// Stores the quality setting in the options object, limiting values below
+// 0.0 to 0.0 and values above 1.0 to 1.0.
+// Returns CGU_CORE_ERR_INVALIDPTR for NULL options, CGU_CORE_OK otherwise.
+int CMP_CDECL SetQualityBC2(void *options,
+    CGU_FLOAT fquality)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+
+    CGU_FLOAT limited = fquality;
+    if (limited < 0.0f)
+        limited = 0.0f;
+    else if (limited > 1.0f)
+        limited = 1.0f;
+
+    static_cast<CMP_BC15Options *>(options)->m_fquality = limited;
+    return CGU_CORE_OK;
+}
+
+// Turns on channel weighting in the options object and stores the three
+// weights (index 0 = red, 1 = green, 2 = blue).
+// A weight below 0.0 or above 1.0 is rejected with the matching
+// CGU_CORE_ERR_RANGE* code (checked in red, green, blue order) and leaves the
+// options untouched. NULL options yields CGU_CORE_ERR_INVALIDPTR.
+int CMP_CDECL SetChannelWeightsBC2(void *options,
+    CGU_FLOAT WeightRed,
+    CGU_FLOAT WeightGreen,
+    CGU_FLOAT WeightBlue) {
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+
+    if ((WeightRed > 1.0f) || (WeightRed < 0.0f))
+        return CGU_CORE_ERR_RANGERED;
+    if ((WeightGreen > 1.0f) || (WeightGreen < 0.0f))
+        return CGU_CORE_ERR_RANGEGREEN;
+    if ((WeightBlue > 1.0f) || (WeightBlue < 0.0f))
+        return CGU_CORE_ERR_RANGEBLUE;
+
+    CMP_BC15Options *bcOptions = (CMP_BC15Options *)options;
+    bcOptions->m_bUseChannelWeighting = true;
+    bcOptions->m_fChannelWeights[0] = WeightRed;
+    bcOptions->m_fChannelWeights[1] = WeightGreen;
+    bcOptions->m_fChannelWeights[2] = WeightBlue;
+    return CGU_CORE_OK;
+}
+
+// Expands an explicit alpha block (DXT3) into 16 8-bit alpha values.
+// Each 32-bit word of compressedBlock holds eight 4-bit entries, lowest bits
+// first; every entry is widened to 8 bits by repeating it in the high and
+// low nibble.
+void DecompressExplicitAlphaBlock(CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4],
+    const CGU_UINT32 compressedBlock[2])
+{
+    for (int word = 0; word < 2; word++)
+    {
+        for (int slot = 0; slot < 8; slot++)
+        {
+            const CGU_UINT8 nibble = (CGU_UINT8)((compressedBlock[word] >> (slot * EXPLICIT_ALPHA_PIXEL_BPP)) & EXPLICIT_ALPHA_PIXEL_MASK);
+            alphaBlock[word * 8 + slot] = (CGU_UINT8)((nibble << EXPLICIT_ALPHA_PIXEL_BPP) | nibble);
+        }
+    }
+}
+
+// Decodes one BC2 block into 16 packed 32-bit pixels.
+// The colour part is decoded into rgbaBlock by DecompressDXTRGB_Internal,
+// then the alpha byte of every pixel is overwritten with the value decoded
+// from the explicit-alpha part.
+void DecompressBC2_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[BLOCK_SIZE_4X4X4],
+    const CGU_UINT32 compressedBlock[4],
+    const CMP_BC15Options *BC15options)
+{
+    CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
+    DecompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
+    DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],BC15options);
+
+    // View the output as 32-bit pixels and splice the alpha byte into each.
+    CMP_GLOBAL CGU_UINT32 *pixels = (CMP_GLOBAL CGU_UINT32*)rgbaBlock;
+    for (CGU_UINT32 p = 0; p < 16; p++)
+    {
+        const CGU_UINT32 colourBits = pixels[p] & ~(BYTE_MASK << RGBA8888_OFFSET_A);
+        pixels[p] = (alphaBlock[p] << RGBA8888_OFFSET_A) | colourBits;
+    }
+}
+
+// Compresses one 4x4 block of 4-byte pixels to a 16-byte BC2 block.
+//   srcBlock         : first byte of the block's top-left pixel.
+//   srcStrideInBytes : byte distance between the starts of consecutive rows.
+//   cmpBlock         : receives the compressed block.
+//   options          : a CMP_BC15Options, or NULL to use the defaults.
+// Always returns CGU_CORE_OK.
+int CMP_CDECL CompressBlockBC2(const unsigned char *srcBlock,
+                               unsigned int srcStrideInBytes,
+                               CMP_GLOBAL unsigned char cmpBlock[16],
+                               CMP_GLOBAL const void *options = NULL) {
+
+    // Gather the 4 rows of 4 pixels into a contiguous 16-pixel block.
+    CMP_Vec4uc inBlock[16];
+    CGU_INT    pixel = 0;
+    for (CGU_UINT8 row = 0; row < 4; row++)
+    {
+        CGU_INT srcpos = row * srcStrideInBytes;
+        for (CGU_UINT8 col = 0; col < 4; col++, pixel++)
+        {
+            inBlock[pixel].x = CGU_UINT8(srcBlock[srcpos + 0]);
+            inBlock[pixel].y = CGU_UINT8(srcBlock[srcpos + 1]);
+            inBlock[pixel].z = CGU_UINT8(srcBlock[srcpos + 2]);
+            inBlock[pixel].w = CGU_UINT8(srcBlock[srcpos + 3]);
+            srcpos += 4;
+        }
+    }
+
+    // Substitute default options when the caller passed none.
+    CMP_BC15Options BC15optionsDefault;
+    CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
+    if (!BC15options)
+    {
+        SetDefaultBC15Options(&BC15optionsDefault);
+        BC15options = &BC15optionsDefault;
+    }
+
+    CompressBlockBC2_Internal(inBlock, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options);
+    return CGU_CORE_OK;
+}
+
+// Decompresses one 16-byte BC2 block into srcBlock (64 bytes).
+// options is a CMP_BC15Options, or NULL to use the defaults.
+// Always returns CGU_CORE_OK.
+int CMP_CDECL DecompressBlockBC2(const unsigned char cmpBlock[16], 
+                                 CMP_GLOBAL unsigned char srcBlock[64],
+                                 const void *options = NULL) {
+    CMP_BC15Options BC15optionsDefault;
+    CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
+    if (!BC15options)
+    {
+        // No options supplied: decode with a locally built default set.
+        SetDefaultBC15Options(&BC15optionsDefault);
+        BC15options = &BC15optionsDefault;
+    }
+
+    DecompressBC2_Internal(srcBlock, (CGU_UINT32 *)cmpBlock,BC15options);
+    return CGU_CORE_OK;
+}
+#endif
+
+//============================================== OpenCL USER INTERFACE ========================================================
+#ifdef ASPM_GPU
+// Kernel entry point: compresses the 4x4 block selected by the global ids
+// (id 0 = block column, id 1 = block row) to BC2.
+//   ImageSource      : source pixels, indexed in whole CMP_Vec4uc elements.
+//   ImageDestination : output bytes; each block takes BC2CompBlockSize bytes.
+//   SourceInfo       : supplies m_src_width and m_src_height.
+//   BC15options      : encoder options passed on to CompressBlockBC2_Internal.
+CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
+    CMP_GLOBAL  const CMP_Vec4uc*   ImageSource,
+    CMP_GLOBAL  CGU_UINT8*          ImageDestination,
+    CMP_GLOBAL  Source_Info*        SourceInfo,
+    CMP_GLOBAL  CMP_BC15Options*    BC15options
+)
+{
+    CGU_UINT32 xID;
+    CGU_UINT32 yID;
+
+    // This function already sits inside an outer #ifdef ASPM_GPU, so the #else
+    // branch below is never compiled.
+#ifdef ASPM_GPU
+    xID = get_global_id(0);
+    yID = get_global_id(1);
+#else
+    xID = 0;
+    yID = 0;
+#endif
+
+    // Ignore ids that fall outside the grid of whole blocks.
+    // NOTE(review): BlockX is used for the height as well -- presumably blocks
+    // are square; confirm.
+    if (xID >= (SourceInfo->m_src_width / BlockX)) return;
+    if (yID >= (SourceInfo->m_src_height / BlockX)) return;
+    int  srcWidth = SourceInfo->m_src_width;
+
+    // Byte offset of this block in the output: blocks are laid out row by row,
+    // (srcWidth / BlockX) blocks per row.
+    CGU_UINT32 destI = (xID*BC2CompBlockSize) + (yID*(srcWidth / BlockX)*BC2CompBlockSize);
+    // Index of the block's first pixel: row 4*yID, column 4*xID.
+    // NOTE(review): the literal 4 presumably equals BlockX -- confirm.
+    int srcindex = 4 * (yID * srcWidth + xID);
+    int blkindex = 0;
+    CMP_Vec4uc srcData[16];
+    // From here on srcWidth is the skip from the end of one 4-pixel run to the
+    // start of the next row's run.
+    srcWidth = srcWidth - 4;
+
+    for ( CGU_INT32 j = 0; j < 4; j++) {
+        for ( CGU_INT32 i = 0; i < 4; i++) {
+            srcData[blkindex++] = ImageSource[srcindex++];
+        }
+        srcindex += srcWidth;
+    }
+
+    CompressBlockBC2_Internal(srcData,(CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
+}
+#endif
+
--- a/extern/CMP_Core/shaders/BC2_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC2_Encode_kernel.h
@ -0,0 +1,34 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef BC2_ENCODE_KERNEL_H
+#define BC2_ENCODE_KERNEL_H
+
+#include "Common_Def.h"
+#include "BCn_Common_Kernel.h"
+
+// Size in bytes of one compressed BC2 block; the GPU encoder uses it as the
+// per-block stride into its byte-addressed output buffer.
+#define BC2CompBlockSize    16
+// NOTE(review): the two constants below are not referenced by
+// BC2_Encode_kernel.cpp; presumably the channel count per pixel and the
+// number of endpoints per block -- confirm against their users.
+#define NUM_CHANNELS        4
+#define NUM_ENDPOINTS       2
+
+
+#endif
--- a/extern/CMP_Core/shaders/BC3_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC3_Encode_kernel.cpp
@ -0,0 +1,218 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#include "BC3_Encode_kernel.h"
+
+//============================================== BC3 INTERFACES =======================================================
+
+// Encodes one 4x4 block of pixels as BC3: an alpha part written at
+// compressedBlock[DXTC_OFFSET_ALPHA] by CompressAlphaBlock and a colour part
+// written at compressedBlock[DXTC_OFFSET_RGB] by CompressRGBBlock.
+//   srcBlockTemp    : 16 source pixels (x, y, z, w members).
+//   compressedBlock : destination for the compressed block.
+//   BC15options     : encoder options; copied locally, never modified.
+void CompressBlockBC3_Internal(const CMP_Vec4uc srcBlockTemp[16],
+                               CMP_GLOBAL CGU_UINT32 compressedBlock[4],
+                               CMP_GLOBAL const CMP_BC15Options *BC15options) {
+  // Re-pack the pixels as interleaved bytes in z, y, x, w order.
+  CGU_UINT8 rgbaBlock[64];
+  CGU_UINT8 outPos = 0;
+  for (CGU_UINT8 pixel = 0; pixel < 16; pixel++) {
+    rgbaBlock[outPos++] = (CGU_UINT8)srcBlockTemp[pixel].z;  // B
+    rgbaBlock[outPos++] = (CGU_UINT8)srcBlockTemp[pixel].y;  // G
+    rgbaBlock[outPos++] = (CGU_UINT8)srcBlockTemp[pixel].x;  // R
+    rgbaBlock[outPos++] = (CGU_UINT8)srcBlockTemp[pixel].w;  // A
+  }
+
+  // The weighting step gets a private copy of the options to work on.
+  CMP_BC15Options internalOptions = *BC15options;
+  CalculateColourWeightings(rgbaBlock, &internalOptions);
+
+  // Pull the alpha byte out of each packed 32-bit pixel.
+  CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
+  for (CGU_INT32 pixel = 0; pixel < 16; pixel++) {
+    alphaBlock[pixel] =
+        (CGU_UINT8)(((CGU_INT32 *)rgbaBlock)[pixel] >> RGBA8888_OFFSET_A);
+  }
+
+  // Alpha first; the colour part is only written when alpha succeeded.
+  if (CompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]) != 0)
+    return;
+
+  CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], &internalOptions,
+                   FALSE, FALSE, 0);
+}
+
+//============================================== USER INTERFACES ========================================================
+#ifndef ASPM_GPU
+
+// Allocates a CMP_BC15Options, initializes it with SetDefaultBC15Options and
+// hands it back through *options. Release it with DestroyOptionsBC3.
+// Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when options is
+// NULL, and CGU_CORE_ERR_NEWMEM (with *options set to NULL) when the
+// allocation yields a null pointer.
+int CMP_CDECL CreateOptionsBC3(void **options)
+{
+    // Reject a null out-parameter before writing through it.
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+
+    // NOTE(review): a plain new-expression normally throws instead of returning
+    // NULL, so the failure branch below may be unreachable -- confirm against
+    // the build's exception settings.
+    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
+    if (!BC15optionsDefault) {
+        (*options) = NULL;
+        return CGU_CORE_ERR_NEWMEM;
+    }
+
+    SetDefaultBC15Options(BC15optionsDefault);
+    (*options) = BC15optionsDefault;
+    return CGU_CORE_OK;
+}
+
+
+// Deletes an options object, treating the pointer as a CMP_BC15Options.
+// Returns CGU_CORE_ERR_INVALIDPTR for a NULL pointer, CGU_CORE_OK otherwise.
+int CMP_CDECL DestroyOptionsBC3(void *options)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+
+    delete static_cast<CMP_BC15Options *>(options);
+    return CGU_CORE_OK;
+}
+
+// Stores the quality setting in the options object, limiting values below
+// 0.0 to 0.0 and values above 1.0 to 1.0.
+// Returns CGU_CORE_ERR_INVALIDPTR for NULL options, CGU_CORE_OK otherwise.
+int CMP_CDECL SetQualityBC3(void *options,
+    CGU_FLOAT fquality)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+
+    CGU_FLOAT limited = fquality;
+    if (limited < 0.0f)
+        limited = 0.0f;
+    else if (limited > 1.0f)
+        limited = 1.0f;
+
+    static_cast<CMP_BC15Options *>(options)->m_fquality = limited;
+    return CGU_CORE_OK;
+}
+
+// Turns on channel weighting in the options object and stores the three
+// weights (index 0 = red, 1 = green, 2 = blue).
+// A weight below 0.0 or above 1.0 is rejected with the matching
+// CGU_CORE_ERR_RANGE* code (checked in red, green, blue order) and leaves the
+// options untouched. NULL options yields CGU_CORE_ERR_INVALIDPTR.
+int CMP_CDECL SetChannelWeightsBC3(void *options,
+    CGU_FLOAT WeightRed,
+    CGU_FLOAT WeightGreen,
+    CGU_FLOAT WeightBlue) {
+    // Report a null pointer with the named error code, as the other option
+    // setters do, rather than with a bare literal.
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+    CMP_BC15Options *BC15optionsDefault = (CMP_BC15Options *)options;
+
+    if ((WeightRed < 0.0f) || (WeightRed > 1.0f))       return CGU_CORE_ERR_RANGERED;
+    if ((WeightGreen < 0.0f) || (WeightGreen > 1.0f))   return CGU_CORE_ERR_RANGEGREEN;
+    if ((WeightBlue < 0.0f) || (WeightBlue > 1.0f))     return CGU_CORE_ERR_RANGEBLUE;
+
+    BC15optionsDefault->m_bUseChannelWeighting = true;
+    BC15optionsDefault->m_fChannelWeights[0] = WeightRed;
+    BC15optionsDefault->m_fChannelWeights[1] = WeightGreen;
+    BC15optionsDefault->m_fChannelWeights[2] = WeightBlue;
+    return CGU_CORE_OK;
+}
+
+
+// Decodes one BC3 block into 16 packed 32-bit pixels.
+// The colour part is decoded into rgbaBlock by DecompressDXTRGB_Internal,
+// then the alpha byte of every pixel is overwritten with the value produced
+// by DecompressAlphaBlock.
+void DecompressBC3_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
+                            const CGU_UINT32 compressedBlock[4],
+                            const CMP_BC15Options *BC15options) {
+  CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
+  DecompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
+  DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],BC15options);
+
+  // View the output as 32-bit pixels and splice the alpha byte into each.
+  CMP_GLOBAL CGU_UINT32 *pixels = (CMP_GLOBAL CGU_UINT32 *)rgbaBlock;
+  for (CGU_UINT32 p = 0; p < 16; p++) {
+    const CGU_UINT32 colourBits =
+        pixels[p] & ~(BYTE_MASK << RGBA8888_OFFSET_A);
+    pixels[p] = (alphaBlock[p] << RGBA8888_OFFSET_A) | colourBits;
+  }
+}
+
+// Compresses one 4x4 block of 4-byte pixels to a 16-byte BC3 block.
+//   srcBlock         : first byte of the block's top-left pixel.
+//   srcStrideInBytes : byte distance between the starts of consecutive rows.
+//   cmpBlock         : receives the compressed block.
+//   options          : a CMP_BC15Options, or NULL to use the defaults.
+// Always returns CGU_CORE_OK.
+int CMP_CDECL CompressBlockBC3( const unsigned char *srcBlock,
+                                unsigned int srcStrideInBytes,
+                                CMP_GLOBAL unsigned char cmpBlock[16],
+                                const void *options = NULL) {
+    // Gather the 4 rows of 4 pixels into a contiguous 16-pixel block.
+    CMP_Vec4uc inBlock[16];
+    CGU_INT    pixel = 0;
+    for (CGU_UINT8 row = 0; row < 4; row++)
+    {
+        CGU_INT srcpos = row * srcStrideInBytes;
+        for (CGU_UINT8 col = 0; col < 4; col++, pixel++)
+        {
+            inBlock[pixel].x = CGU_UINT8(srcBlock[srcpos + 0]);
+            inBlock[pixel].y = CGU_UINT8(srcBlock[srcpos + 1]);
+            inBlock[pixel].z = CGU_UINT8(srcBlock[srcpos + 2]);
+            inBlock[pixel].w = CGU_UINT8(srcBlock[srcpos + 3]);
+            srcpos += 4;
+        }
+    }
+
+    // Substitute default options when the caller passed none.
+    CMP_BC15Options BC15optionsDefault;
+    CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
+    if (!BC15options)
+    {
+        SetDefaultBC15Options(&BC15optionsDefault);
+        BC15options = &BC15optionsDefault;
+    }
+
+    CompressBlockBC3_Internal(inBlock,(CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options);
+    return CGU_CORE_OK;
+}
+
+// Decompresses one 16-byte BC3 block into srcBlock (64 bytes).
+// options is a CMP_BC15Options, or NULL to use the defaults.
+// Always returns CGU_CORE_OK.
+int CMP_CDECL DecompressBlockBC3(const unsigned char cmpBlock[16],
+                                 CMP_GLOBAL unsigned char srcBlock[64],
+                                 const void *options = NULL) {
+    CMP_BC15Options BC15optionsDefault;
+    CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
+    if (!BC15options)
+    {
+        // No options supplied: decode with a locally built default set.
+        SetDefaultBC15Options(&BC15optionsDefault);
+        BC15options = &BC15optionsDefault;
+    }
+
+    DecompressBC3_Internal(srcBlock, (CGU_UINT32 *)cmpBlock,BC15options);
+    return CGU_CORE_OK;
+}
+#endif
+
+//============================================== OpenCL USER INTERFACE ====================================================
+#ifdef ASPM_GPU
+// Kernel entry point: compresses the 4x4 block selected by the global ids
+// (id 0 = block column, id 1 = block row) to BC3.
+//   ImageSource      : source pixels, indexed in whole CMP_Vec4uc elements.
+//   ImageDestination : output bytes; each block takes BC3CompBlockSize bytes.
+//   SourceInfo       : supplies m_src_width and m_src_height.
+//   BC15options      : encoder options passed on to CompressBlockBC3_Internal.
+CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
+    CMP_GLOBAL const CMP_Vec4uc *ImageSource,
+    CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo,
+    CMP_GLOBAL CMP_BC15Options *BC15options) {
+  CGU_UINT32 xID;
+  CGU_UINT32 yID;
+
+  // This function already sits inside an outer #ifdef ASPM_GPU, so the #else
+  // branch below is never compiled.
+#ifdef ASPM_GPU
+  xID = get_global_id(0);
+  yID = get_global_id(1);
+#else
+  xID = 0;
+  yID = 0;
+#endif
+
+  // Ignore ids that fall outside the grid of whole blocks.
+  // NOTE(review): BlockX is used for the height as well -- presumably blocks
+  // are square; confirm.
+  if (xID >= (SourceInfo->m_src_width / BlockX)) return;
+  if (yID >= (SourceInfo->m_src_height / BlockX)) return;
+  int srcWidth = SourceInfo->m_src_width;
+
+  // Byte offset of this block in the output: blocks are laid out row by row,
+  // (srcWidth / BlockX) blocks per row.
+  CGU_UINT32 destI =
+      (xID * BC3CompBlockSize) + (yID * (srcWidth / BlockX) * BC3CompBlockSize);
+  // Index of the block's first pixel: row 4*yID, column 4*xID.
+  // NOTE(review): the literal 4 presumably equals BlockX -- confirm.
+  int srcindex = 4 * (yID * srcWidth + xID);
+  int blkindex = 0;
+  CMP_Vec4uc srcData[16];
+  // From here on srcWidth is the skip from the end of one 4-pixel run to the
+  // start of the next row's run.
+  srcWidth = srcWidth - 4;
+
+  for (CGU_INT32 j = 0; j < 4; j++) {
+    for (CGU_INT32 i = 0; i < 4; i++) {
+      srcData[blkindex++] = ImageSource[srcindex++];
+    }
+    srcindex += srcWidth;
+  }
+
+  CompressBlockBC3_Internal(
+      srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
+}
+#endif
--- a/extern/CMP_Core/shaders/BC3_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC3_Encode_kernel.h
@ -0,0 +1,31 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef BC3_ENCODE_KERNEL_H
+#define BC3_ENCODE_KERNEL_H
+
+#include "Common_Def.h"
+#include "BCn_Common_Kernel.h"
+
+// Size in bytes of one compressed BC3 block; the GPU encoder uses it as the
+// per-block stride into its byte-addressed output buffer.
+#define BC3CompBlockSize 16
+
+#endif
--- a/extern/CMP_Core/shaders/BC4_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC4_Encode_kernel.cpp
@ -0,0 +1,200 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#include "BC4_Encode_kernel.h"
+
+//============================================== BC4 INTERFACES =======================================================
+
+// Encodes the x (red) member of 16 pixels as a single-channel BC4 block by
+// handing the 16 values to CompressAlphaBlock.
+//   srcBlockTemp    : 16 source pixels; only the x member is read.
+//   compressedBlock : destination for the compressed block.
+//   BC15options     : read but currently has no effect on the output.
+void CompressBlockBC4_Internal(const CMP_Vec4uc srcBlockTemp[16],
+                               CMP_GLOBAL CGU_UINT32 compressedBlock[2],
+                               CMP_GLOBAL const CMP_BC15Options *BC15options) {
+  if (BC15options->m_fquality) {
+    // Reserved!
+  }
+
+  CGU_UINT8 alphaBlock[16];
+  for (CGU_UINT8 pixel = 0; pixel < 16; pixel++) {
+    alphaBlock[pixel] = (CGU_UINT8)srcBlockTemp[pixel].x;  // Red channel
+  }
+
+  CompressAlphaBlock(alphaBlock, (CMP_GLOBAL CGU_UINT32 *)compressedBlock);
+}
+
+// Decodes a BC4 block into 16 four-byte pixels, writing the decoded
+// single-channel value into all four bytes of each pixel.
+// BC15options is accepted but not used.
+void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
+                            const CGU_UINT32 compressedBlock[2],
+                            const CMP_BC15Options *BC15options) {
+  if (BC15options) {}
+
+  CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
+  DecompressAlphaBlock(alphaBlock, compressedBlock);
+
+  for (CGU_UINT8 pixel = 0; pixel < 16; pixel++) {
+    const CGU_UINT8 value = (CGU_UINT8)alphaBlock[pixel];
+    // Same value for R, G, B and A.
+    for (CGU_UINT8 channel = 0; channel < 4; channel++) {
+      rgbaBlock[pixel * 4 + channel] = value;
+    }
+  }
+}
+
+// Compresses 16 single-channel values to a BC4 block by forwarding them to
+// CompressAlphaBlock. BC15options is accepted but not used.
+void CompressBlockBC4_SingleChannel(const CGU_UINT8 srcBlockTemp[16],
+                               CMP_GLOBAL CGU_UINT32 compressedBlock[2],
+                               CMP_GLOBAL const CMP_BC15Options *BC15options) {
+  // Empty test: references the parameter without acting on it.
+  if (BC15options) {}
+  CompressAlphaBlock(srcBlockTemp, (CMP_GLOBAL CGU_UINT32 *)compressedBlock);
+}
+
+// Decompresses a BC4 block into 16 single-channel values by forwarding to
+// DecompressAlphaBlock. BC15options is accepted but not used.
+void DecompressBlockBC4_SingleChannel(CGU_UINT8 srcBlockTemp[16],
+                            const CGU_UINT32 compressedBlock[2],
+                            const CMP_BC15Options *BC15options) {
+  // Empty test: references the parameter without acting on it.
+  if (BC15options) {}
+  DecompressAlphaBlock(srcBlockTemp, compressedBlock);
+}
+
+//============================================== USER INTERFACES ========================================================
+#ifndef ASPM_GPU
+
+// Allocates a CMP_BC15Options, initializes it with SetDefaultBC15Options and
+// hands it back through *options. Release it with DestroyOptionsBC4.
+// Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when options is
+// NULL, and CGU_CORE_ERR_NEWMEM (with *options set to NULL) when the
+// allocation yields a null pointer.
+int CMP_CDECL CreateOptionsBC4(void **options)
+{
+    // Reject a null out-parameter before writing through it.
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+
+    // NOTE(review): a plain new-expression normally throws instead of returning
+    // NULL, so the failure branch below may be unreachable -- confirm against
+    // the build's exception settings.
+    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
+    if (!BC15optionsDefault) {
+        (*options) = NULL;
+        return CGU_CORE_ERR_NEWMEM;
+    }
+
+    SetDefaultBC15Options(BC15optionsDefault);
+    (*options) = BC15optionsDefault;
+    return CGU_CORE_OK;
+}
+
+// Deletes an options object, treating the pointer as a CMP_BC15Options.
+// Returns CGU_CORE_ERR_INVALIDPTR for a NULL pointer, CGU_CORE_OK otherwise.
+int CMP_CDECL DestroyOptionsBC4(void *options)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+
+    delete static_cast<CMP_BC15Options *>(options);
+    return CGU_CORE_OK;
+}
+
+// Stores the quality setting in the options object, limiting values below
+// 0.0 to 0.0 and values above 1.0 to 1.0.
+// Returns CGU_CORE_ERR_INVALIDPTR for NULL options, CGU_CORE_OK otherwise.
+int CMP_CDECL SetQualityBC4(void *options,
+    CGU_FLOAT fquality)
+{
+    if (options == NULL)
+        return CGU_CORE_ERR_INVALIDPTR;
+
+    CGU_FLOAT limited = fquality;
+    if (limited < 0.0f)
+        limited = 0.0f;
+    else if (limited > 1.0f)
+        limited = 1.0f;
+
+    static_cast<CMP_BC15Options *>(options)->m_fquality = limited;
+    return CGU_CORE_OK;
+}
+
+// Compresses one 4x4 block of single-byte values to an 8-byte BC4 block.
+//   srcBlock         : first byte of the block's top-left value.
+//   srcStrideInBytes : byte distance between the starts of consecutive rows.
+//   cmpBlock         : receives the compressed block.
+//   options          : a CMP_BC15Options, or NULL to use the defaults.
+// Always returns CGU_CORE_OK.
+int CMP_CDECL CompressBlockBC4(const unsigned char *srcBlock,
+                               unsigned int srcStrideInBytes,
+                               CMP_GLOBAL unsigned char cmpBlock[8],
+                               const void *options = NULL) {
+
+    unsigned char inBlock[16];
+    //----------------------------------
+    // Fill the inBlock with source data
+    //----------------------------------
+    CGU_INT srcpos = 0;
+    CGU_INT dstptr = 0;
+    for (CGU_UINT8 row = 0; row < 4; row++)
+    {
+        srcpos = row * srcStrideInBytes;
+        for (CGU_UINT8 col = 0; col < 4; col++)
+        {
+            inBlock[dstptr++] = CGU_UINT8(srcBlock[srcpos++]);
+        }
+    }
+
+    // The fallback options object is declared at function scope so that the
+    // pointer taken below is still valid when the compressor is called.
+    CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
+    CMP_BC15Options BC15optionsDefault;
+    if (BC15options == NULL) {
+      BC15options = &BC15optionsDefault;
+      SetDefaultBC15Options(BC15options);
+    }
+
+    CompressBlockBC4_SingleChannel(inBlock,(CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options);
+    return CGU_CORE_OK;
+}
+
+// Decompresses one 8-byte BC4 block into 16 single-byte values in srcBlock.
+// options is a CMP_BC15Options, or NULL to use the defaults.
+// Always returns CGU_CORE_OK.
+int CMP_CDECL DecompressBlockBC4(const unsigned char cmpBlock[8],
+                            CMP_GLOBAL unsigned char srcBlock[16],
+                            const void *options = NULL) {
+    CMP_BC15Options BC15optionsDefault;
+    CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
+    if (!BC15options)
+    {
+        // No options supplied: decode with a locally built default set.
+        SetDefaultBC15Options(&BC15optionsDefault);
+        BC15options = &BC15optionsDefault;
+    }
+
+    DecompressBlockBC4_SingleChannel(srcBlock, (CGU_UINT32 *)cmpBlock,BC15options);
+    return CGU_CORE_OK;
+}
+#endif
+
+//============================================== OpenCL USER INTERFACE ====================================================
+#ifdef ASPM_GPU
+// Kernel entry point: compresses the 4x4 block selected by the global ids
+// (id 0 = block column, id 1 = block row) to BC4.
+//   ImageSource      : source pixels, indexed in whole CMP_Vec4uc elements.
+//   ImageDestination : output bytes; each block takes BC4CompBlockSize bytes.
+//   SourceInfo       : supplies m_src_width and m_src_height.
+//   BC15options      : encoder options passed on to CompressBlockBC4_Internal.
+CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
+    CMP_GLOBAL const CMP_Vec4uc *ImageSource,
+    CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo,
+    CMP_GLOBAL CMP_BC15Options *BC15options) {
+  CGU_UINT32 xID;
+  CGU_UINT32 yID;
+
+  // This function already sits inside an outer #ifdef ASPM_GPU, so the #else
+  // branch below is never compiled.
+#ifdef ASPM_GPU
+  xID = get_global_id(0);
+  yID = get_global_id(1);
+#else
+  xID = 0;
+  yID = 0;
+#endif
+
+  // Ignore ids that fall outside the grid of whole blocks.
+  // NOTE(review): BlockX is used for the height as well -- presumably blocks
+  // are square; confirm.
+  if (xID >= (SourceInfo->m_src_width / BlockX)) return;
+  if (yID >= (SourceInfo->m_src_height / BlockX)) return;
+  int srcWidth = SourceInfo->m_src_width;
+
+  // Byte offset of this block in the output: blocks are laid out row by row,
+  // (srcWidth / BlockX) blocks per row.
+  CGU_UINT32 destI =
+      (xID * BC4CompBlockSize) + (yID * (srcWidth / BlockX) * BC4CompBlockSize);
+  // Index of the block's first pixel: row 4*yID, column 4*xID.
+  // NOTE(review): the literal 4 presumably equals BlockX -- confirm.
+  int srcindex = 4 * (yID * srcWidth + xID);
+  int blkindex = 0;
+  CMP_Vec4uc srcData[16];
+  // From here on srcWidth is the skip from the end of one 4-pixel run to the
+  // start of the next row's run.
+  srcWidth = srcWidth - 4;
+
+  for (CGU_INT32 j = 0; j < 4; j++) {
+    for (CGU_INT32 i = 0; i < 4; i++) {
+      srcData[blkindex++] = ImageSource[srcindex++];
+    }
+    srcindex += srcWidth;
+  }
+
+  CompressBlockBC4_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
+}
+#endif
--- a/extern/CMP_Core/shaders/BC4_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC4_Encode_kernel.h
@ -0,0 +1,31 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef BC4_ENCODE_KERNEL_H
+#define BC4_ENCODE_KERNEL_H
+
+#include "Common_Def.h"
+#include "BCn_Common_Kernel.h"
+
+// Size in bytes of one compressed BC4 block; the GPU encoder uses it as the
+// per-block stride into its byte-addressed output buffer.
+#define BC4CompBlockSize 8
+
+#endif
--- a/extern/CMP_Core/shaders/BC5_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC5_Encode_kernel.cpp
@ -0,0 +1,264 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#include "BC5_Encode_kernel.h"
+
+//============================================== BC5 INTERFACES =======================================================
+
+// Compresses the red (.x) and green (.y) channels of a 16-texel (4x4) block.
+// Each channel is copied into a 16-byte scratch array and handed to
+// CompressAlphaBlock: red is written starting at compressedBlock[0], green
+// starting at compressedBlock[2].
+//   srcBlockTemp    : the 16 source texels
+//   compressedBlock : destination for the two encoded channel blocks
+//   BC15options     : must be non-NULL; only m_fquality is read (and ignored)
+void  CompressBlockBC5_Internal(CMP_Vec4uc srcBlockTemp[16],
+                                CMP_GLOBAL CGU_UINT32 compressedBlock[4],
+                                CMP_GLOBAL  CMP_BC15Options *BC15options)
+{
+    if (BC15options->m_fquality) {
+        // Reserved: the quality setting is not used by this encoder yet.
+    }
+
+    CGU_UINT8 channel[16];
+    CGU_INT32 texel;
+
+    // Red channel
+    for (texel = 0; texel < 16; texel++) {
+        channel[texel] = (CGU_UINT8)srcBlockTemp[texel].x;
+    }
+    CompressAlphaBlock(channel, &compressedBlock[0]);
+
+    // Green channel (the scratch array is reused)
+    for (texel = 0; texel < 16; texel++) {
+        channel[texel] = (CGU_UINT8)srcBlockTemp[texel].y;
+    }
+    CompressAlphaBlock(channel, &compressedBlock[2]);
+}
+
+// Decodes a BC5 block into 16 four-byte texels (64 bytes).
+// The two channel blocks at compressedBlock[0] and compressedBlock[2] are
+// decoded with DecompressAlphaBlock. Per texel, byte 1 is always green and
+// byte 3 is always 255. When BC15options->m_mapDecodeRGBA is set, red goes to
+// byte 0 and byte 2 is zero; otherwise red goes to byte 2 and byte 0 is zero.
+void  DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], 
+                             CGU_UINT32 compressedBlock[4],
+                             CMP_BC15Options *BC15options)
+{
+    CGU_UINT8 redValues[BLOCK_SIZE_4X4];
+    CGU_UINT8 greenValues[BLOCK_SIZE_4X4];
+
+    DecompressAlphaBlock(redValues, &compressedBlock[0]);
+    DecompressAlphaBlock(greenValues, &compressedBlock[2]);
+
+    // Byte slots inside one output texel for the red value and the zero filler.
+    CGU_INT32 redSlot  = 2;
+    CGU_INT32 zeroSlot = 0;
+    if (BC15options->m_mapDecodeRGBA)
+    {
+        redSlot  = 0;
+        zeroSlot = 2;
+    }
+
+    for (CGU_INT32 texel = 0; texel < 16; texel++) {
+        const CGU_INT32 base = texel * 4;
+        rgbaBlock[base + redSlot]  = redValues[texel];
+        rgbaBlock[base + 1]        = greenValues[texel];
+        rgbaBlock[base + zeroSlot] = 0;
+        rgbaBlock[base + 3]        = 255;
+    }
+}
+
+
+// Compresses two separately supplied 16-value channels into one BC5 block:
+// srcBlockR is encoded starting at compressedBlock[0] and srcBlockG starting
+// at compressedBlock[2]. BC15options is accepted but not used here.
+void  CompressBlockBC5_DualChannel_Internal(const CGU_UINT8 srcBlockR[16],
+                                            const CGU_UINT8 srcBlockG[16],
+                                            CMP_GLOBAL  CGU_UINT32 compressedBlock[4],
+                                            CMP_GLOBAL  const CMP_BC15Options *BC15options)
+{
+    (void)BC15options;  // unused parameter
+
+    CompressAlphaBlock(srcBlockR, compressedBlock);
+    CompressAlphaBlock(srcBlockG, compressedBlock + 2);
+}
+
+// Decodes one BC5 block into two separate 16-value channels: the channel
+// block at compressedBlock[0] goes to srcBlockR and the one at
+// compressedBlock[2] goes to srcBlockG. BC15options is accepted but not used.
+void  DecompressBC5_DualChannel_Internal(CMP_GLOBAL CGU_UINT8 srcBlockR[16],
+                                         CMP_GLOBAL CGU_UINT8 srcBlockG[16], 
+                                         const CGU_UINT32 compressedBlock[4],
+                                         const CMP_BC15Options *BC15options)
+{
+    (void)BC15options;  // unused parameter
+
+    DecompressAlphaBlock(srcBlockR, compressedBlock);
+    DecompressAlphaBlock(srcBlockG, compressedBlock + 2);
+}
+
+
+//============================================== USER INTERFACES ========================================================
+#ifndef ASPM_GPU
+
+// Allocates a CMP_BC15Options object, initialises it with
+// SetDefaultBC15Options and hands it back through *options.
+//   options : receives the new object, or NULL when allocation yields NULL;
+//             must itself be non-NULL.
+//   returns : CGU_CORE_OK on success,
+//             CGU_CORE_ERR_INVALIDPTR when options is NULL,
+//             CGU_CORE_ERR_NEWMEM when the allocation returned NULL.
+// The object is released with DestroyOptionsBC5.
+int CMP_CDECL CreateOptionsBC5(void **options)
+{
+    // Reject a NULL out-parameter up front, like the other BC5 entry points,
+    // instead of dereferencing it below.
+    if (!options) return CGU_CORE_ERR_INVALIDPTR;
+
+    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
+    if (!BC15optionsDefault) {
+        // NOTE(review): a plain new-expression reports failure by throwing, so
+        // this branch is only reachable with a non-throwing allocator setup.
+        (*options) = NULL;
+        return CGU_CORE_ERR_NEWMEM;
+    }
+
+    SetDefaultBC15Options(BC15optionsDefault);
+    (*options) = BC15optionsDefault;
+    return CGU_CORE_OK;
+}
+
+// Deletes an options object as a CMP_BC15Options.
+//   returns : CGU_CORE_ERR_INVALIDPTR when options is NULL, else CGU_CORE_OK.
+int CMP_CDECL DestroyOptionsBC5(void *options)
+{
+    if (options == NULL) {
+        return CGU_CORE_ERR_INVALIDPTR;
+    }
+
+    delete static_cast<CMP_BC15Options *>(options);
+    return CGU_CORE_OK;
+}
+
+// Stores the quality setting in the options object, limited to [0.0, 1.0]:
+// values below 0 are stored as 0, values above 1 as 1.
+//   returns : CGU_CORE_ERR_INVALIDPTR when options is NULL, else CGU_CORE_OK.
+int CMP_CDECL SetQualityBC5(void *options,
+    CGU_FLOAT fquality)
+{
+    if (options == NULL) {
+        return CGU_CORE_ERR_INVALIDPTR;
+    }
+
+    const CGU_FLOAT limited = (fquality < 0.0f) ? 0.0f
+                            : ((fquality > 1.0f) ? 1.0f : fquality);
+
+    static_cast<CMP_BC15Options *>(options)->m_fquality = limited;
+    return CGU_CORE_OK;
+}
+
+
+// Copies a 4x4 block of single-byte samples out of a strided image into 16
+// contiguous bytes (row by row).
+static void BC5_GatherChannel4x4(const CGU_UINT8 *src,
+                                 unsigned int strideInBytes,
+                                 CGU_UINT8 dst[16])
+{
+    for (unsigned int row = 0; row < 4; row++)
+    {
+        // Offsets stay unsigned so a large stride cannot turn into a negative index.
+        const CGU_UINT8 *srcRow = src + row * strideInBytes;
+        for (unsigned int col = 0; col < 4; col++)
+        {
+            dst[row * 4 + col] = srcRow[col];
+        }
+    }
+}
+
+// Compresses one 4x4 block of two single-byte channels into a BC5 block.
+//   srcBlockR / srcStrideInBytes1 : top-left sample of the first channel and
+//                                   the byte distance between its rows
+//   srcBlockG / srcStrideInBytes2 : same for the second channel
+//   cmpBlock                      : receives the compressed block
+//   options                       : CMP_BC15Options created by CreateOptionsBC5,
+//                                   or NULL to use the defaults
+//   returns : CGU_CORE_OK, or CGU_CORE_ERR_INVALIDPTR when a source or
+//             destination pointer is NULL.
+// NOTE(review): cmpBlock is reinterpreted as 32-bit words for the internal
+// encoder; callers presumably pass suitably aligned storage — confirm.
+int CMP_CDECL CompressBlockBC5(const CGU_UINT8 *srcBlockR,
+                               unsigned int srcStrideInBytes1,
+                               const CGU_UINT8 *srcBlockG,
+                               unsigned int srcStrideInBytes2,
+                               CMP_GLOBAL CGU_UINT8 cmpBlock[16],
+                               const void *options = NULL) {
+    // Fail cleanly instead of dereferencing NULL below.
+    if (!srcBlockR || !srcBlockG || !cmpBlock) return CGU_CORE_ERR_INVALIDPTR;
+
+    //----------------------------------
+    // Fill the inBlocks with source data
+    //----------------------------------
+    CGU_UINT8 inBlockR[16];
+    CGU_UINT8 inBlockG[16];
+    BC5_GatherChannel4x4(srcBlockR, srcStrideInBytes1, inBlockR);
+    BC5_GatherChannel4x4(srcBlockG, srcStrideInBytes2, inBlockG);
+
+    // Fall back to default options when the caller supplied none; the
+    // caller's object is only read, so const is preserved.
+    const CMP_BC15Options *BC15options = static_cast<const CMP_BC15Options *>(options);
+    CMP_BC15Options BC15optionsDefault;
+    if (BC15options == NULL)
+    {
+        SetDefaultBC15Options(&BC15optionsDefault);
+        BC15options = &BC15optionsDefault;
+    }
+
+    CompressBlockBC5_DualChannel_Internal(inBlockR, inBlockG, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options);
+    return CGU_CORE_OK;
+}
+
+// Decompresses one BC5 block into two 16-value single-byte channels.
+//   cmpBlock  : the compressed block
+//   srcBlockR : receives the 16 values decoded from the first channel block
+//   srcBlockG : receives the 16 values decoded from the second channel block
+//   options   : CMP_BC15Options created by CreateOptionsBC5, or NULL to use
+//               the defaults
+//   returns : CGU_CORE_OK, or CGU_CORE_ERR_INVALIDPTR when cmpBlock,
+//             srcBlockR or srcBlockG is NULL.
+// NOTE(review): cmpBlock is reinterpreted as 32-bit words for the internal
+// decoder; callers presumably pass suitably aligned storage — confirm.
+int  CMP_CDECL DecompressBlockBC5(const CGU_UINT8 cmpBlock[16],
+                              CMP_GLOBAL CGU_UINT8 srcBlockR[16],
+                              CMP_GLOBAL CGU_UINT8 srcBlockG[16],
+                              const void *options = NULL) {
+    // Fail cleanly instead of dereferencing NULL below.
+    if (!cmpBlock || !srcBlockR || !srcBlockG) return CGU_CORE_ERR_INVALIDPTR;
+
+    // Fall back to default options when the caller supplied none; neither the
+    // options nor the compressed block are modified, so const is preserved.
+    const CMP_BC15Options *BC15options = static_cast<const CMP_BC15Options *>(options);
+    CMP_BC15Options BC15optionsDefault;
+    if (BC15options == NULL)
+    {
+        SetDefaultBC15Options(&BC15optionsDefault);
+        BC15options = &BC15optionsDefault;
+    }
+
+    DecompressBC5_DualChannel_Internal(srcBlockR, srcBlockG, (const CGU_UINT32 *)cmpBlock, BC15options);
+    return CGU_CORE_OK;
+}
+
+#endif
+
+//============================================== OpenCL USER INTERFACE ====================================================
+#ifdef ASPM_GPU
+CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(CMP_GLOBAL  const CMP_Vec4uc*   ImageSource,       // in: source texels, row-major, m_src_width texels per row
+                                          CMP_GLOBAL  CGU_UINT8*          ImageDestination,  // out: compressed blocks, BC5CompBlockSize bytes each
+                                          CMP_GLOBAL  Source_Info*        SourceInfo,        // in: source dimensions (m_src_width, m_src_height)
+                                          CMP_GLOBAL  CMP_BC15Options*    BC15options        // in: forwarded to CompressBlockBC5_Internal
+)
+{
+    CGU_UINT32 xID;  // block column handled by this invocation
+    CGU_UINT32 yID;  // block row handled by this invocation
+// Kernel entry point: each invocation compresses the one 4x4 block selected by its 2D global id.
+#ifdef ASPM_GPU
+    xID = get_global_id(0);
+    yID = get_global_id(1);
+#else  // not reachable: this function is already inside an outer #ifdef ASPM_GPU
+    xID = 0;
+    yID = 0;
+#endif
+// Invocations that fall outside the grid of whole blocks do nothing.
+    if (xID >= (SourceInfo->m_src_width / BlockX)) return;
+    if (yID >= (SourceInfo->m_src_height / BlockX)) return;  // NOTE(review): height is divided by BlockX, not BlockY — confirm both are meant to be equal
+    int  srcWidth = SourceInfo->m_src_width;
+
+    CGU_UINT32 destI = (xID*BC5CompBlockSize) + (yID*(srcWidth / BlockX)*BC5CompBlockSize);  // byte offset of this block in the output; blocks are laid out row by row
+    int srcindex = 4 * (yID * srcWidth + xID);  // index of the block's top-left texel
+    int blkindex = 0;
+    CMP_Vec4uc srcData[16];  // contiguous copy of the 4x4 block
+    srcWidth = srcWidth - 4;  // from here on: distance from the end of one block row to the start of the next
+    // Gather the 16 texels of the block, one row of four at a time.
+    for ( CGU_INT32 j = 0; j < 4; j++) {
+        for ( CGU_INT32 i = 0; i < 4; i++) {
+            srcData[blkindex++] = ImageSource[srcindex++];
+        }
+        srcindex += srcWidth;
+    }
+
+    CompressBlockBC5_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
+}
+#endif
--- a/extern/CMP_Core/shaders/BC5_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC5_Encode_kernel.h
@ -0,0 +1,31 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef BC5_ENCODE_KERNEL_H
+#define BC5_ENCODE_KERNEL_H
+
+#include "Common_Def.h"
+#include "BCn_Common_Kernel.h"
+
+#define BC5CompBlockSize 16  // bytes per compressed BC5 block (used as the per-block byte stride of the GPU encoder's output)
+
+#endif
--- a/extern/CMP_Core/shaders/BC6_Encode_kernel.cpp
+++ b/extern/CMP_Core/shaders/BC6_Encode_kernel.cpp
--- a/extern/CMP_Core/shaders/BC6_Encode_kernel.h
+++ b/extern/CMP_Core/shaders/BC6_Encode_kernel.h
@ -0,0 +1,480 @@
+//=====================================================================
+// Copyright (c) 2018    Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef BC6_ENCODE_KERNEL_H
+#define BC6_ENCODE_KERNEL_H
+
+#include "Common_Def.h"
+
+#define MAX_TRACE                       10
+#define MAX_ENTRIES_QUANT_TRACE         16
+#define BlockX                          4              // block width in texels
+#define BlockY                          4              // block height in texels
+#define BYTEPP                          4              // NOTE(review): presumably bytes per pixel — confirm
+#define COMPRESSED_BLOCK_SIZE           16             // Size of a compressed block in bytes
+#define MAX_DIMENSION_BIG               4
+#define MAX_SUBSET_SIZE                 16              // Largest possible size for an individual subset
+#define NUM_BLOCK_TYPES                 8               // Number of block types in the format
+#define MAX_SUBSETS                     3               // Maximum number of possible subsets
+#define MAX_PARTITIONS                  64              // Maximum number of partition types
+#define MAX_ENTRIES                     64
+#define MAX_TRY                         20
+
+#define MAX_PARTITIONS_TABLE            (1+64+64)
+#define DIMENSION                       4
+#define MAX_CLUSTERS_BIG                16
+#define EPSILON                         0.000001
+#define MAX_CLUSTERS_QUANT_TRACE        8
+
+//# Image Quality will increase as this number gets larger and end-to-end performance time will reduce
+#define MAX_INDEX_BITS                  4
+#define HIGHQULITY_THRESHOLD            0.7F
+#define qFAST_THRESHOLD                 0.5F
+
+#define F16NEGPREC_LIMIT_VAL            -2048.0f //f16 negative precision limit value
+
+#define LOG_CL_RANGE                    5
+#define LOG_CL_BASE                     2
+#define BIT_BASE                        5
+#define BIT_RANGE                       9
+#define MAX_CLUSTERS                    8
+#define BTT(bits)                       (bits-BIT_BASE)   // offset of a bit count from BIT_BASE (argument is not parenthesised)
+#define CLT(cl)                         (cl-LOG_CL_BASE)  // offset of cl from LOG_CL_BASE (argument is not parenthesised)
+#define MASK(n)                         ((1<<(n))-1)      // value with the n lowest bits set
+#define SIGN_EXTEND_TYPELESS(x,nb)      ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x))  // if bit nb-1 of x is set, also sets every bit from nb upwards
+#define CMP_HALF_MAX                    65504.0f // positive half max
+
+#ifndef ASPM_GPU
+#include <bitset>
+#include <assert.h>
+//typedef uint8_t        byte;
+#else
+//typedef bitset       uint8_t;
+//typedef uint8          byte;
+#endif
+
+#define BC6CompBlockSize 16  // same value as COMPRESSED_BLOCK_SIZE (bytes per compressed block)
+#define BC6BlockX   4        // same value as BlockX
+#define BC6BlockY   4        // same value as BlockY
+
+typedef struct  // NOTE(review): pairs an integer k with a float d; presumably an index and its associated distance/error — confirm against the code that fills it
+{
+    CGU_INT  k;
+    CGU_FLOAT d;
+} BC6H_TRACE;
+
+#define NCHANNELS                        3         // channels per endpoint (see END_Points)
+#define MAX_END_POINTS                   2
+#define MAX_BC6H_MODES                  14        // ModePartition and ModeFitOrder hold MAX_BC6H_MODES + 1 entries (index 0 is unused)
+#define MAX_BC6H_PARTITIONS             32
+#define MAX_TWOREGION_MODES             10
+#define COMPRESSED_BLOCK_SIZE           16        // Size of a compressed block in bytes
+#define ONE_REGION_INDEX_OFFSET         65        // bit location to start saving color index values for single region shape
+#define TWO_REGION_INDEX_OFFSET         82        // bit location to start saving color index values for two region shapes
+#define MIN_MODE_FOR_ONE_REGION         11        // Two-region shapes use modes 1..10 and single-region shapes use modes 11..14
+#define R_0(ep)                         (ep)[0][0][i]  // R_0..R_3 expand to ep[a][b][i]; they rely on a variable named i at the point of use
+#define R_1(ep)                         (ep)[0][1][i]
+#define R_2(ep)                         (ep)[1][0][i]
+#define R_3(ep)                         (ep)[1][1][i]
+#define FLT16_MAX                       0x7bff         // bit pattern; read as an IEEE half this is 65504 (cf. CMP_HALF_MAX)
+
+#ifndef ASPM_GPU
+#define USE_SHAKERHD
+#endif
+
+#define USE_NEWRAMP
+
+typedef struct  // One endpoint pair: endpoints A and B with one value per channel
+{
+    CGU_FLOAT A[NCHANNELS];  // endpoint A, indexed by channel
+    CGU_FLOAT B[NCHANNELS];  // endpoint B, indexed by channel
+} END_Points;
+
+typedef struct  // Three-component float vector
+{
+    CGU_FLOAT x, y, z;
+} BC6H_Vec3f;
+
+typedef struct  // One row of the ModePartition table: bit layout parameters of a BC6H mode
+{
+    CGU_INT nbits;              // Number of bits
+    CGU_INT prec[3];            // precision of the quantized RGB endpoints
+    CGU_INT transformed;        // if 0, deltas are unsigned and no transform; otherwise, signed and transformed
+    CGU_INT modebits;           // number of mode bits
+    CGU_INT IndexPrec;          // Index Precision
+    CGU_INT mode;               // Mode value to save
+    CGU_INT lowestPrec;         // Step size of each precision increment
+}  ModePartitions;
+
+__constant ModePartitions ModePartition[MAX_BC6H_MODES + 1] =  // indexed by mode number 1..MAX_BC6H_MODES; entry 0 is a placeholder
+{  // columns: nbits, prec[0..2], transformed, modebits, IndexPrec, mode, lowestPrec
+   0,    0,0,0,        0,    0,    0,    0,     0,   // Mode = Invalid
+
+   // Two region Partition
+   10,   5,5,5,        1,    2,    3,    0x00,  31,    // Mode = 1
+   7,    6,6,6,        1,    2,    3,    0x01,  248,   // Mode = 2
+   11,   5,4,4,        1,    5,    3,    0x02,  15,    // Mode = 3
+   11,   4,5,4,        1,    5,    3,    0x06,  15,    // Mode = 4 
+   11,   4,4,5,        1,    5,    3,    0x0a,  15,    // Mode = 5
+   9,    5,5,5,        1,    5,    3,    0x0e,  62,    // Mode = 6
+   8,    6,5,5,        1,    5,    3,    0x12,  124,   // Mode = 7
+   8,    5,6,5,        1,    5,    3,    0x16,  124,   // Mode = 8
+   8,    5,5,6,        1,    5,    3,    0x1a,  124,   // Mode = 9
+   6,    6,6,6,        0,    5,    3,    0x1e,  496,   // Mode = 10
+
+   // One region Partition    
+   10,   10,10,10,     0,    5,    4,    0x03,  31,    // Mode = 11
+   11,   9,9,9,        1,    5,    4,    0x07,  15,    // Mode = 12
+   12,   8,8,8,        1,    5,    4,    0x0b,  7,     // Mode = 13
+   16,   4,4,4,        1,    5,    4,    0x0f,  1,     // Mode = 14
+};
+
+//================================================
+// Mode pattern order to try on endpoints.
+// The order can be rearranged to set which modes get processed first;
+// for now it is set in ascending mode order.
+//================================================
+__constant CGU_INT8 ModeFitOrder[MAX_BC6H_MODES + 1] =
+{
+   0,                //0: N/A
+    // ----  2 region lower bits ---   (trailing comments give nbits and prec[0..2] of the mode, as listed in ModePartition)
+    1,                // 10 5 5 5
+    2,                // 7  6 6 6 
+    3,                // 11 5 4 4
+    4,                // 11 4 5 4
+    5,                // 11 4 4 5
+    6,                // 9  5 5 5
+    7,                // 8  6 5 5
+    8,                // 8  5 6 5
+    9,                // 8  5 5 6
+    10,               // 6  6 6 6
+    //------ 1 region high bits ---
+    11,               // 10 10 10 10
+    12,               // 11 9  9  9
+    13,               // 12 8  8  8
+    14                // 16 4  4  4
+};
+
+// The Region2FixUps are for our index[subset = 2][16][3] locations,
+// indexed by shape region 2.
+__constant CGU_INT g_Region2FixUp[32] =  // 32 entries; NOTE(review): presumably one per partition shape (MAX_BC6H_PARTITIONS) — confirm
+{
+   7 , 3 , 11, 7,
+   3 , 11, 9 , 5,
+   2 , 12, 7 , 3,
+   11, 7 , 11, 3,
+   7 , 1 , 0 , 1,
+   0 , 1 , 0 , 7,
+   0 , 1 , 1 , 0,
+   4 , 4 , 1 , 0,
+};
+
+// Indexed by all shape regions.
+// Partition-set fix-ups for region 1 (the fix-up for region 0 is always at index 0).
+// Normally 3 bits are used to define an index value;
+// an index at the fix-up location is stored with one bit less.
+__constant CGU_INT g_indexfixups[32] =
+{
+   15,15,15,15,
+   15,15,15,15,
+   15,15,15,15,
+   15,15,15,15,
+   15, 2, 8, 2,
+   2, 8, 8,15,
+   2, 8, 2, 2,
+   8, 8, 2, 2,
+};
+
+typedef struct  // Encoder working state built from CGU_* types; the host-only AMD_BC6H_Format below carries nearly the same fields with native types
+{
+    CGU_INT8 region;                // one or two
+    CGU_INT8 m_mode;                // m
+    CGU_INT8 d_shape_index;         // d
+    CGU_INT rw;                            // endpt[0].A[0]
+    CGU_INT rx;                            // endpt[0].B[0]
+    CGU_INT ry;                            // endpt[1].A[0]
+    CGU_INT rz;                            // endpt[1].B[0] 
+    CGU_INT gw;                            // endpt[0].A[1]
+    CGU_INT gx;                            // endpt[0].B[1]
+    CGU_INT gy;                            // endpt[1].A[1]
+    CGU_INT gz;                            // endpt[1].B[1]
+    CGU_INT bw;                            // endpt[0].A[2]
+    CGU_INT bx;                            // endpt[0].B[2]
+    CGU_INT by;                            // endpt[1].A[2]
+    CGU_INT bz;                            // endpt[1].B[2]
+    // The 16 index bytes are addressable either as a 4x4 grid or as a flat array.
+    union
+    {
+        CGU_UINT8 indices[4][4];            // Indices data after header block
+        CGU_UINT8 indices16[16];
+    };
+    // Input texels as floats, with a byte-wise view over the same storage.
+    union
+    {
+        CGU_FLOAT         din[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];   // Original data input as floats
+        unsigned char     cdin[256];                                 // as uchar to match float (16 x 4 floats = 256 bytes when CGU_FLOAT is 4 bytes)
+    };
+
+    END_Points    EC[MAX_END_POINTS];    // compressed endpoints expressed as endpt[0].A[] and endpt[1].B[]
+    END_Points    E[MAX_END_POINTS];     // decompressed endpoints 
+    CGU_BOOL      issigned;            // Format is 16 bit signed floating point 
+    CGU_BOOL      istransformed;       // region two: all modes = true except mode=10
+    short         wBits;               // number of bits for the root endpoint
+    short         tBits[NCHANNELS];    // number of bits used for the transformed endpoints
+    CGU_INT           format;              // floating point format are we using for decompression
+    BC6H_Vec3f     Paletef[2][16];      // float palette: 2 x 16 entries
+    // Current candidate data and the best candidate found so far (cur_best_*), per subset.
+    CGU_INT           index;               // for debugging
+    CGU_FLOAT     fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
+    CGU_FLOAT     cur_best_fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
+    CGU_INT           shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE];
+    CGU_INT           cur_best_shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE];
+    CGU_INT           entryCount[MAX_SUBSETS];
+    CGU_INT           cur_best_entryCount[MAX_SUBSETS];
+    CGU_FLOAT     partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
+    CGU_FLOAT     cur_best_partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
+    CGU_BOOL      optimized;           // were end points optimized during final encoding
+
+} BC6H_Encode_local;
+
+#ifndef ASPM_GPU
+using namespace std;
+// Bit buffer of up to 128 bits (16 bytes) with byte import/export and
+// bit-field get/set. Bit 0 is the least significant bit of byte 0; bit
+// positions outside 0..127 are never touched.
+class BitHeader
+{
+public:
+    // Starts with all bits cleared. When `in` is non-NULL and sizeinbytes is
+    // at most 16, the first sizeinbytes bytes of `in` are loaded.
+    // m_sizeinbytes always records the requested size, even when nothing is
+    // loaded.
+    BitHeader(const CGU_UINT8 in[], CGU_INT sizeinbytes)
+    {
+        m_bits.reset();
+        m_sizeinbytes = sizeinbytes;
+
+        if ((in != NULL) && (sizeinbytes <= maxBytes()))
+        {
+            // Init bits set with given data
+            CGU_INT bitpos = 0;
+            for (CGU_INT i = 0; i < sizeinbytes; i++)
+            {
+                for (CGU_INT j = 0; j < 8; j++)
+                {
+                    m_bits[bitpos++] = ((in[i] >> j) & 1) != 0;
+                }
+            }
+        }
+    }
+
+    ~BitHeader()
+    {
+    }
+
+    // Writes the first sizeinbytes bytes of the buffer to `in`.
+    // Nothing is written when `in` is NULL, when sizeinbytes is not positive,
+    // or when it exceeds either the size given at construction or the
+    // 16-byte capacity (which previously read past the end of m_bits).
+    void transferbits(CGU_UINT8 in[], CGU_INT sizeinbytes)
+    {
+        if ((in == NULL) || (sizeinbytes <= 0)) return;
+        if ((sizeinbytes > m_sizeinbytes) || (sizeinbytes > maxBytes())) return;
+
+        for (CGU_INT i = 0; i < sizeinbytes; i++)
+        {
+            CGU_UINT8 packed = 0;
+            for (CGU_INT j = 0; j < 8; j++)
+            {
+                if (m_bits[i * 8 + j]) packed = static_cast<CGU_UINT8>(packed | (1 << j));
+            }
+            in[i] = packed;
+        }
+    }
+
+    // Returns the bitsize-bit field starting at bit `start`; the bit at
+    // `start` becomes the least significant bit of the result. Positions
+    // outside the buffer read as 0.
+    CGU_INT getvalue(CGU_INT start, CGU_INT bitsize)
+    {
+        CGU_INT value = 0;
+        for (CGU_INT pos = start + bitsize - 1; pos >= start; pos--)
+        {
+            value <<= 1;
+            if (inRange(pos) && m_bits[pos]) value |= 1;
+        }
+
+        return value;
+    }
+
+    // Stores bitsize bits of `value` at bit `start`, taking bit `maskshift`
+    // of `value` first and the next higher bit for each following position.
+    // Positions outside the buffer are skipped.
+    void setvalue(CGU_INT start, CGU_INT bitsize, CGU_INT value, CGU_INT maskshift = 0)
+    {
+        CGU_INT end = start + bitsize - 1;
+        CGU_INT mask = 0x1 << maskshift;
+        for (CGU_INT pos = start; pos <= end; pos++)
+        {
+            if (inRange(pos)) m_bits[pos] = (value & mask) != 0;
+            mask <<= 1;
+        }
+    }
+
+    std::bitset<128> m_bits;   // 16 bytes max
+    CGU_INT     m_sizeinbytes;
+
+private:
+    // Capacity of m_bits in whole bytes.
+    CGU_INT maxBytes() const
+    {
+        return static_cast<CGU_INT>(m_bits.size() / 8);
+    }
+
+    // True when pos is a valid bit position in m_bits.
+    bool inRange(CGU_INT pos) const
+    {
+        return (pos >= 0) && (pos < static_cast<CGU_INT>(m_bits.size()));
+    }
+};
+
+//==================== DECODER CODE ======================
+#define MAXENDPOINTS                    2        // same value as MAX_END_POINTS
+#define U16MAX                          0xffff   // largest unsigned 16-bit value
+#define S16MAX                          0x7fff   // largest signed 16-bit value
+#define SIGN_EXTEND(w,tbits)            ((((signed(w))&(1<<((tbits)-1)))?((~0)<<(tbits)):0)|(signed(w)))  // if bit tbits-1 of w is set, also sets every bit from tbits upwards
+
+enum  // NOTE(review): presumably the values stored in the `format` field — confirm
+{
+    UNSIGNED_F16 = 1,
+    SIGNED_F16   = 2
+};
+
+enum  // NOTE(review): presumably selects one-region vs two-region blocks — confirm
+{
+    BC6_ONE = 0,
+    BC6_TWO
+};
+
+enum  // channel indices 0, 1, 2
+{
+    C_RED = 0,
+    C_GREEN,
+    C_BLUE
+};
+
+struct BC6H_Vec3  // Three-component integer vector (integer counterpart of BC6H_Vec3f)
+{
+    int x,y,z;
+};
+
+struct AMD_BC6H_Format  // Host-only (inside #ifndef ASPM_GPU) block state using native types; adds the integer palette Palete to the fields of BC6H_Encode_local
+{
+    unsigned short region;             // one or two
+    unsigned short m_mode;             // m
+    int d_shape_index;                 // d
+    int rw;                            // endpt[0].A[0]
+    int rx;                            // endpt[0].B[0]
+    int ry;                            // endpt[1].A[0]
+    int rz;                            // endpt[1].B[0] 
+    int gw;                            // endpt[0].A[1]
+    int gx;                            // endpt[0].B[1]
+    int gy;                            // endpt[1].A[1]
+    int gz;                            // endpt[1].B[1]
+    int bw;                            // endpt[0].A[2]
+    int bx;                            // endpt[0].B[2]
+    int by;                            // endpt[1].A[2]
+    int bz;                            // endpt[1].B[2]
+    // The 16 index bytes are addressable either as a 4x4 grid or as a flat array.
+    union
+    {
+        CGU_UINT8 indices[4][4];            // Indices data after header block
+        CGU_UINT8 indices16[16];
+    };
+
+    float         din[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];   // Original data input
+    END_Points    EC[MAXENDPOINTS];    // compressed endpoints expressed as endpt[0].A[] and endpt[1].B[]
+    END_Points    E[MAXENDPOINTS];     // decompressed endpoints 
+    bool          issigned;            // Format is 16 bit signed floating point 
+    bool          istransformed;       // region two: all modes = true except mode=10
+    short         wBits;               // number of bits for the root endpoint
+    short         tBits[NCHANNELS];    // number of bits used for the transformed endpoints
+    int           format;              // floating point format are we using for decompression
+    BC6H_Vec3      Palete[2][16];      // integer palette: 2 x 16 entries
+    BC6H_Vec3f     Paletef[2][16];     // float palette: 2 x 16 entries
+    // Current candidate data and the best candidate found so far (cur_best_*), per subset.
+    int           index;               // for debugging
+    float         fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
+    float         cur_best_fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
+    int           shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE];
+    int           cur_best_shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE];
+    int           entryCount[MAX_SUBSETS];
+    int           cur_best_entryCount[MAX_SUBSETS];
+    float         partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
+    float         cur_best_partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
+    bool          optimized;           // were end points optimized during final encoding
+};
+
+// ===================================  END OF DECODER CODE ========================================================
+#endif
+
+//-------------------------------------------------
+// Set by Host : Read only in kernel
+//-------------------------------------------------
+typedef struct  // BC6H option set; same fields, in the same order, as the initialization-time group of BC6H_Encode
+{
+    // Setup at initialization time
+    CGU_FLOAT  m_quality;
+    CGU_FLOAT  m_performance;
+    CGU_FLOAT  m_errorThreshold;
+    CGU_DWORD  m_validModeMask;
+    CGU_BOOL   m_imageNeedsAlpha;
+    CGU_BOOL   m_colourRestrict;
+    CGU_BOOL   m_alphaRestrict;
+    CGU_BOOL   m_isSigned;
+} CMP_BC6HOptions;
+
+typedef struct  // BC6H encoder settings; the defaults noted below are the values assigned by SetDefaultBC6Options
+{
+    // These are quality parameters used to select when to use the high precision quantizer
+    // and shaker paths
+    CGU_FLOAT m_quantizerRangeThreshold;  // default 0.0f
+    CGU_FLOAT m_shakerRangeThreshold;     // default 0.0f
+    CGU_FLOAT m_partitionSearchSize;      // default 0.20f
+
+    // Setup at initialization time
+    CGU_FLOAT  m_quality;                 // default 1.0f
+    CGU_FLOAT  m_performance;             // default 0.0f
+    CGU_FLOAT  m_errorThreshold;          // default 0.0f
+    CGU_DWORD  m_validModeMask;           // default 0
+    CGU_BOOL   m_imageNeedsAlpha;         // default 0
+    CGU_BOOL   m_colourRestrict;          // default 0
+    CGU_BOOL   m_alphaRestrict;           // default 0
+    CGU_BOOL   m_isSigned;                // default 0
+
+    // Source image info : must be set prior to use in kernel
+    CGU_UINT32   m_src_width;             // default 4
+    CGU_UINT32   m_src_height;            // default 4
+    CGU_UINT32   m_src_stride;            // default 0
+
+} BC6H_Encode;
+
+// Assigns the default value to every field of *BC6Encode.
+// A NULL pointer is ignored.
+CMP_STATIC void SetDefaultBC6Options(BC6H_Encode *BC6Encode)
+{
+    if (!BC6Encode)
+        return;
+
+    // Quantizer / shaker / partition-search parameters
+    BC6Encode->m_quantizerRangeThreshold = 0.0f;
+    BC6Encode->m_shakerRangeThreshold    = 0.0f;
+    BC6Encode->m_partitionSearchSize     = 0.20f;
+
+    // Initialization-time settings
+    BC6Encode->m_quality         = 1.0f;
+    BC6Encode->m_performance     = 0.0f;
+    BC6Encode->m_errorThreshold  = 0.0f;
+    BC6Encode->m_validModeMask   = 0;
+    BC6Encode->m_imageNeedsAlpha = 0;
+    BC6Encode->m_colourRestrict  = 0;
+    BC6Encode->m_alphaRestrict   = 0;
+    BC6Encode->m_isSigned        = 0;
+
+    // Source image info: defaults describe a single 4x4 block with stride 0
+    BC6Encode->m_src_width  = 4;
+    BC6Encode->m_src_height = 4;
+    BC6Encode->m_src_stride = 0;
+}
+
+#endif
--- a/extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp
+++ b/extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp
--- a/extern/CMP_Core/shaders/BC7_Encode_Kernel.h
+++ b/extern/CMP_Core/shaders/BC7_Encode_Kernel.h
--- a/extern/CMP_Core/shaders/BCn_Common_Kernel.h
+++ b/extern/CMP_Core/shaders/BCn_Common_Kernel.h
--- a/extern/CMP_Core/shaders/Common_Def.h
+++ b/extern/CMP_Core/shaders/Common_Def.h
@ -0,0 +1,300 @@
+#ifndef _COMMON_DEFINITIONS_H
+#define _COMMON_DEFINITIONS_H
+
+//===============================================================================
+// Copyright (c) 2007-2019 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2004-2006 ATI Technologies Inc.
+//===============================================================================
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//
+//  File Name:   Common_Def.h
+//  Description: common definitions used for CPU/HPC/GPU
+//
+//////////////////////////////////////////////////////////////////////////////
+
+
+// Features
+#ifdef _WIN32
+//#define USE_ASPM_CODE
+#endif
+
+// Proxy ISPC compiler (Warning! Not all ASPM features will be available: expect build errors for specialized ASPM code!)
+#ifdef ISPC
+#define ASPM
+#endif
+
+// Using OpenCL Compiler
+#ifdef __OPENCL_VERSION__
+#define  ASPM_GPU
+#endif
+
+
+// When _LINUX is defined the GPU path is switched off again and the host
+// headers (including the Vec2/Vec3/Vec4 classes) are pulled in.
+#ifdef _LINUX
+#undef ASPM_GPU
+#include <cstring>
+#include <cmath>
+#include <stdio.h>
+#include "cmp_math_vec4.h"
+#endif
+
+// Function-like max/min. Each argument appears twice in the expansion, so an
+// argument with side effects (e.g. i++) is evaluated more than once.
+#ifndef CMP_MAX
+#define CMP_MAX(x, y) (((x) > (y)) ? (x) : (y))
+#endif
+
+#ifndef CMP_MIN
+#define CMP_MIN(x, y) (((x) < (y)) ? (x) : (y))
+#endif
+
+#define CMP_SET_BC13_DECODER_RGBA       //  Sets mapping BC1, BC2 & BC3 to decode Red, Green, Blue and Alpha:
+                                        //       RGBA to channels [0,1,2,3] else BGRA maps to [0,1,2,3]
+                                        //  BC4 alpha always maps as AAAA to channels [0,1,2,3]
+                                        //  BC5 decoded (Red & Green) maps R,G,B=0,A=255 to [0,1,2,3] else maps [B=0,G,R,A=255] to [0,1,2,3]
+
+//#define USE_BLOCK_LINEAR
+
+#define CMP_FLOAT_MAX       3.402823466e+38F // max value used to detect an error in processing
+#define CMP_FLOAT_MAX_EXP   38
+#define USE_PROCESS_SEPERATE_ALPHA          // Enable this to use higher quality code using CompressDualIndexBlock
+#define COMPRESSED_BLOCK_SIZE           16  // Size of a compressed block in bytes
+#define MAX_DIMENSION_BIG               4   // Max number of channels  (RGBA)
+#define MAX_SUBSETS                     3   // Maximum number of possible subsets
+#define MAX_SUBSET_SIZE                 16  // Largest possible size for an individual subset
+#define BLOCK_SIZE_4X4X4                64  // 4 * 4 * 4
+#define BLOCK_SIZE_4X4                  16  // 4 * 4
+#define BlockX                          4
+#define BlockY                          4
+//#define USE_BLOCK_LINEAR    // Source data is organized in linear form for each block: experimental code, not fully developed
+//#define USE_DOUBLE          // Default is to use float, enable to use double data types only for float definitions
+
+// Status codes of the core API: CGU_CORE_OK is 0, every following enumerator
+// takes the next integer value.
+typedef enum {
+    CGU_CORE_OK = 0,                          // No errors, call was successful
+    CGU_CORE_ERR_UNKOWN,                      // An unknown error occurred (the enumerator name is spelt "UNKOWN")
+    CGU_CORE_ERR_NEWMEM,                      // New memory allocation failed
+    CGU_CORE_ERR_INVALIDPTR,                  // The pointer value used is invalid or null
+    CGU_CORE_ERR_RANGERED,                    // Value for the Red   channel is out of range (too high or too low)
+    CGU_CORE_ERR_RANGEGREEN,                  // Value for the Green channel is out of range (too high or too low)
+    CGU_CORE_ERR_RANGEBLUE,                   // Value for the Blue  channel is out of range (too high or too low)
+} CGU_ERROR_CODES;
+
+
+//---------------------------------------------
+// Predefinitions for GPU and CPU compiled code
+//---------------------------------------------
+
+// Scalar/vector type aliases and qualifier macros, selected per compiler:
+//   ASPM_GPU defined -> first branch (built-in float2/uchar4... vector types, __global/__kernel qualifiers)
+//   ASPM defined     -> SPMD branch (types carry the uniform/varying qualifiers)
+//   neither          -> standard CPU branch (uniform/varying are defined away)
+// The CGU_* names alias "uniform" types and the CGV_* names "varying" types.
+#ifdef ASPM_GPU  // GPU Based code
+        // ==== Vectors ====
+        typedef float2  CGU_Vec2f;
+        typedef float2  CGV_Vec2f;
+        typedef float3  CMP_Vec3f;
+        typedef float3  CGU_Vec3f;
+        typedef float3  CGV_Vec3f;
+        typedef uchar3  CGU_Vec3uc;
+        typedef uchar3  CGV_Vec3uc;
+        typedef uchar4  CMP_Vec4uc;
+        typedef uchar4  CGU_Vec4uc;
+        typedef uchar4  CGV_Vec4uc;
+
+        #define USE_BC7_SP_ERR_IDX
+        #define ASPM_PRINT(args)      printf args
+        #define BC7_ENCODECLASS
+
+        // Qualifiers that have no meaning in this branch expand to nothing.
+        #define CMP_EXPORT
+        #define INLINE
+        #define uniform
+        #define varying
+        #define CMP_GLOBAL          __global
+        #define CMP_KERNEL          __kernel
+        #define CMP_CONSTANT        __constant
+        #define CMP_STATIC
+
+
+        typedef unsigned int        CGU_DWORD;      //32bits
+        typedef int                 CGU_INT;        //32bits
+        typedef int                 CGU_BOOL;
+        typedef unsigned short      CGU_SHORT;      //16bits
+        typedef float               CGU_FLOAT;
+        typedef unsigned int        uint32;     // need to remove this def
+
+        typedef int                 CGV_INT;
+        typedef unsigned int        CGU_UINT;
+        typedef int                 CGUV_INT;
+        typedef int                 CGV_BOOL;
+
+        typedef char                CGU_INT8;
+        typedef unsigned char       CGU_UINT8;
+        typedef short               CGU_INT16;
+        typedef unsigned short      CGU_UINT16;
+        typedef int                 CGU_INT32;
+        typedef unsigned int        CGU_UINT32;
+        typedef unsigned long       CGU_UINT64;
+
+        typedef char                CGV_INT8;
+        typedef unsigned char       CGV_UINT8;
+        typedef short               CGV_INT16;
+        typedef unsigned short      CGV_UINT16;
+        typedef int                 CGV_INT32;
+        typedef unsigned int        CGV_UINT32;
+        typedef unsigned long       CGV_UINT64;
+
+        typedef float               CGV_FLOAT;
+
+        #define TRUE  1
+        #define FALSE 0
+        #define CMP_CDECL
+
+#else
+    // CPU & ASPM definitions
+
+    #ifdef ASPM // SPMD ,SIMD CPU code
+        // using hybrid (CPU/GPU) aspm compiler 
+        #define ASPM_PRINT(args)       print args
+        #define CMP_USE_FOREACH_ASPM
+        #define __ASPM__
+        #define BC7_ENCODECLASS
+
+        #define USE_BC7_SP_ERR_IDX
+        //#define USE_BC7_RAMP
+
+        #define CMP_EXPORT          export
+        #define TRUE            true
+        #define FALSE           false
+        typedef uniform bool    CGU_BOOL;
+        typedef bool            CGV_BOOL;
+
+        typedef unsigned int8   uint8;
+        typedef unsigned int16  uint16;
+        typedef unsigned int32  uint32;
+        typedef unsigned int64  uint64;
+        typedef uniform float   CGU_FLOAT;
+        typedef varying float   CGV_FLOAT;
+        typedef uniform uint8   CGU_UINT8;
+        typedef varying uint8   CGV_UINT8;
+
+
+        // Fixed-length vector aliases written with the <N> suffix syntax of this compiler.
+        typedef CGV_UINT8<4> CGV_Vec4uc;
+        typedef CGU_UINT8<4> CGU_Vec4uc;
+
+        typedef CGU_FLOAT<3> CGU_Vec3f;
+        typedef CGV_FLOAT<3> CGV_Vec3f;
+
+        typedef CGU_FLOAT<2> CGU_Vec2f;
+        typedef CGV_FLOAT<2> CGV_Vec2f;
+
+        #define CMP_CDECL
+
+    #else   // standard CPU code
+        #include <stdio.h>
+        // NOTE(review): the _LINUX section earlier in this header includes <cstring>;
+        // confirm whether <string> (std::string) is really what is needed here.
+        #include <string>
+        #include "cmp_math_vec4.h"
+
+        // using CPU compiler
+        #define ASPM_PRINT(args)  printf args
+        #define USE_BC7_RAMP
+        #define USE_BC7_SP_ERR_IDX
+
+        #define CMP_EXPORT
+        #define BC7_ENCODECLASS BC7_EncodeClass::
+        #define TRUE            1
+        #define FALSE           0
+        // uniform/varying expand to nothing, so "uniform float" below is plain float.
+        #define uniform
+        #define varying
+
+        // NOTE(review): these widths depend on the target data model. "long" is
+        // 32 bits on LLP64 targets such as 64-bit Windows, so int64/uint64 (and the
+        // CGU_LONG/CGU_ULONG/CG?_UINT64 aliases built on them) would not be 64 bits
+        // wide there. The signedness of plain "char" (int8) is implementation-defined.
+        typedef char            int8;
+        typedef short           int16;
+        typedef int             int32;
+        typedef long            int64;
+        typedef unsigned char   uint8;
+        typedef unsigned short  uint16;
+        typedef unsigned int    uint32;
+        typedef unsigned long   uint64;
+
+        // NOTE(review): CGU_BOOL is one byte here but "int" in the GPU branch, and
+        // CGU_SHORT is uint8 here but "unsigned short" (16 bits) in the GPU branch.
+        // Confirm that no structure using them is shared between the two builds.
+        typedef int8            CGV_BOOL;
+        typedef int8            CGU_BOOL;
+        typedef int16           CGU_WORD;
+        typedef uint8           CGU_SHORT;
+        typedef int64           CGU_LONG;
+        typedef uint64          CGU_ULONG;
+
+        typedef uniform float   CGU_FLOAT;
+        typedef varying float   CGV_FLOAT;
+        typedef uniform uint8   CGU_UINT8;
+        typedef varying uint8   CGV_UINT8;
+        // NOTE(review): CMP_Core.h selects __cdecl with "#ifdef _WIN32"; this test
+        // uses WIN32/_WIN64 instead, so the two definitions can disagree on a build
+        // that defines only _WIN32.
+        #if defined(WIN32) || defined(_WIN64)
+        #define CMP_CDECL __cdecl
+        #else
+        #define CMP_CDECL
+        #endif
+    #endif
+
+    // Common CPU & ASPM definitions
+    #define CMP_ASSERT(arg)         // expands to nothing: the argument is never evaluated
+
+    #define CMP_GLOBAL
+
+    #define CMP_KERNEL
+    // The address-space keywords used by kernel code are mapped to const here.
+    #define __local                 const
+    #define __constant              const
+    #define CMP_CONSTANT            const
+    #define INLINE                  inline
+    #define CMP_STATIC              static
+
+
+    // NOTE(review): CGU_DWORD is the signed int32 here but "unsigned int" in the GPU branch.
+    typedef uniform int32           CGU_DWORD;
+    typedef uniform uint8           CGU_UBYTE;
+    typedef uniform int             CGU_INT;
+    typedef uniform int8            CGU_INT8;
+
+    typedef uniform int16           CGU_INT16;
+    typedef uniform uint16          CGU_UINT16;
+    typedef uniform int32           CGU_INT32;
+    typedef uniform uint32          CGU_UINT32;
+    typedef uniform uint64          CGU_UINT64;
+
+    typedef int                     CGV_INT;
+    typedef int8                    CGV_INT8;
+    typedef int16                   CGV_INT16;
+    typedef int32                   CGV_INT32;
+    typedef uint16                  CGV_UINT16;
+    typedef uint32                  CGV_UINT32;
+    typedef uint64                  CGV_UINT64;
+#endif // ASPM_GPU
+
+
+// Source image dimensions, expressed both in m_src_* units and in blocks,
+// together with a quality value.
+// NOTE(review): presumably m_src_width/m_src_height are in pixels and the
+// *_in_blocks members count 4x4 blocks - confirm against the callers.
+typedef struct 
+{
+    CGU_UINT32     m_src_width;
+    CGU_UINT32     m_src_height;
+    CGU_UINT32     m_width_in_blocks;
+    CGU_UINT32     m_height_in_blocks;
+    CGU_FLOAT      m_fquality;
+} Source_Info;
+
+// Ref Compute_CPU_HPC
+// Image surface descriptor (ref Compute_CPU_HPC): a byte pointer plus
+// width, height, stride and channel count.
+// NOTE(review): the unit of stride (bytes vs. pixels) is not visible here - confirm.
+struct texture_surface
+{
+    CGU_UINT8*  ptr;
+    CGU_INT     width;
+    CGU_INT     height;
+    CGU_INT     stride;
+    CGU_INT     channels;
+};
+
+#endif
--- a/extern/CMP_Core/shaders/CopyFiles.bat
+++ b/extern/CMP_Core/shaders/CopyFiles.bat
@ -0,0 +1,50 @@
+REM ====================================
+REM Hybrid Codecs: Full support in v4.0
+REM ====================================
+
+REM Copies the codec kernel sources that live next to this script into
+REM <output dir>Plugins\Compute\
+REM Usage: CopyFiles.bat <output dir>
+REM The output dir is concatenated directly with "Plugins", so it is expected
+REM to end with a path separator.
+
+REM gets the output dir
+set BUILD_OUTDIR=%1
+
+REM get the batch files dir
+SET mypath=%~dp0
+echo %mypath:~0,-1%
+
+REM Create the destination folders when they are missing. The existence test
+REM must use the same path that mkdir creates (BUILD_OUTDIR); testing an unset
+REM variable would probe \Plugins on the current drive instead.
+IF NOT EXIST %BUILD_OUTDIR%Plugins mkdir %BUILD_OUTDIR%Plugins
+IF NOT EXIST %BUILD_OUTDIR%Plugins\Compute mkdir %BUILD_OUTDIR%Plugins\Compute
+
+REM Build Vulkan Shader Binary
+REM "%VULKAN_SDK%"\bin\glslangvalidator -V %mypath:~0,-1%\BC1.comp -o %BUILD_OUTDIR%\Plugins\Compute\BC1.spv
+REM IF %ERRORLEVEL% GTR 0 exit 123
+
+REM Enabled in v4.0
+REM 
+REM del %BUILD_OUTDIR%Plugins\Compute\BC1_Encode_Kernel.cpp.cmp
+REM del %BUILD_OUTDIR%Plugins\Compute\BC2_Encode_Kernel.cpp.cmp
+REM del %BUILD_OUTDIR%Plugins\Compute\BC3_Encode_Kernel.cpp.cmp
+REM del %BUILD_OUTDIR%Plugins\Compute\BC4_Encode_Kernel.cpp.cmp
+REM del %BUILD_OUTDIR%Plugins\Compute\BC5_Encode_Kernel.cpp.cmp
+REM del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_Kernel.cpp.cmp
+REM del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.cpp.cmp
+
+REM XCopy switches: /r overwrite read-only files, /d copy only newer files, /y no overwrite prompt
+XCopy /r /d /y "%mypath:~0,-1%\Common_Def.h"        %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BCn_Common_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.h"     %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.cpp"   %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.h"  %BUILD_OUTDIR%Plugins\Compute\
+XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.cpp"  %BUILD_OUTDIR%Plugins\Compute\
+
+echo "Dependencies copied done"
+
+
+
+
--- a/extern/CMP_Core/source/CMP_Core.h
+++ b/extern/CMP_Core/source/CMP_Core.h
@ -0,0 +1,153 @@
+//=====================================================================
+// Copyright (c) 2019   Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+/// \file CMP_Core.h
+//
+//=====================================================================
+
+#ifndef CMP_CORE_H
+#define CMP_CORE_H
+
+#include <stddef.h>   // NULL, used by the CMP_DEFAULTNULL default argument
+#include <stdint.h>
+#ifndef __cplusplus
+#include <stdbool.h>  // bool, used by the Set* prototypes when this header is compiled as C
+#endif
+
+// Calling convention of the exported API: __cdecl on Windows, nothing elsewhere.
+// Guarded so that a definition already made by another header (Common_Def.h
+// also defines CMP_CDECL) is not redefined.
+#ifndef CMP_CDECL
+#ifdef _WIN32
+#define CMP_CDECL __cdecl
+#else
+#define CMP_CDECL
+#endif
+#endif
+
+//====================================================================================
+// API Definitions for Core API
+//------------------------------------------------------------------------------------
+// All API return 0 on success else error codes > 0
+// See Common_Def.h CGU_CORE_ values for the error codes
+//=====================================================================================
+
+//======================================================================================================
+// Block level setting option: Create and Destroy Reference Pointers
+//======================================================================================================
+// Create and destroy option contexts for the BCn codecs, where n is one of [1,2,3,4,5,6,7].
+// All codecs use default max quality settings; users can create multiple contexts to
+// hold different quality levels, masks, channel mappings, etc.
+// CreateOptionsBCn receives the address of a void* for the new context and
+// DestroyOptionsBCn takes the context pointer itself.
+// Return value: 0 on success, otherwise an error code > 0 (CGU_CORE_ values in Common_Def.h).
+
+int CMP_CDECL CreateOptionsBC1(void **optionsBC1);
+int CMP_CDECL CreateOptionsBC2(void **optionsBC2);
+int CMP_CDECL CreateOptionsBC3(void **optionsBC3);
+int CMP_CDECL CreateOptionsBC4(void **optionsBC4);
+int CMP_CDECL CreateOptionsBC5(void **optionsBC5);
+int CMP_CDECL CreateOptionsBC6(void **optionsBC6);
+int CMP_CDECL CreateOptionsBC7(void **optionsBC7);
+
+int CMP_CDECL DestroyOptionsBC1(void *optionsBC1);
+int CMP_CDECL DestroyOptionsBC2(void *optionsBC2);
+int CMP_CDECL DestroyOptionsBC3(void *optionsBC3);
+int CMP_CDECL DestroyOptionsBC4(void *optionsBC4);
+int CMP_CDECL DestroyOptionsBC5(void *optionsBC5);
+int CMP_CDECL DestroyOptionsBC6(void *optionsBC6);
+int CMP_CDECL DestroyOptionsBC7(void *optionsBC7);
+
+
+//======================================================================================================
+// Block level settings using the options Reference Pointers
+//======================================================================================================
+
+// Setting channel weights: applies to BC1, BC2 and BC3. Valid range is [0..1.0f]; default is {1.0f, 1.0f, 1.0f}.
+// With swizzled formats the weighting applies to the data within the specified channel, not the channel itself.
+int CMP_CDECL SetChannelWeightsBC1(void *options, float WeightRed, float WeightGreen, float WeightBlue);
+int CMP_CDECL SetChannelWeightsBC2(void *options, float WeightRed, float WeightGreen, float WeightBlue);
+int CMP_CDECL SetChannelWeightsBC3(void *options, float WeightRed, float WeightGreen, float WeightBlue);
+
+
+//  True sets mapping CMP_Core BC1, BC2 & BC3 to decode Red, Green, Blue and Alpha as
+//       RGBA to channels [0,1,2,3] else BGRA maps to [0,1,2,3]
+//  Default is set to true.
+int CMP_CDECL SetDecodeChannelMapping(void *options, bool mapRGBA);
+
+// Quality setting, one setter per codec.
+// NOTE(review): the accepted range of fquality is not stated in this header - confirm in the implementation.
+int CMP_CDECL SetQualityBC1(void *options, float fquality);
+int CMP_CDECL SetQualityBC2(void *options, float fquality);
+int CMP_CDECL SetQualityBC3(void *options, float fquality);
+int CMP_CDECL SetQualityBC4(void *options, float fquality);
+int CMP_CDECL SetQualityBC5(void *options, float fquality);
+int CMP_CDECL SetQualityBC6(void *options, float fquality);
+int CMP_CDECL SetQualityBC7(void *options, float fquality);
+
+
+// BC1 only: alpha threshold.
+int CMP_CDECL SetAlphaThresholdBC1(void *options, unsigned char alphaThreshold);
+
+// Mode masks. Note the different mask widths: unsigned int for BC6, unsigned char for BC7.
+int CMP_CDECL SetMaskBC6(void *options, unsigned int  mask);
+int CMP_CDECL SetMaskBC7(void *options, unsigned char mask);
+
+// BC7 only: alpha handling flags and error thresholds.
+int CMP_CDECL SetAlphaOptionsBC7(void *options, bool imageNeedsAlpha, bool colourRestrict, bool alphaRestrict);
+int CMP_CDECL SetErrorThresholdBC7(void *options, float minThreshold, float maxThreshold);
+
+//======================================================================================================
+// (4x4) Block level 4 channel source CompressBlock and DecompressBlock API for BCn Codecs
+//======================================================================================================
+// The options parameter of these API can be set to null if the default settings are sufficient
+// Example: CompressBlockBC1(srcBlock,16,cmpBlock,NULL);   For "C" call
+//          CompressBlockBC1(srcBlock,16,cmpBlock);        For "C++" calls
+//
+// To use this parameter first create the options context using the CreateOptions call,
+// then use the Set Options calls to set various codec settings and pass the context to the
+// appropriate Compress or Decompress API.
+// The source (srcBlock) channel format is expected to be RGBA:8888 by default for LDR Codecs
+// for BC6H the format is RGBA Half float (16 bits per channel)
+// NOTE(review): the BC6 section at the end of this header describes a 3 channel RGB_16 source
+// and DecompressBlockBC6 takes a 48 element output array - confirm which description is right.
+//------------------------------------------------------------------------------------------------------
+// CMP_DEFAULTNULL supplies "=NULL" as the default argument for options in C++ and
+// expands to nothing in C, where default arguments do not exist.
+#ifdef __cplusplus
+#define CMP_DEFAULTNULL  =NULL
+#else
+#define CMP_DEFAULTNULL
+#endif
+
+//=========================================================================================================
+// 4 channel Sources, default format RGBA:8888 is processed as a 4x4 block starting at srcBlock location
+// where each row of the block is calculated from srcStride
+//=========================================================================================================
+int CMP_CDECL CompressBlockBC1(const unsigned char *srcBlock, unsigned int  srcStrideInBytes, unsigned char cmpBlock[8 ], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL CompressBlockBC2(const unsigned char *srcBlock, unsigned int  srcStrideInBytes, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL CompressBlockBC3(const unsigned char *srcBlock, unsigned int  srcStrideInBytes, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL CompressBlockBC7(const unsigned char *srcBlock, unsigned int  srcStrideInBytes, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL);
+
+// Decompression writes a full 4x4 block of 4 byte texels (64 bytes) into srcBlock.
+int CMP_CDECL DecompressBlockBC1(const unsigned char cmpBlock[8 ], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL DecompressBlockBC2(const unsigned char cmpBlock[16], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL DecompressBlockBC3(const unsigned char cmpBlock[16], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL DecompressBlockBC7(const unsigned char cmpBlock[16], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL);
+
+//================================================
+// 1 channel Source: 4x4 block, 8 bits per texel
+//================================================
+int CMP_CDECL CompressBlockBC4(const unsigned char *srcBlock, unsigned int  srcStrideInBytes, unsigned char cmpBlock[8], const void *options  CMP_DEFAULTNULL);
+int CMP_CDECL DecompressBlockBC4(const unsigned char cmpBlock[8], unsigned char srcBlock[16], const void *options  CMP_DEFAULTNULL);
+
+//================================================
+// 2 channel Source 2x(4x4 8 bits)
+// Each channel is passed as its own block pointer and stride.
+//================================================
+int CMP_CDECL CompressBlockBC5(const unsigned char *srcBlock1, unsigned int srcStrideInBytes1,
+                               const unsigned char *srcBlock2, unsigned int srcStrideInBytes2,
+                               unsigned char cmpBlock[16], const void *options  CMP_DEFAULTNULL);
+int CMP_CDECL DecompressBlockBC5(const unsigned char cmpBlock[16], unsigned char srcBlock1[16], unsigned char srcBlock2[16], const void *options  CMP_DEFAULTNULL);
+
+//========================================================================================
+// For 3 channel Source  RGB_16, Note srcStride is in unsigned short steps (2 bytes each)
+//========================================================================================
+int CMP_CDECL CompressBlockBC6(const unsigned short *srcBlock, unsigned int srcStrideInShorts, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL);
+int CMP_CDECL DecompressBlockBC6(const unsigned char cmpBlock[16], unsigned short srcBlock[48], const void *options CMP_DEFAULTNULL);
+
+#endif  // CMP_CORE_H
--- a/extern/CMP_Core/source/cmp_math_vec4.h
+++ b/extern/CMP_Core/source/cmp_math_vec4.h
@ -0,0 +1,417 @@
+//=====================================================================
+// Copyright 2019 (c), Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+#ifndef CMP_MATH_VEC4_H
+#define CMP_MATH_VEC4_H
+
+//====================================================
+// Vector Class definitions for CPU & Intrinsics
+//====================================================
+
+#if defined (_LINUX) || defined (_WIN32)
+
+//============================================= VEC2 ==================================================
+/// Two-component vector of T with component-wise arithmetic.
+/// The components are the public members x and y; they can also be reached
+/// through operator[] and through the conversions to T*.
+template<class T>
+class Vec2
+{
+public:
+
+    T x;
+    T y;
+
+    // ----------------------------------------
+    //     Construction
+    // ----------------------------------------
+
+    /// Sets both components to zero.
+    Vec2()
+        : x(T(0)), y(T(0))
+    {
+    }
+
+    /// Builds the vector from two explicit components.
+    Vec2(const T& vx, const T& vy)
+        : x(vx), y(vy)
+    {
+    }
+
+    /// Copies another vector component by component.
+    Vec2(const Vec2<T>& other)
+        : x(other.x), y(other.y)
+    {
+    }
+
+    /// Replicates one value into both components.
+    Vec2(const T& value)
+        : x(value), y(value)
+    {
+    }
+
+    // ----------------------------------------
+    //     Conversion / indexing / assignment
+    // ----------------------------------------
+
+    /// Read-only view of the object as an array of T.
+    operator const T* () const
+    {
+        return reinterpret_cast<const T*>(this);
+    }
+
+    /// Mutable view of the object as an array of T.
+    operator T* ()
+    {
+        return reinterpret_cast<T*>(this);
+    }
+
+    /// Component access by index (0 is x, 1 is y); the index is not range checked.
+    const T& operator[](int i) const
+    {
+        const T* components = reinterpret_cast<const T*>(this);
+        return components[i];
+    }
+
+    T& operator[](int i)
+    {
+        T* components = reinterpret_cast<T*>(this);
+        return components[i];
+    }
+
+    /// Component-wise assignment; yields a const reference to this object.
+    const Vec2<T>& operator=(const Vec2<T>& other)
+    {
+        x = other.x;
+        y = other.y;
+        return *this;
+    }
+
+    // ----------------------------------------
+    //     Comparison
+    // ----------------------------------------
+
+    /// True when every component compares equal.
+    bool operator==(const Vec2<T>& other) const
+    {
+        return (x == other.x && y == other.y);
+    }
+
+    /// True when at least one component differs.
+    bool operator!=(const Vec2<T>& other) const
+    {
+        return (x != other.x || y != other.y);
+    }
+
+    // ----------------------------------------
+    //     Arithmetic producing a new vector
+    // ----------------------------------------
+
+    /// Component-wise sum.
+    const Vec2<T> operator+(const Vec2<T>& other) const
+    {
+        return Vec2<T>(x + other.x, y + other.y);
+    }
+
+    /// Component-wise difference.
+    const Vec2<T> operator-(const Vec2<T>& other) const
+    {
+        return Vec2<T>(x - other.x, y - other.y);
+    }
+
+    /// Every component multiplied by a scalar.
+    const Vec2<T> operator*(const T& s) const
+    {
+        return Vec2<T>(x * s, y * s);
+    }
+
+    /// Every component divided by a scalar.
+    const Vec2<T> operator/(const T& s) const
+    {
+        return Vec2<T>(x / s, y / s);
+    }
+
+    // ----------------------------------------
+    //     Arithmetic in place
+    // ----------------------------------------
+
+    /// Adds another vector to this one.
+    Vec2<T>& operator+= (const Vec2<T>& other)
+    {
+        x += other.x;
+        y += other.y;
+        return *this;
+    }
+
+    /// Subtracts another vector from this one.
+    Vec2<T>& operator-= (const Vec2<T>& other)
+    {
+        x -= other.x;
+        y -= other.y;
+        return *this;
+    }
+
+    /// Multiplies this vector by a scalar.
+    Vec2<T>& operator*= (const T& s)
+    {
+        x *= s;
+        y *= s;
+        return *this;
+    }
+
+    /// Divides this vector by a scalar.
+    Vec2<T>& operator/= (const T& s)
+    {
+        x /= s;
+        y /= s;
+        return *this;
+    }
+};
+
+// Concrete two-component vector types
+typedef Vec2<float>  CMP_Vec2f;
+typedef Vec2<float>  CGU_Vec2f;
+typedef Vec2<float>  CGV_Vec2f;
+typedef Vec2<double> CMP_Vec2d;
+typedef Vec2<int>    CMP_Vec2i;
+
+//}
+
+
+
+
+//============================================= VEC3 ==================================================
+/// Three-component vector of T with component-wise arithmetic.
+/// The components are the public members x, y and z; the conversions to T*
+/// expose the object as an array of T.
+template<class T>
+class Vec3
+{
+public:
+
+    T x;
+    T y;
+    T z;
+
+    // ----------------------------------------
+    //     Construction
+    // ----------------------------------------
+
+    /// Sets all components to zero.
+    Vec3()
+        : x(T(0)), y(T(0)), z(T(0))
+    {
+    }
+
+    /// Builds the vector from three explicit components.
+    Vec3(const T& vx, const T& vy, const T& vz)
+        : x(vx), y(vy), z(vz)
+    {
+    }
+
+    /// Copies another vector component by component.
+    Vec3(const Vec3<T>& other)
+        : x(other.x), y(other.y), z(other.z)
+    {
+    }
+
+    /// Replicates one value into all components.
+    Vec3(const T& value)
+        : x(value), y(value), z(value)
+    {
+    }
+
+    /// Reads the components from the first three elements of an array.
+    Vec3(const T* values)
+        : x(values[0]), y(values[1]), z(values[2])
+    {
+    }
+
+    // ----------------------------------------
+    //     Conversion / assignment
+    // ----------------------------------------
+
+    /// Read-only view of the object as an array of T.
+    operator const T* () const
+    {
+        return reinterpret_cast<const T*>(this);
+    }
+
+    /// Mutable view of the object as an array of T.
+    operator T* ()
+    {
+        return reinterpret_cast<T*>(this);
+    }
+
+    /// Component-wise assignment; yields a const reference to this object.
+    const Vec3<T>& operator=(const Vec3<T>& other)
+    {
+        x = other.x;
+        y = other.y;
+        z = other.z;
+        return *this;
+    }
+
+    // ----------------------------------------
+    //     Comparison
+    // ----------------------------------------
+
+    /// True when every component compares equal.
+    bool operator==(const Vec3<T>& other) const
+    {
+        return (x == other.x && y == other.y && z == other.z);
+    }
+
+    /// True when at least one component differs.
+    bool operator!=(const Vec3<T>& other) const
+    {
+        return (x != other.x || y != other.y || z != other.z);
+    }
+
+    // ----------------------------------------
+    //     Arithmetic producing a new vector
+    // ----------------------------------------
+
+    /// Component-wise sum.
+    const Vec3<T> operator+(const Vec3<T>& other) const
+    {
+        return Vec3<T>(x + other.x, y + other.y, z + other.z);
+    }
+
+    /// Component-wise difference.
+    const Vec3<T> operator-(const Vec3<T>& other) const
+    {
+        return Vec3<T>(x - other.x, y - other.y, z - other.z);
+    }
+
+    /// Every component multiplied by a scalar.
+    const Vec3<T> operator*(const T& s) const
+    {
+        return Vec3<T>(x * s, y * s, z * s);
+    }
+
+    /// Every component divided by a scalar.
+    const Vec3<T> operator/(const T& s) const
+    {
+        return Vec3<T>(x / s, y / s, z / s);
+    }
+
+    /// Component-wise quotient of two vectors.
+    const Vec3<T> operator/(const Vec3<T>& other) const
+    {
+        return Vec3<T>(x / other.x, y / other.y, z / other.z);
+    }
+
+    // ----------------------------------------
+    //     Arithmetic in place
+    // ----------------------------------------
+
+    /// Adds another vector to this one.
+    Vec3<T>& operator+= (const Vec3<T>& other)
+    {
+        x += other.x;
+        y += other.y;
+        z += other.z;
+        return *this;
+    }
+
+    /// Subtracts another vector from this one.
+    Vec3<T>& operator-= (const Vec3<T>& other)
+    {
+        x -= other.x;
+        y -= other.y;
+        z -= other.z;
+        return *this;
+    }
+
+    /// Multiplies this vector by a scalar.
+    Vec3<T>& operator*= (const T& s)
+    {
+        x *= s;
+        y *= s;
+        z *= s;
+        return *this;
+    }
+
+    /// Divides this vector by a scalar.
+    Vec3<T>& operator/= (const T& s)
+    {
+        x /= s;
+        y /= s;
+        z /= s;
+        return *this;
+    }
+};
+
+// Concrete three-component vector types
+typedef Vec3<float>             CGU_Vec3f;
+typedef Vec3<float>             CGV_Vec3f;
+typedef Vec3<unsigned char>     CGU_Vec3uc;
+typedef Vec3<unsigned char>     CGV_Vec3uc;
+
+typedef Vec3<float>             CMP_Vec3f;
+typedef Vec3<double>            CMP_Vec3d;
+typedef Vec3<int>               CMP_Vec3i;
+typedef Vec3<unsigned char>     CMP_Vec3uc;
+
+//============================================= VEC4 ==================================================
+/// Four-component vector of T with component-wise arithmetic.
+/// The components are the public members x, y, z and w; the conversions to T*
+/// expose the object as an array of T.
+template<class T>
+class Vec4
+{
+public:
+
+    T x;
+    T y;
+    T z;
+    T w;
+
+    // ----------------------------------------
+    //     Construction
+    // ----------------------------------------
+
+    /// Sets all components to zero.
+    Vec4()
+        : x(T(0)), y(T(0)), z(T(0)), w(T(0))
+    {
+    }
+
+    /// Builds the vector from four explicit components.
+    Vec4(const T& vx, const T& vy, const T& vz, const T& vw)
+        : x(vx), y(vy), z(vz), w(vw)
+    {
+    }
+
+    /// Copies another vector component by component.
+    Vec4(const Vec4<T>& other)
+        : x(other.x), y(other.y), z(other.z), w(other.w)
+    {
+    }
+
+    /// Replicates one value into all components.
+    Vec4(const T& value)
+        : x(value), y(value), z(value), w(value)
+    {
+    }
+
+    /// Reads the components from the first four elements of an array.
+    Vec4(const T* values)
+        : x(values[0]), y(values[1]), z(values[2]), w(values[3])
+    {
+    }
+
+    // ----------------------------------------
+    //     Conversion / assignment
+    // ----------------------------------------
+
+    /// Read-only view of the object as an array of T.
+    operator const T* () const
+    {
+        return reinterpret_cast<const T*>(this);
+    }
+
+    /// Mutable view of the object as an array of T.
+    operator T* ()
+    {
+        return reinterpret_cast<T*>(this);
+    }
+
+    /// Component-wise assignment; yields a const reference to this object.
+    const Vec4<T>& operator=(const Vec4<T>& other)
+    {
+        x = other.x;
+        y = other.y;
+        z = other.z;
+        w = other.w;
+        return *this;
+    }
+
+    // ----------------------------------------
+    //     Comparison
+    // ----------------------------------------
+
+    /// True when every component compares equal.
+    bool operator==(const Vec4<T>& other) const
+    {
+        return (x == other.x && y == other.y && z == other.z && w == other.w);
+    }
+
+    /// True when at least one component differs.
+    bool operator!=(const Vec4<T>& other) const
+    {
+        return (x != other.x || y != other.y || z != other.z || w != other.w);
+    }
+
+    // ----------------------------------------
+    //     Arithmetic producing a new vector
+    // ----------------------------------------
+
+    /// Component-wise sum.
+    const Vec4<T> operator+(const Vec4<T>& other) const
+    {
+        return Vec4<T>(x + other.x, y + other.y, z + other.z, w + other.w);
+    }
+
+    /// Component-wise difference.
+    const Vec4<T> operator-(const Vec4<T>& other) const
+    {
+        return Vec4<T>(x - other.x, y - other.y, z - other.z, w - other.w);
+    }
+
+    /// Every component multiplied by a scalar.
+    const Vec4<T> operator*(const T& s) const
+    {
+        return Vec4<T>(x * s, y * s, z * s, w * s);
+    }
+
+    /// Every component divided by a scalar.
+    const Vec4<T> operator/(const T& s) const
+    {
+        return Vec4<T>(x / s, y / s, z / s, w / s);
+    }
+
+    /// Component-wise quotient of two vectors.
+    const Vec4<T> operator/(const Vec4<T>& other) const
+    {
+        return Vec4<T>(x / other.x, y / other.y, z / other.z, w / other.w);
+    }
+
+    // ----------------------------------------
+    //     Arithmetic in place
+    // ----------------------------------------
+
+    /// Adds another vector to this one.
+    Vec4<T>& operator+= (const Vec4<T>& other)
+    {
+        x += other.x;
+        y += other.y;
+        z += other.z;
+        w += other.w;
+        return *this;
+    }
+
+    /// Subtracts another vector from this one.
+    Vec4<T>& operator-= (const Vec4<T>& other)
+    {
+        x -= other.x;
+        y -= other.y;
+        z -= other.z;
+        w -= other.w;
+        return *this;
+    }
+
+    /// Multiplies this vector by a scalar.
+    Vec4<T>& operator*= (const T& s)
+    {
+        x *= s;
+        y *= s;
+        z *= s;
+        w *= s;
+        return *this;
+    }
+
+    /// Divides this vector by a scalar.
+    Vec4<T>& operator/= (const T& s)
+    {
+        x /= s;
+        y /= s;
+        z /= s;
+        w /= s;
+        return *this;
+    }
+};
+
+#include <stdio.h>
+#include "xmmintrin.h"
+#include <math.h>
+#include <float.h> 
+
+// SSE Vec4: four packed floats held in a single __m128, with operators mapped onto the _mm_*_ps intrinsics
+#ifdef _LINUX
+class CMP_SSEVec4f
+#else
+#include "intrin.h"
+class   __declspec(align(16)) CMP_SSEVec4f
+#endif
+{
+public:
+
+    union
+    {
+        __m128 vec128;          // float Vector 128 bits in total (16 Bytes) = array of 4 floats
+#ifdef _LINUX
+        float f32[4];           // per-lane view of vec128; the non-_LINUX build indexes vec128.m128_f32 instead
+#endif
+    };
+
+    // constructors
+    inline CMP_SSEVec4f() {};  // leaves vec128 uninitialized
+    inline CMP_SSEVec4f(float x, float y, float z, float w) : vec128(_mm_setr_ps(x, y, z, w)) {};  // x lands in lane 0, w in lane 3
+    inline CMP_SSEVec4f(__m128 vec) : vec128(vec) {}  // wraps an existing __m128 value (implicit conversion)
+    inline CMP_SSEVec4f(const float* data) : vec128(_mm_load_ps(data)) {};  // reads 4 floats; _mm_load_ps requires data to be 16-byte aligned
+    inline CMP_SSEVec4f(float scalar) : vec128(_mm_load1_ps(&scalar)) {};  // broadcasts scalar to all four lanes
+
+    // copy and assignment (operator= returns a const reference; its parameter is named lhs but is the right-hand operand)
+    inline CMP_SSEVec4f(const CMP_SSEVec4f& init) : vec128(init.vec128) {};
+    inline const CMP_SSEVec4f& operator=(const CMP_SSEVec4f& lhs) { vec128 = lhs.vec128; return *this; };
+
+    // conversion to m128 type for direct use in _mm intrinsics
+    inline operator __m128() { return vec128; };
+    inline operator const __m128() const { return vec128; };
+
+    // indexing: returns lane i of the vector; i is not range-checked
+#ifdef _LINUX
+    inline const float& operator[](int i) const { return f32[i]; };
+    inline float& operator[](int i) { return f32[i]; };
+#else
+    inline const float& operator[](int i) const { return vec128.m128_f32[i]; };
+    inline float& operator[](int i) { return vec128.m128_f32[i]; };
+#endif
+
+    // addition (per lane)
+    inline CMP_SSEVec4f operator+(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_add_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f& operator+=(const CMP_SSEVec4f& rhs) { vec128 = _mm_add_ps(vec128, rhs.vec128); return *this; };
+
+    // multiplication (per lane, not a dot product)
+    inline CMP_SSEVec4f operator*(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_mul_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f& operator*=(const CMP_SSEVec4f& rhs) { vec128 = _mm_mul_ps(vec128, rhs.vec128); return *this; };
+
+    // scalar multiplication (disabled; a float operand still reaches the vector operator* through the implicit float constructor)
+    //inline CMP_SSEVec4f operator*( float rhs ) const { return CMP_SSEVec4f( _mm_mul_ps(vec128, _mm_load1_ps(&rhs)) ); };
+    //inline CMP_SSEVec4f& operator*=( float rhs )  { vec128 = _mm_mul_ps(vec128, _mm_load1_ps(&rhs)); return *this; };
+
+
+    // subtraction (per lane)
+    inline CMP_SSEVec4f operator-(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_sub_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f& operator-= (const CMP_SSEVec4f& rhs) { vec128 = _mm_sub_ps(vec128, rhs.vec128); return *this; };
+
+    // division (per lane; divisors are not checked for zero)
+    inline CMP_SSEVec4f operator/(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_div_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f& operator/= (const CMP_SSEVec4f& rhs) { vec128 = _mm_div_ps(vec128, rhs.vec128); return *this; };
+
+    // scalar division: rhs is broadcast to all four lanes before dividing
+    inline CMP_SSEVec4f operator/(float rhs)   const { return CMP_SSEVec4f(_mm_div_ps(vec128, _mm_load1_ps(&rhs))); };
+    inline CMP_SSEVec4f& operator/=(float rhs) { vec128 = _mm_div_ps(vec128, _mm_load1_ps(&rhs)); return *this; };
+
+    // comparison
+    // these return 0 or 0xffffffff in each component (a per-lane bit mask packed in a CMP_SSEVec4f, not a bool)
+    inline CMP_SSEVec4f operator< (const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmplt_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f operator> (const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmpgt_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f operator<=(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmple_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f operator>=(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmpge_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f operator==(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmpeq_ps(vec128, rhs.vec128)); };
+
+    // bitwise operators (act on the raw 128 bits, e.g. to combine the comparison masks above)
+    inline CMP_SSEVec4f operator|(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_or_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f operator&(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_and_ps(vec128, rhs.vec128)); };
+    inline CMP_SSEVec4f operator^(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_xor_ps(vec128, rhs.vec128)); };
+    inline const CMP_SSEVec4f& operator|=(const CMP_SSEVec4f& rhs) { vec128 = _mm_or_ps(vec128, rhs.vec128); return *this; };
+    inline const CMP_SSEVec4f& operator&=(const CMP_SSEVec4f& rhs) { vec128 = _mm_and_ps(vec128, rhs.vec128); return *this; };
+
+    // for some horrible reason, there's no bitwise not instruction for SSE,
+    // so we have to do xor with 0xffffffff in order to fake it.
+    // To get a 0xffffffff in every lane, we execute 0 == 0
+    inline CMP_SSEVec4f operator~() const
+    {
+        __m128 zero = _mm_setzero_ps();
+        __m128 is_true = _mm_cmpeq_ps(zero, zero);
+        return _mm_xor_ps(is_true, vec128);
+    };
+
+};
+
+typedef Vec4<float>             CMP_Vec4f;
+typedef Vec4<double>            CMP_Vec4d;
+typedef Vec4<int>               CMP_Vec4i;
+typedef Vec4<unsigned int>      CMP_Vec4ui;         // unsigned int x,y,z,w (32 bit on common platforms, not 16)
+typedef Vec4<unsigned char>     CMP_Vec4uc;         // unsigned 8  bit x,y,z,w
+
+typedef Vec4<unsigned char>     CGU_Vec4uc;         // unsigned 8  bit x,y,z,w
+typedef Vec4<unsigned char>     CGV_Vec4uc;         // unsigned 8  bit x,y,z,w
+
+#endif // not ASPM_GPU
+
+#endif // Header Guard
+
--- a/extern/CMP_Core/test/BlockConstants.h
+++ b/extern/CMP_Core/test/BlockConstants.h
@ -0,0 +1,228 @@
+#ifndef BLOCKCONSTANTS_H
+#define BLOCKCONSTANTS_H
+#include <string>
+#include <unordered_map>
+struct Block { const unsigned char* data; const unsigned char* color; };  // data points at one byte table below; color starts as nullptr in this header (presumably filled in by AssignExpectedColorsToBlocks() -- TODO confirm)
+// Byte tables named <format>_<color>_<alpha case>: each BC1 table holds 8 bytes, each BC2 and BC3 table holds 16 bytes.
+static const unsigned char BC1_Red_Ignore_Alpha [] {0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Blue_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_White_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Black_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Red_Blue_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Red_Green_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Green_Blue_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Red_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Green_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Blue_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_White_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Green_Ignore_Alpha [] {0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Black_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Red_Blue_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Red_Green_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Green_Blue_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Blue_Ignore_Alpha [] {0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_White_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Black_Ignore_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Red_Blue_Ignore_Alpha [] {0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Red_Green_Ignore_Alpha [] {0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Green_Blue_Ignore_Alpha [] {0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC1_Red_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC1_Green_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+static const unsigned char BC2_Red_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Blue_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_White_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Black_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Blue_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Green_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Green_Blue_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Green_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Blue_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_White_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Green_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Black_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Blue_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Green_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Green_Blue_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Blue_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_White_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Black_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Blue_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Green_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Green_Blue_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Red_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC2_Green_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Blue_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_White_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Black_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Blue_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Green_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Green_Blue_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Green_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_White_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Green_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Black_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Green_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Green_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Blue_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_White_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Black_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Blue_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Green_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Green_Blue_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Red_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
+static const unsigned char BC3_Green_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
+
+// One Block per byte table, with color left unset.  Declared static (like the tables and the map in this header) so that including the header from more than one translation unit cannot produce duplicate-symbol link errors.
+static Block BC1_Red_Ignore_Alpha_Block = {BC1_Red_Ignore_Alpha, nullptr};
+static Block BC1_Blue_Half_Alpha_Block = {BC1_Blue_Half_Alpha, nullptr};
+static Block BC1_White_Half_Alpha_Block = {BC1_White_Half_Alpha, nullptr};
+static Block BC1_Black_Half_Alpha_Block = {BC1_Black_Half_Alpha, nullptr};
+static Block BC1_Red_Blue_Half_Alpha_Block = {BC1_Red_Blue_Half_Alpha, nullptr};
+static Block BC1_Red_Green_Half_Alpha_Block = {BC1_Red_Green_Half_Alpha, nullptr};
+static Block BC1_Green_Blue_Half_Alpha_Block = {BC1_Green_Blue_Half_Alpha, nullptr};
+static Block BC1_Red_Full_Alpha_Block = {BC1_Red_Full_Alpha, nullptr};
+static Block BC1_Green_Full_Alpha_Block = {BC1_Green_Full_Alpha, nullptr};
+static Block BC1_Blue_Full_Alpha_Block = {BC1_Blue_Full_Alpha, nullptr};
+static Block BC1_White_Full_Alpha_Block = {BC1_White_Full_Alpha, nullptr};
+static Block BC1_Green_Ignore_Alpha_Block = {BC1_Green_Ignore_Alpha, nullptr};
+static Block BC1_Black_Full_Alpha_Block = {BC1_Black_Full_Alpha, nullptr};
+static Block BC1_Red_Blue_Full_Alpha_Block = {BC1_Red_Blue_Full_Alpha, nullptr};
+static Block BC1_Red_Green_Full_Alpha_Block = {BC1_Red_Green_Full_Alpha, nullptr};
+static Block BC1_Green_Blue_Full_Alpha_Block = {BC1_Green_Blue_Full_Alpha, nullptr};
+static Block BC1_Blue_Ignore_Alpha_Block = {BC1_Blue_Ignore_Alpha, nullptr};
+static Block BC1_White_Ignore_Alpha_Block = {BC1_White_Ignore_Alpha, nullptr};
+static Block BC1_Black_Ignore_Alpha_Block = {BC1_Black_Ignore_Alpha, nullptr};
+static Block BC1_Red_Blue_Ignore_Alpha_Block = {BC1_Red_Blue_Ignore_Alpha, nullptr};
+static Block BC1_Red_Green_Ignore_Alpha_Block = {BC1_Red_Green_Ignore_Alpha, nullptr};
+static Block BC1_Green_Blue_Ignore_Alpha_Block = {BC1_Green_Blue_Ignore_Alpha, nullptr};
+static Block BC1_Red_Half_Alpha_Block = {BC1_Red_Half_Alpha, nullptr};
+static Block BC1_Green_Half_Alpha_Block = {BC1_Green_Half_Alpha, nullptr};
+static Block BC2_Red_Ignore_Alpha_Block = {BC2_Red_Ignore_Alpha, nullptr};
+static Block BC2_Blue_Half_Alpha_Block = {BC2_Blue_Half_Alpha, nullptr};
+static Block BC2_White_Half_Alpha_Block = {BC2_White_Half_Alpha, nullptr};
+static Block BC2_Black_Half_Alpha_Block = {BC2_Black_Half_Alpha, nullptr};
+static Block BC2_Red_Blue_Half_Alpha_Block = {BC2_Red_Blue_Half_Alpha, nullptr};
+static Block BC2_Red_Green_Half_Alpha_Block = {BC2_Red_Green_Half_Alpha, nullptr};
+static Block BC2_Green_Blue_Half_Alpha_Block = {BC2_Green_Blue_Half_Alpha, nullptr};
+static Block BC2_Red_Full_Alpha_Block = {BC2_Red_Full_Alpha, nullptr};
+static Block BC2_Green_Full_Alpha_Block = {BC2_Green_Full_Alpha, nullptr};
+static Block BC2_Blue_Full_Alpha_Block = {BC2_Blue_Full_Alpha, nullptr};
+static Block BC2_White_Full_Alpha_Block = {BC2_White_Full_Alpha, nullptr};
+static Block BC2_Green_Ignore_Alpha_Block = {BC2_Green_Ignore_Alpha, nullptr};
+static Block BC2_Black_Full_Alpha_Block = {BC2_Black_Full_Alpha, nullptr};
+static Block BC2_Red_Blue_Full_Alpha_Block = {BC2_Red_Blue_Full_Alpha, nullptr};
+static Block BC2_Red_Green_Full_Alpha_Block = {BC2_Red_Green_Full_Alpha, nullptr};
+static Block BC2_Green_Blue_Full_Alpha_Block = {BC2_Green_Blue_Full_Alpha, nullptr};
+static Block BC2_Blue_Ignore_Alpha_Block = {BC2_Blue_Ignore_Alpha, nullptr};
+static Block BC2_White_Ignore_Alpha_Block = {BC2_White_Ignore_Alpha, nullptr};
+static Block BC2_Black_Ignore_Alpha_Block = {BC2_Black_Ignore_Alpha, nullptr};
+static Block BC2_Red_Blue_Ignore_Alpha_Block = {BC2_Red_Blue_Ignore_Alpha, nullptr};
+static Block BC2_Red_Green_Ignore_Alpha_Block = {BC2_Red_Green_Ignore_Alpha, nullptr};
+static Block BC2_Green_Blue_Ignore_Alpha_Block = {BC2_Green_Blue_Ignore_Alpha, nullptr};
+static Block BC2_Red_Half_Alpha_Block = {BC2_Red_Half_Alpha, nullptr};
+static Block BC2_Green_Half_Alpha_Block = {BC2_Green_Half_Alpha, nullptr};
+static Block BC3_Red_Ignore_Alpha_Block = {BC3_Red_Ignore_Alpha, nullptr};
+static Block BC3_Blue_Half_Alpha_Block = {BC3_Blue_Half_Alpha, nullptr};
+static Block BC3_White_Half_Alpha_Block = {BC3_White_Half_Alpha, nullptr};
+static Block BC3_Black_Half_Alpha_Block = {BC3_Black_Half_Alpha, nullptr};
+static Block BC3_Red_Blue_Half_Alpha_Block = {BC3_Red_Blue_Half_Alpha, nullptr};
+static Block BC3_Red_Green_Half_Alpha_Block = {BC3_Red_Green_Half_Alpha, nullptr};
+static Block BC3_Green_Blue_Half_Alpha_Block = {BC3_Green_Blue_Half_Alpha, nullptr};
+static Block BC3_Red_Full_Alpha_Block = {BC3_Red_Full_Alpha, nullptr};
+static Block BC3_Green_Full_Alpha_Block = {BC3_Green_Full_Alpha, nullptr};
+static Block BC3_Blue_Full_Alpha_Block = {BC3_Blue_Full_Alpha, nullptr};
+static Block BC3_White_Full_Alpha_Block = {BC3_White_Full_Alpha, nullptr};
+static Block BC3_Green_Ignore_Alpha_Block = {BC3_Green_Ignore_Alpha, nullptr};
+static Block BC3_Black_Full_Alpha_Block = {BC3_Black_Full_Alpha, nullptr};
+static Block BC3_Red_Blue_Full_Alpha_Block = {BC3_Red_Blue_Full_Alpha, nullptr};
+static Block BC3_Red_Green_Full_Alpha_Block = {BC3_Red_Green_Full_Alpha, nullptr};
+static Block BC3_Green_Blue_Full_Alpha_Block = {BC3_Green_Blue_Full_Alpha, nullptr};
+static Block BC3_Blue_Ignore_Alpha_Block = {BC3_Blue_Ignore_Alpha, nullptr};
+static Block BC3_White_Ignore_Alpha_Block = {BC3_White_Ignore_Alpha, nullptr};
+static Block BC3_Black_Ignore_Alpha_Block = {BC3_Black_Ignore_Alpha, nullptr};
+static Block BC3_Red_Blue_Ignore_Alpha_Block = {BC3_Red_Blue_Ignore_Alpha, nullptr};
+static Block BC3_Red_Green_Ignore_Alpha_Block = {BC3_Red_Green_Ignore_Alpha, nullptr};
+static Block BC3_Green_Blue_Ignore_Alpha_Block = {BC3_Green_Blue_Ignore_Alpha, nullptr};
+static Block BC3_Red_Half_Alpha_Block = {BC3_Red_Half_Alpha, nullptr};
+static Block BC3_Green_Half_Alpha_Block = {BC3_Green_Half_Alpha, nullptr};
+
+static std::unordered_map<std::string, Block> blocks {  // test-case name -> Block; entries are copies of the *_Block objects, and being static the map exists once per including translation unit
+	{ "BC1_Red_Ignore_Alpha", BC1_Red_Ignore_Alpha_Block},
+	{ "BC1_Blue_Half_Alpha", BC1_Blue_Half_Alpha_Block},
+	{ "BC1_White_Half_Alpha", BC1_White_Half_Alpha_Block},
+	{ "BC1_Black_Half_Alpha", BC1_Black_Half_Alpha_Block},
+	{ "BC1_Red_Blue_Half_Alpha", BC1_Red_Blue_Half_Alpha_Block},
+	{ "BC1_Red_Green_Half_Alpha", BC1_Red_Green_Half_Alpha_Block},
+	{ "BC1_Green_Blue_Half_Alpha", BC1_Green_Blue_Half_Alpha_Block},
+	{ "BC1_Red_Full_Alpha", BC1_Red_Full_Alpha_Block},
+	{ "BC1_Green_Full_Alpha", BC1_Green_Full_Alpha_Block},
+	{ "BC1_Blue_Full_Alpha", BC1_Blue_Full_Alpha_Block},
+	{ "BC1_White_Full_Alpha", BC1_White_Full_Alpha_Block},
+	{ "BC1_Green_Ignore_Alpha", BC1_Green_Ignore_Alpha_Block},
+	{ "BC1_Black_Full_Alpha", BC1_Black_Full_Alpha_Block},
+	{ "BC1_Red_Blue_Full_Alpha", BC1_Red_Blue_Full_Alpha_Block},
+	{ "BC1_Red_Green_Full_Alpha", BC1_Red_Green_Full_Alpha_Block},
+	{ "BC1_Green_Blue_Full_Alpha", BC1_Green_Blue_Full_Alpha_Block},
+	{ "BC1_Blue_Ignore_Alpha", BC1_Blue_Ignore_Alpha_Block},
+	{ "BC1_White_Ignore_Alpha", BC1_White_Ignore_Alpha_Block},
+	{ "BC1_Black_Ignore_Alpha", BC1_Black_Ignore_Alpha_Block},
+	{ "BC1_Red_Blue_Ignore_Alpha", BC1_Red_Blue_Ignore_Alpha_Block},
+	{ "BC1_Red_Green_Ignore_Alpha", BC1_Red_Green_Ignore_Alpha_Block},
+	{ "BC1_Green_Blue_Ignore_Alpha", BC1_Green_Blue_Ignore_Alpha_Block},
+	{ "BC1_Red_Half_Alpha", BC1_Red_Half_Alpha_Block},
+	{ "BC1_Green_Half_Alpha", BC1_Green_Half_Alpha_Block},
+	{ "BC2_Red_Ignore_Alpha", BC2_Red_Ignore_Alpha_Block},
+	{ "BC2_Blue_Half_Alpha", BC2_Blue_Half_Alpha_Block},
+	{ "BC2_White_Half_Alpha", BC2_White_Half_Alpha_Block},
+	{ "BC2_Black_Half_Alpha", BC2_Black_Half_Alpha_Block},
+	{ "BC2_Red_Blue_Half_Alpha", BC2_Red_Blue_Half_Alpha_Block},
+	{ "BC2_Red_Green_Half_Alpha", BC2_Red_Green_Half_Alpha_Block},
+	{ "BC2_Green_Blue_Half_Alpha", BC2_Green_Blue_Half_Alpha_Block},
+	{ "BC2_Red_Full_Alpha", BC2_Red_Full_Alpha_Block},
+	{ "BC2_Green_Full_Alpha", BC2_Green_Full_Alpha_Block},
+	{ "BC2_Blue_Full_Alpha", BC2_Blue_Full_Alpha_Block},
+	{ "BC2_White_Full_Alpha", BC2_White_Full_Alpha_Block},
+	{ "BC2_Green_Ignore_Alpha", BC2_Green_Ignore_Alpha_Block},
+	{ "BC2_Black_Full_Alpha", BC2_Black_Full_Alpha_Block},
+	{ "BC2_Red_Blue_Full_Alpha", BC2_Red_Blue_Full_Alpha_Block},
+	{ "BC2_Red_Green_Full_Alpha", BC2_Red_Green_Full_Alpha_Block},
+	{ "BC2_Green_Blue_Full_Alpha", BC2_Green_Blue_Full_Alpha_Block},
+	{ "BC2_Blue_Ignore_Alpha", BC2_Blue_Ignore_Alpha_Block},
+	{ "BC2_White_Ignore_Alpha", BC2_White_Ignore_Alpha_Block},
+	{ "BC2_Black_Ignore_Alpha", BC2_Black_Ignore_Alpha_Block},
+	{ "BC2_Red_Blue_Ignore_Alpha", BC2_Red_Blue_Ignore_Alpha_Block},
+	{ "BC2_Red_Green_Ignore_Alpha", BC2_Red_Green_Ignore_Alpha_Block},
+	{ "BC2_Green_Blue_Ignore_Alpha", BC2_Green_Blue_Ignore_Alpha_Block},
+	{ "BC2_Red_Half_Alpha", BC2_Red_Half_Alpha_Block},
+	{ "BC2_Green_Half_Alpha", BC2_Green_Half_Alpha_Block},
+	{ "BC3_Red_Ignore_Alpha", BC3_Red_Ignore_Alpha_Block},
+	{ "BC3_Blue_Half_Alpha", BC3_Blue_Half_Alpha_Block},
+	{ "BC3_White_Half_Alpha", BC3_White_Half_Alpha_Block},
+	{ "BC3_Black_Half_Alpha", BC3_Black_Half_Alpha_Block},
+	{ "BC3_Red_Blue_Half_Alpha", BC3_Red_Blue_Half_Alpha_Block},
+	{ "BC3_Red_Green_Half_Alpha", BC3_Red_Green_Half_Alpha_Block},
+	{ "BC3_Green_Blue_Half_Alpha", BC3_Green_Blue_Half_Alpha_Block},
+	{ "BC3_Red_Full_Alpha", BC3_Red_Full_Alpha_Block},
+	{ "BC3_Green_Full_Alpha", BC3_Green_Full_Alpha_Block},
+	{ "BC3_Blue_Full_Alpha", BC3_Blue_Full_Alpha_Block},
+	{ "BC3_White_Full_Alpha", BC3_White_Full_Alpha_Block},
+	{ "BC3_Green_Ignore_Alpha", BC3_Green_Ignore_Alpha_Block},
+	{ "BC3_Black_Full_Alpha", BC3_Black_Full_Alpha_Block},
+	{ "BC3_Red_Blue_Full_Alpha", BC3_Red_Blue_Full_Alpha_Block},
+	{ "BC3_Red_Green_Full_Alpha", BC3_Red_Green_Full_Alpha_Block},
+	{ "BC3_Green_Blue_Full_Alpha", BC3_Green_Blue_Full_Alpha_Block},
+	{ "BC3_Blue_Ignore_Alpha", BC3_Blue_Ignore_Alpha_Block},
+	{ "BC3_White_Ignore_Alpha", BC3_White_Ignore_Alpha_Block},
+	{ "BC3_Black_Ignore_Alpha", BC3_Black_Ignore_Alpha_Block},
+	{ "BC3_Red_Blue_Ignore_Alpha", BC3_Red_Blue_Ignore_Alpha_Block},
+	{ "BC3_Red_Green_Ignore_Alpha", BC3_Red_Green_Ignore_Alpha_Block},
+	{ "BC3_Green_Blue_Ignore_Alpha", BC3_Green_Blue_Ignore_Alpha_Block},
+	{ "BC3_Red_Half_Alpha", BC3_Red_Half_Alpha_Block},
+	{ "BC3_Green_Half_Alpha", BC3_Green_Half_Alpha_Block}
+};
+
+#endif
--- a/extern/CMP_Core/test/CMakeLists.txt
+++ b/extern/CMP_Core/test/CMakeLists.txt
@ -0,0 +1,13 @@
+cmake_minimum_required(VERSION 3.5)
+project(CMP_Core_Tests)
+
+add_executable(Tests TestsMain.cpp)  # test runner; main() lives in TestsMain.cpp
+add_subdirectory(../../../Common/Lib/Ext/Catch2
+                Common/Lib/Ext/Catch2/bin)  # explicit binary dir is required because the source dir is outside this tree; NOTE(review): confirm the relative Catch2 path exists in this repository layout
+target_sources(Tests # remaining test sources; headers are listed alongside the .cpp file
+                PRIVATE
+                CompressonatorTests.cpp
+                CompressonatorTests.h
+                BlockConstants.h
+                )
+target_link_libraries(Tests Catch2::Catch2 CMP_Core)  # CMP_Core is not defined in this file; presumably provided by the parent CMP_Core build -- TODO confirm
--- a/extern/CMP_Core/test/CompressonatorTests.cpp
+++ b/extern/CMP_Core/test/CompressonatorTests.cpp
--- a/extern/CMP_Core/test/CompressonatorTests.h
+++ b/extern/CMP_Core/test/CompressonatorTests.h
@ -0,0 +1,6 @@
+#ifndef COMPRESSONATOR_TESTS_H
+#define COMPRESSONATOR_TESTS_H
+
+void AssignExpectedColorsToBlocks();  // one-time setup hook: main() in TestsMain.cpp calls it before starting the Catch2 session; defined outside this header
+
+#endif
--- a/extern/CMP_Core/test/TestsMain.cpp
+++ b/extern/CMP_Core/test/TestsMain.cpp
@ -0,0 +1,10 @@
+#define CATCH_CONFIG_RUNNER
+#include "../../../Common/Lib/Ext/Catch2/catch.hpp"
+#include "CompressonatorTests.h"
+
+int main(int argc, char* argv[]) {
+	// One-time setup that has to happen before Catch2 runs any test case.
+	AssignExpectedColorsToBlocks();
+	// Forward the command line to Catch2; its result becomes the process exit code.
+	return Catch::Session().run(argc, argv);
+}
--- a/extern/CMakeLists.txt
+++ b/extern/CMakeLists.txt
@ -9,5 +9,9 @@ ADD_SUBDIRECTORY(EtcLib)
 ADD_SUBDIRECTORY(rg_etc1_v104)
 #ADD_SUBDIRECTORY(etcpack)

-ADD_SUBDIRECTORY(butteraugli)
+#ADD_SUBDIRECTORY(butteraugli)
+
+ADD_SUBDIRECTORY(libsquish-1.15)
+
+ADD_SUBDIRECTORY(CMP_Core)

--- a/extern/libsquish-1.15/CMakeLists.txt
+++ b/extern/libsquish-1.15/CMakeLists.txt
@ -0,0 +1,117 @@
+# cmake build file for squish
+# by Stefan Roettger (snroettg@gmail.com)
+# updated by Simon Brown (si@sjbrown.co.uk)
+
+# features:
+#   uses -fopenmp when available
+#    use BUILD_SQUISH_WITH_OPENMP to override
+#   Xcode: builds universal binaries, uses SSE2 on i386 and Altivec on ppc
+#   Unix and VS: SSE2 support is enabled by default
+#    use BUILD_SQUISH_WITH_SSE2 and BUILD_SQUISH_WITH_ALTIVEC to override
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.3)  # must come before PROJECT() so the policy settings are in place when the project is configured
+
+PROJECT(squish)
+
+OPTION(BUILD_SQUISH_WITH_OPENMP "Build with OpenMP." ON)
+
+OPTION(BUILD_SQUISH_WITH_SSE2 "Build with SSE2." ON)
+OPTION(BUILD_SQUISH_WITH_ALTIVEC "Build with Altivec." OFF)
+
+OPTION(BUILD_SHARED_LIBS "Build shared libraries." OFF)
+
+OPTION(BUILD_SQUISH_EXTRA "Build extra source code." OFF)
+
+IF (BUILD_SQUISH_WITH_OPENMP)
+   FIND_PACKAGE(OpenMP)  # not REQUIRED: when OpenMP is missing the block below is simply skipped
+   IF (OPENMP_FOUND)
+       SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+       SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")  # appended to the directory-wide flags, so not limited to the squish target
+       ADD_DEFINITIONS(-DSQUISH_USE_OPENMP)
+   ENDIF()
+ENDIF()
+
+IF (CMAKE_GENERATOR STREQUAL "Xcode")
+    SET(CMAKE_OSX_ARCHITECTURES "i386;ppc")  # Xcode: per-architecture SIMD defines come from the XCODE_ATTRIBUTE_* target properties instead
+ELSE (CMAKE_GENERATOR STREQUAL "Xcode")
+    IF (BUILD_SQUISH_WITH_SSE2 AND NOT WIN32)  # NOTE(review): skipped on WIN32 although the header comment says VS gets SSE2 by default -- confirm
+        ADD_DEFINITIONS(-DSQUISH_USE_SSE=2 -msse2)  # -msse2 is a compiler option passed through ADD_DEFINITIONS, not a preprocessor define
+    ENDIF (BUILD_SQUISH_WITH_SSE2 AND NOT WIN32)
+    IF (BUILD_SQUISH_WITH_ALTIVEC AND NOT WIN32)
+        ADD_DEFINITIONS(-DSQUISH_USE_ALTIVEC=1 -maltivec)  # same pattern: one define plus one compiler option
+    ENDIF (BUILD_SQUISH_WITH_ALTIVEC AND NOT WIN32)
+ENDIF (CMAKE_GENERATOR STREQUAL "Xcode")
+
+SET(SQUISH_HDRS
+    squish.h
+    )
+
+SET(SQUISH_SRCS
+    alpha.cpp
+    alpha.h
+    clusterfit.cpp
+    clusterfit.h
+    colourblock.cpp
+    colourblock.h
+    colourfit.cpp
+    colourfit.h
+    colourset.cpp
+    colourset.h
+    maths.cpp
+    maths.h
+    rangefit.cpp
+    rangefit.h
+    simd.h
+    simd_float.h
+    simd_sse.h
+    simd_ve.h
+    singlecolourfit.cpp
+    singlecolourfit.h
+    singlecolourlookup.inl
+    squish.cpp
+    )
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+ADD_LIBRARY(squish ${SQUISH_SRCS} ${SQUISH_HDRS})  # no STATIC/SHARED keyword: the library type follows BUILD_SHARED_LIBS
+
+SET_TARGET_PROPERTIES(
+    squish PROPERTIES
+    PUBLIC_HEADER "${SQUISH_HDRS}"  # headers picked up by the PUBLIC_HEADER destination of the INSTALL rule
+    VERSION 0.0
+    SOVERSION 0.0
+    DEBUG_POSTFIX "d"  # debug builds produce squishd
+    XCODE_ATTRIBUTE_GCC_PREPROCESSOR_DEFINITIONS "$(SQUISH_CPP_$(CURRENT_ARCH))"  # Xcode only: resolves to the SQUISH_CPP_<arch> setting below
+    XCODE_ATTRIBUTE_OTHER_CFLAGS "$(SQUISH_CFLAGS_$(CURRENT_ARCH))"  # Xcode only: resolves to the SQUISH_CFLAGS_<arch> setting below
+    XCODE_ATTRIBUTE_SQUISH_CPP_i386 "SQUISH_USE_SSE=2"
+    XCODE_ATTRIBUTE_SQUISH_CFLAGS_i386 ""
+    XCODE_ATTRIBUTE_SQUISH_CPP_ppc "SQUISH_USE_ALTIVEC=1"
+    XCODE_ATTRIBUTE_SQUISH_CFLAGS_ppc "-maltivec"
+    )
+
+IF (BUILD_SQUISH_EXTRA)  # optional extra programs; the BUILD_SQUISH_EXTRA option defaults to OFF
+    SET(SQUISHTEST_SRCS extra/squishtest.cpp)
+
+    ADD_EXECUTABLE(squishtest ${SQUISHTEST_SRCS})
+    SET_TARGET_PROPERTIES(squishtest PROPERTIES DEBUG_POSTFIX "d")
+    TARGET_LINK_LIBRARIES(squishtest squish)
+
+    SET(SQUISHPNG_SRCS extra/squishpng.cpp)
+
+    FIND_PACKAGE(PNG)  # not REQUIRED: squishpng is only built when libpng is found
+
+    IF (PNG_FOUND)
+        SET(CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES)  # clears the variable; presumably so ${PNG_INCLUDE_DIR} is not filtered out as an implicit system directory -- TODO confirm
+        INCLUDE_DIRECTORIES(${PNG_INCLUDE_DIR})
+        ADD_EXECUTABLE(squishpng ${SQUISHPNG_SRCS})
+        SET_TARGET_PROPERTIES(squishpng PROPERTIES DEBUG_POSTFIX "d")
+        TARGET_LINK_LIBRARIES(squishpng squish ${PNG_LIBRARIES})
+    ENDIF (PNG_FOUND)
+ENDIF (BUILD_SQUISH_EXTRA)
+
+INSTALL(
+    TARGETS squish
+    LIBRARY DESTINATION lib
+    ARCHIVE DESTINATION lib
+    PUBLIC_HEADER DESTINATION include
+    )
--- a/extern/libsquish-1.15/CMakeModules/FindlibSquish.cmake
+++ b/extern/libsquish-1.15/CMakeModules/FindlibSquish.cmake
@ -0,0 +1,14 @@
+# Locates the libSquish header and library.
+#
+# Defines
+#  LIBSQUISH_FOUND
+#  LIBSQUISH_INCLUDE_DIR
+#  LIBSQUISH_LIBRARIES
+
+FIND_PATH(LIBSQUISH_INCLUDE_DIR squish.h PATHS . squish .. ../squish DOC "Directory containing libSquish headers")
+FIND_LIBRARY(LIBSQUISH_LIBRARY NAMES squish libsquish PATHS . squish .. ../squish PATH_SUFFIXES lib lib64 release minsizerel relwithdebinfo DOC "Path to libSquish library")
+
+SET(LIBSQUISH_LIBRARIES ${LIBSQUISH_LIBRARY})
+
+# Use CMake's standard helper for the result handling: it sets LIBSQUISH_FOUND,
+# prints the usual status line, and honours the REQUIRED and QUIET arguments of
+# FIND_PACKAGE(libSquish ...), which a hand-written check silently ignores.
+INCLUDE(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(libSquish DEFAULT_MSG LIBSQUISH_LIBRARY LIBSQUISH_INCLUDE_DIR)
+
+# Keep the two cache entries out of the default cache view.
+MARK_AS_ADVANCED(LIBSQUISH_INCLUDE_DIR LIBSQUISH_LIBRARY)
--- a/extern/libsquish-1.15/ChangeLog.txt
+++ b/extern/libsquish-1.15/ChangeLog.txt
@ -0,0 +1,66 @@
+1.15
+* parallel compression using openmp with cmake (Marian Krivos / Stefan Roettger)
+* parallel decompression using openmp with cmake (Stefan Roettger)
+
+1.14
+* backport BGRA support
+* backport BC4 and BC5 support
+* backport BlockMSE support
+
+1.11-1.13
+* added support for CMake and QMake (Stefan Roettger)
+* misc. minor changes on the build system (Stefan Roettger)
+* added svg icon (Stefan Roettger)
+
+1.10
+* Iterative cluster fit is now considered to be a new compression mode
+* The core cluster fit is now 4x faster using contributions by Ignacio
+Castano from NVIDIA
+* The single colour lookup table has been halved by exploiting symmetry
+
+1.9
+* Added contributed SSE1 truncate implementation
+* Changed use of SQUISH_USE_SSE to be 1 for SSE and 2 for SSE2 instructions
+* Cluster fit is now iterative to further reduce image error
+
+1.8
+* Switched from using floor to trunc for much better SSE performance (again)
+* Xcode build now expects libpng in /usr/local for extra/squishpng
+
+1.7
+* Fixed floating-point equality issue in clusterfit sort (x86 affected only)
+* Implemented proper SSE(2) floor function for 50% speedup on SSE builds 
+* The range fit implementation now uses the correct colour metric
+
+1.6
+* Fixed bug in CompressImage where masked pixels were not skipped over
+* DXT3 and DXT5 alpha compression now properly use the mask to ignore pixels
+* Fixed major DXT1 bug that can generate unexpected transparent pixels
+
+1.5
+* Added CompressMasked function to handle incomplete DXT blocks more cleanly
+* Added kWeightColourByAlpha flag for better quality images when alpha blending
+
+1.4
+* Fixed stack overflow in rangefit
+
+1.3
+* Worked around SSE floor implementation bug; a proper fix is still needed!
+* This release has Visual Studio and makefile builds that work
+
+1.2
+* Added provably optimal single colour compressor
+* Added extra/squishgen.cpp that generates single colour lookup tables
+
+1.1
+* Fixed a DXT1 colour output bug
+* Changed argument order for Decompress function to match Compress
+* Added GetStorageRequirements function
+* Added CompressImage function
+* Added DecompressImage function
+* Moved squishtool.cpp to extra/squishpng.cpp
+* Added extra/squishtest.cpp
+
+1.0
+* Initial release
+
--- a/extern/libsquish-1.15/Doxyfile
+++ b/extern/libsquish-1.15/Doxyfile
@ -0,0 +1,214 @@
+# Doxyfile 1.4.6
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+PROJECT_NAME           = squish
+PROJECT_NUMBER         = 1.15
+OUTPUT_DIRECTORY       = docs 
+CREATE_SUBDIRS         = NO
+OUTPUT_LANGUAGE        = English
+BRIEF_MEMBER_DESC      = YES
+REPEAT_BRIEF           = YES
+ABBREVIATE_BRIEF       = 
+ALWAYS_DETAILED_SEC    = NO
+INLINE_INHERITED_MEMB  = NO
+FULL_PATH_NAMES        = YES
+STRIP_FROM_PATH        = 
+STRIP_FROM_INC_PATH    = 
+SHORT_NAMES            = NO
+JAVADOC_AUTOBRIEF      = NO
+MULTILINE_CPP_IS_BRIEF = NO
+INHERIT_DOCS           = YES
+SEPARATE_MEMBER_PAGES  = NO
+TAB_SIZE               = 4
+ALIASES                = 
+OPTIMIZE_OUTPUT_FOR_C  = NO
+OPTIMIZE_OUTPUT_JAVA   = NO
+BUILTIN_STL_SUPPORT    = NO
+DISTRIBUTE_GROUP_DOC   = NO
+SUBGROUPING            = YES
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+EXTRACT_ALL            = YES
+EXTRACT_PRIVATE        = NO
+EXTRACT_STATIC         = NO
+EXTRACT_LOCAL_CLASSES  = YES
+EXTRACT_LOCAL_METHODS  = NO
+HIDE_UNDOC_MEMBERS     = NO
+HIDE_UNDOC_CLASSES     = NO
+HIDE_FRIEND_COMPOUNDS  = NO
+HIDE_IN_BODY_DOCS      = NO
+INTERNAL_DOCS          = NO
+CASE_SENSE_NAMES       = NO
+HIDE_SCOPE_NAMES       = NO
+SHOW_INCLUDE_FILES     = YES
+INLINE_INFO            = YES
+SORT_MEMBER_DOCS       = YES
+SORT_BRIEF_DOCS        = NO
+SORT_BY_SCOPE_NAME     = NO
+GENERATE_TODOLIST      = YES
+GENERATE_TESTLIST      = YES
+GENERATE_BUGLIST       = YES
+GENERATE_DEPRECATEDLIST= YES
+ENABLED_SECTIONS       = 
+MAX_INITIALIZER_LINES  = 30
+SHOW_USED_FILES        = YES
+FILE_VERSION_FILTER    = 
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+QUIET                  = YES
+WARNINGS               = YES
+WARN_IF_UNDOCUMENTED   = YES
+WARN_IF_DOC_ERROR      = YES
+WARN_NO_PARAMDOC       = NO
+WARN_FORMAT            = "$file:$line: $text"
+WARN_LOGFILE           = 
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+INPUT                  = squish.h
+FILE_PATTERNS          = 
+RECURSIVE              = NO
+EXCLUDE                = 
+EXCLUDE_SYMLINKS       = NO
+EXCLUDE_PATTERNS       = 
+EXAMPLE_PATH           = 
+EXAMPLE_PATTERNS       = 
+EXAMPLE_RECURSIVE      = NO
+IMAGE_PATH             = 
+INPUT_FILTER           = 
+FILTER_PATTERNS        = 
+FILTER_SOURCE_FILES    = NO
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+SOURCE_BROWSER         = NO
+INLINE_SOURCES         = NO
+STRIP_CODE_COMMENTS    = YES
+REFERENCED_BY_RELATION = YES
+REFERENCES_RELATION    = YES
+USE_HTAGS              = NO
+VERBATIM_HEADERS       = YES
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+ALPHABETICAL_INDEX     = NO
+COLS_IN_ALPHA_INDEX    = 5
+IGNORE_PREFIX          = 
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+GENERATE_HTML          = YES
+HTML_OUTPUT            = html
+HTML_FILE_EXTENSION    = .html
+HTML_HEADER            = 
+HTML_FOOTER            = 
+HTML_STYLESHEET        = 
+GENERATE_HTMLHELP      = NO
+CHM_FILE               = 
+HHC_LOCATION           = 
+GENERATE_CHI           = NO
+BINARY_TOC             = NO
+TOC_EXPAND             = NO
+DISABLE_INDEX          = NO
+ENUM_VALUES_PER_LINE   = 4
+GENERATE_TREEVIEW      = NO
+TREEVIEW_WIDTH         = 250
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+GENERATE_LATEX         = NO
+LATEX_OUTPUT           = latex
+LATEX_CMD_NAME         = latex
+MAKEINDEX_CMD_NAME     = makeindex
+COMPACT_LATEX          = NO
+PAPER_TYPE             = a4wide
+EXTRA_PACKAGES         = 
+LATEX_HEADER           = 
+PDF_HYPERLINKS         = NO
+USE_PDFLATEX           = NO
+LATEX_BATCHMODE        = NO
+LATEX_HIDE_INDICES     = NO
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+GENERATE_RTF           = NO
+RTF_OUTPUT             = rtf
+COMPACT_RTF            = NO
+RTF_HYPERLINKS         = NO
+RTF_STYLESHEET_FILE    = 
+RTF_EXTENSIONS_FILE    = 
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+GENERATE_MAN           = NO
+MAN_OUTPUT             = man
+MAN_EXTENSION          = .3
+MAN_LINKS              = NO
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+GENERATE_XML           = NO
+XML_OUTPUT             = xml
+XML_PROGRAMLISTING     = YES
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+GENERATE_AUTOGEN_DEF   = NO
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+GENERATE_PERLMOD       = NO
+PERLMOD_LATEX          = NO
+PERLMOD_PRETTY         = YES
+PERLMOD_MAKEVAR_PREFIX = 
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor   
+#---------------------------------------------------------------------------
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = NO
+EXPAND_ONLY_PREDEF     = NO
+SEARCH_INCLUDES        = YES
+INCLUDE_PATH           = 
+INCLUDE_FILE_PATTERNS  = 
+PREDEFINED             = 
+EXPAND_AS_DEFINED      = 
+SKIP_FUNCTION_MACROS   = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references   
+#---------------------------------------------------------------------------
+TAGFILES               = 
+GENERATE_TAGFILE       = 
+ALLEXTERNALS           = NO
+EXTERNAL_GROUPS        = YES
+PERL_PATH              = /usr/bin/perl
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool   
+#---------------------------------------------------------------------------
+CLASS_DIAGRAMS         = YES
+HIDE_UNDOC_RELATIONS   = YES
+HAVE_DOT               = NO
+CLASS_GRAPH            = YES
+COLLABORATION_GRAPH    = YES
+GROUP_GRAPHS           = YES
+UML_LOOK               = NO
+TEMPLATE_RELATIONS     = NO
+INCLUDE_GRAPH          = YES
+INCLUDED_BY_GRAPH      = YES
+CALL_GRAPH             = NO
+GRAPHICAL_HIERARCHY    = YES
+DIRECTORY_GRAPH        = YES
+DOT_IMAGE_FORMAT       = png
+DOTFILE_DIRS           = 
+MAX_DOT_GRAPH_DEPTH    = 0
+DOT_TRANSPARENT        = NO
+DOT_MULTI_TARGETS      = NO
+GENERATE_LEGEND        = YES
+DOT_CLEANUP            = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine   
+#---------------------------------------------------------------------------
+SEARCHENGINE           = NO
--- a/extern/libsquish-1.15/LICENSE.txt
+++ b/extern/libsquish-1.15/LICENSE.txt
@ -0,0 +1,20 @@
+	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+	Permission is hereby granted, free of charge, to any person obtaining
+	a copy of this software and associated documentation files (the 
+	"Software"), to deal in the Software without restriction, including
+	without limitation the rights to use, copy, modify, merge, publish,
+	distribute, sublicense, and/or sell copies of the Software, and to 
+	permit persons to whom the Software is furnished to do so, subject to 
+	the following conditions:
+
+	The above copyright notice and this permission notice shall be included
+	in all copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
+	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
+	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
+	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/extern/libsquish-1.15/Makefile
+++ b/extern/libsquish-1.15/Makefile
@ -0,0 +1,65 @@
+# Build, install and packaging rules for libsquish.
+# This file uses INSTALL_DIR, LIB_PATH, PREFIX, USE_SHARED, INSTALL_FILE and
+# INSTALL_DIRECTORY without defining them; they are presumably provided by the
+# included `config` file.
+include config
+
+VER = 1.15
+SOVER = 0
+
+SRC = alpha.cpp clusterfit.cpp colourblock.cpp colourfit.cpp colourset.cpp maths.cpp rangefit.cpp singlecolourfit.cpp squish.cpp
+
+HDR = alpha.h clusterfit.h colourblock.h colourfit.h colourset.h maths.h rangefit.h singlecolourfit.h squish.h
+HDR += config.h simd.h simd_float.h simd_sse.h simd_ve.h singlecolourlookup.inl
+
+OBJ = $(SRC:%.cpp=%.o)
+
+# The shared library file is libsquish.so.$(SOVER).0; its soname is libsquish.so.$(SOVER).
+SOLIB = libsquish.so.$(SOVER)
+LIB = $(SOLIB).0
+CPPFLAGS += -fPIC
+LIBA = libsquish.a
+
+.PHONY: all install uninstall docs tgz clean
+
+all: $(LIB) $(LIBA) docs libsquish.pc
+
+# Installs the header and the static library; the shared library, its links
+# and the pkg-config file are only installed when USE_SHARED is not 0.
+install: $(LIB) $(LIBA) libsquish.pc
+	$(INSTALL_DIRECTORY) $(INSTALL_DIR)/include $(INSTALL_DIR)/$(LIB_PATH)
+	$(INSTALL_FILE) squish.h $(INSTALL_DIR)/include
+	$(INSTALL_FILE) $(LIBA) $(INSTALL_DIR)/$(LIB_PATH)
+ifneq ($(USE_SHARED),0)
+	$(INSTALL_FILE) $(LIB) $(INSTALL_DIR)/$(LIB_PATH)
+	# -f replaces links left behind by an earlier install instead of failing on them
+	ln -sf $(LIB) $(INSTALL_DIR)/$(LIB_PATH)/$(SOLIB)
+	ln -sf $(LIB) $(INSTALL_DIR)/$(LIB_PATH)/libsquish.so
+	$(INSTALL_DIRECTORY) $(INSTALL_DIR)/$(LIB_PATH)/pkgconfig
+	$(INSTALL_FILE) libsquish.pc $(INSTALL_DIR)/$(LIB_PATH)/pkgconfig
+endif
+
+# The shared-library artefacts may not exist, so their removal is allowed to fail.
+uninstall:
+	$(RM) $(INSTALL_DIR)/include/squish.h
+	$(RM) $(INSTALL_DIR)/$(LIB_PATH)/$(LIBA)
+	-$(RM) $(INSTALL_DIR)/$(LIB_PATH)/$(LIB)
+	-$(RM) $(INSTALL_DIR)/$(LIB_PATH)/$(SOLIB)
+	-$(RM) $(INSTALL_DIR)/$(LIB_PATH)/libsquish.so
+	-$(RM) $(INSTALL_DIR)/$(LIB_PATH)/pkgconfig/libsquish.pc
+
+# With USE_SHARED set to 0 this rule has an empty recipe and builds nothing.
+$(LIB): $(OBJ)
+ifneq ($(USE_SHARED),0)
+	$(CXX) $(LDFLAGS) -shared -Wl,-soname,$(SOLIB) -o $@ $(OBJ)
+endif
+
+$(LIBA): $(OBJ)
+	$(AR) cr $@ $?
+	@ranlib $@
+
+# Runs doxygen only when it is available on the PATH.
+docs: $(SRC) $(HDR)
+	@if [ -x "`command -v doxygen`" ]; then doxygen; fi
+
+libsquish.pc: libsquish.pc.in
+	@sed 's|@PREFIX@|$(PREFIX)|;s|@LIB_PATH@|$(LIB_PATH)|' $@.in > $@
+
+# tar's --exclude only applies to the names that follow it, so it has to come
+# before the list of files to archive.
+tgz: clean
+	tar zcf libsquish-$(VER).tgz --exclude \*.svn\* $(SRC) $(HDR) Makefile config CMakeLists.txt CMakeModules libSquish.* README.txt LICENSE.txt ChangeLog.txt Doxyfile libsquish.pc.in extra
+
+%.o: %.cpp
+	$(CXX) $(CPPFLAGS) -I. $(CXXFLAGS) -o $@ -c $<
+
+clean:
+	$(RM) $(OBJ) $(LIB) $(LIBA) libsquish.pc
+	@-$(RM) -rf docs
--- a/extern/libsquish-1.15/README.txt
+++ b/extern/libsquish-1.15/README.txt
@ -0,0 +1,18 @@
+LICENSE
+-------
+
+The squish library is distributed under the terms and conditions of the MIT
+license. This license is specified at the top of each source file and must be
+preserved in its entirety.
+
+BUILDING AND INSTALLING THE LIBRARY
+-----------------------------------
+
+The preferred way to install the library on Unix/Mac (and Windows) is via CMake:
+ cmake . && make && sudo make install
+
+REPORTING BUGS OR FEATURE REQUESTS
+----------------------------------
+
+Feedback can be sent to Simon Brown (the developer) at si@sjbrown.co.uk
+Feedback can also be sent to Stefan Roettger (the maintainer) at snroettg@gmail.com
--- a/extern/libsquish-1.15/alpha.cpp
+++ b/extern/libsquish-1.15/alpha.cpp
@ -0,0 +1,350 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#include "alpha.h"
+
+#include <climits>
+#include <algorithm>
+
+namespace squish {
+
+// Converts a to an integer by adding a half and truncating, then clamps the
+// result so that it is never below zero and never above limit.
+static int FloatToInt( float a, int limit )
+{
+    // the cast truncates towards zero, so the added half turns it into
+    // round-to-nearest for non-negative input
+    int const rounded = ( int )( a + 0.5f );
+
+    // negative results clamp to zero, large ones to the limit
+    if( rounded < 0 )
+        return 0;
+    return ( rounded > limit ) ? limit : rounded;
+}
+
+// Packs the alpha bytes of a 16-pixel RGBA block into 8 bytes of 4-bit
+// values, two pixels per byte. A pixel whose bit is clear in mask is stored
+// with an alpha of zero.
+void CompressAlphaDxt3( u8 const* rgba, int mask, void* block )
+{
+    u8* packed = reinterpret_cast< u8* >( block );
+
+    // every output byte covers two consecutive pixels
+    for( int pair = 0; pair < 8; ++pair )
+    {
+        int const even = 2*pair;
+        int const odd = even + 1;
+
+        // rescale each 8-bit alpha to the 0..15 range
+        int lowNibble = FloatToInt( ( float )rgba[4*even + 3] * ( 15.0f/255.0f ), 15 );
+        int highNibble = FloatToInt( ( float )rgba[4*odd + 3] * ( 15.0f/255.0f ), 15 );
+
+        // masked-out pixels are written as zero alpha
+        if( ( mask & ( 1 << even ) ) == 0 )
+            lowNibble = 0;
+        if( ( mask & ( 1 << odd ) ) == 0 )
+            highNibble = 0;
+
+        // even pixel goes in the low nibble, odd pixel in the high nibble
+        packed[pair] = ( u8 )( ( highNibble << 4 ) | lowNibble );
+    }
+}
+
+// Expands the 8 bytes of 4-bit alpha in block into the alpha bytes of a
+// 16-pixel RGBA block. Only every fourth byte of rgba (the alpha) is written.
+void DecompressAlphaDxt3( u8* rgba, void const* block )
+{
+    u8 const* packed = reinterpret_cast< u8 const* >( block );
+
+    // every input byte carries the alpha of two consecutive pixels
+    u8* alpha = rgba + 3;
+    for( int pair = 0; pair < 8; ++pair, alpha += 8 )
+    {
+        // split the byte into its two 4-bit values
+        int const low = packed[pair] & 0x0f;
+        int const high = packed[pair] >> 4;
+
+        // replicate each nibble into both halves of the output byte
+        alpha[0] = ( u8 )( ( low << 4 ) | low );
+        alpha[4] = ( u8 )( ( high << 4 ) | high );
+    }
+}
+
+// Widens [min, max] until it spans at least steps, as far as the 0..255
+// limits allow: max is raised first, and min is lowered only if that was
+// not enough.
+static void FixRange( int& min, int& max, int steps )
+{
+    // already wide enough
+    if( max - min >= steps )
+        return;
+
+    // push the top up, but not past 255
+    max = std::min( min + steps, 255 );
+    if( max - min >= steps )
+        return;
+
+    // the top hit the ceiling, so pull the bottom down, but not below 0
+    min = std::max( 0, max - steps );
+}
+
+// Chooses, for each of the 16 pixels, the entry of the 8-entry codebook that
+// is closest to the pixel's alpha, stores its position in indices and
+// returns the summed squared error. Pixels whose bit is clear in mask get
+// index 0 and add nothing to the error.
+static int FitCodes( u8 const* rgba, int mask, u8 const* codes, u8* indices )
+{
+    int total = 0;
+    for( int pixel = 0; pixel < 16; ++pixel )
+    {
+        // masked-out pixels simply take the first code
+        if( ( mask & ( 1 << pixel ) ) == 0 )
+        {
+            indices[pixel] = 0;
+            continue;
+        }
+
+        // scan the codebook for the smallest squared distance; on a tie the
+        // earliest code wins because the comparison is strict
+        int const alpha = rgba[4*pixel + 3];
+        int bestCode = 0;
+        int bestDist = INT_MAX;
+        for( int code = 0; code < 8; ++code )
+        {
+            int const delta = alpha - ( int )codes[code];
+            int const squared = delta*delta;
+            if( squared < bestDist )
+            {
+                bestDist = squared;
+                bestCode = code;
+            }
+        }
+
+        // record the winner and add its error to the running total
+        indices[pixel] = ( u8 )bestCode;
+        total += bestDist;
+    }
+
+    return total;
+}
+
+// Writes an 8-byte alpha block: the two endpoint bytes followed by the
+// sixteen 3-bit indices packed into six bytes.
+static void WriteAlphaBlock( int alpha0, int alpha1, u8 const* indices, void* block )
+{
+    u8* out = reinterpret_cast< u8* >( block );
+
+    // endpoints come first
+    out[0] = ( u8 )alpha0;
+    out[1] = ( u8 )alpha1;
+
+    // then two groups of eight indices, each group filling 24 bits
+    for( int group = 0; group < 2; ++group )
+    {
+        u8 const* in = indices + 8*group;
+
+        // gather the eight indices, first index in the lowest bits
+        int bits = 0;
+        for( int k = 0; k < 8; ++k )
+            bits |= ( int )in[k] << ( 3*k );
+
+        // emit the 24 bits as three bytes, least significant byte first
+        u8* dst = out + 2 + 3*group;
+        dst[0] = ( u8 )( bits & 0xff );
+        dst[1] = ( u8 )( ( bits >> 8 ) & 0xff );
+        dst[2] = ( u8 )( ( bits >> 16 ) & 0xff );
+    }
+}
+
+// Writes a block whose first endpoint is not greater than the second. If the
+// endpoints arrive the other way round they are exchanged and the indices
+// remapped to match.
+static void WriteAlphaBlock5( int alpha0, int alpha1, u8 const* indices, void* block )
+{
+    // endpoints already in the required order: write as given
+    if( alpha0 <= alpha1 )
+    {
+        WriteAlphaBlock( alpha0, alpha1, indices, block );
+        return;
+    }
+
+    // exchanging the endpoints swaps codes 0 and 1 and reverses codes 2..5;
+    // codes 6 and 7 are left as they are
+    static u8 const remap[8] = { 1, 0, 5, 4, 3, 2, 6, 7 };
+    u8 swapped[16];
+    for( int i = 0; i < 16; ++i )
+    {
+        u8 const index = indices[i];
+        swapped[i] = ( index < 8 ) ? remap[index] : index;
+    }
+
+    // write the block with the endpoints exchanged
+    WriteAlphaBlock( alpha1, alpha0, swapped, block );
+}
+
+// Writes a block whose first endpoint is not less than the second. If the
+// endpoints arrive the other way round they are exchanged and the indices
+// remapped to match.
+static void WriteAlphaBlock7( int alpha0, int alpha1, u8 const* indices, void* block )
+{
+    // endpoints already in the required order: write as given
+    if( alpha0 >= alpha1 )
+    {
+        WriteAlphaBlock( alpha0, alpha1, indices, block );
+        return;
+    }
+
+    // exchanging the endpoints swaps codes 0 and 1 and reverses the
+    // remaining codes (2 <-> 7, 3 <-> 6, 4 <-> 5)
+    u8 swapped[16];
+    for( int i = 0; i < 16; ++i )
+    {
+        int const index = indices[i];
+        swapped[i] = ( u8 )( ( index < 2 ) ? ( 1 - index ) : ( 9 - index ) );
+    }
+
+    // write the block with the endpoints exchanged
+    WriteAlphaBlock( alpha1, alpha0, swapped, block );
+}
+
+// Compresses the alpha bytes of a 16-pixel RGBA block into an 8-byte
+// interpolated alpha block. Both the 5-alpha and the 7-alpha codebooks are
+// fitted and the one with the smaller squared error is written. Pixels whose
+// bit is clear in mask do not take part in choosing the endpoints.
+void CompressAlphaDxt5( u8 const* rgba, int mask, void* block )
+{
+    // the 5-alpha range leaves out 0 and 255, which that codebook carries as
+    // explicit codes; the 7-alpha range covers every valid value
+    int min5 = 255;
+    int max5 = 0;
+    int min7 = 255;
+    int max7 = 0;
+    for( int pixel = 0; pixel < 16; ++pixel )
+    {
+        // skip pixels that are masked out
+        if( ( mask & ( 1 << pixel ) ) == 0 )
+            continue;
+
+        int const alpha = rgba[4*pixel + 3];
+        min7 = std::min( min7, alpha );
+        max7 = std::max( max7, alpha );
+        if( alpha != 0 )
+            min5 = std::min( min5, alpha );
+        if( alpha != 255 )
+            max5 = std::max( max5, alpha );
+    }
+
+    // if nothing contributed, min is still above max: collapse onto max
+    min5 = std::min( min5, max5 );
+    min7 = std::min( min7, max7 );
+
+    // make each range at least as wide as its number of steps
+    FixRange( min5, max5, 5 );
+    FixRange( min7, max7, 7 );
+
+    // 5-alpha codebook: endpoints, four interpolated values, then 0 and 255
+    u8 codes5[8];
+    codes5[0] = ( u8 )min5;
+    codes5[1] = ( u8 )max5;
+    for( int slot = 2; slot < 6; ++slot )
+        codes5[slot] = ( u8 )( ( ( 6 - slot )*min5 + ( slot - 1 )*max5 )/5 );
+    codes5[6] = 0;
+    codes5[7] = 255;
+
+    // 7-alpha codebook: endpoints followed by six interpolated values
+    u8 codes7[8];
+    codes7[0] = ( u8 )min7;
+    codes7[1] = ( u8 )max7;
+    for( int slot = 2; slot < 8; ++slot )
+        codes7[slot] = ( u8 )( ( ( 8 - slot )*min7 + ( slot - 1 )*max7 )/7 );
+
+    // fit the pixels against both codebooks
+    u8 indices5[16];
+    u8 indices7[16];
+    int const err5 = FitCodes( rgba, mask, codes5, indices5 );
+    int const err7 = FitCodes( rgba, mask, codes7, indices7 );
+
+    // keep the better fit; a tie goes to the 5-alpha encoding
+    if( err7 < err5 )
+        WriteAlphaBlock7( min7, max7, indices7, block );
+    else
+        WriteAlphaBlock5( min5, max5, indices5, block );
+}
+
+// Decodes an 8-byte interpolated alpha block into the alpha bytes of a
+// 16-pixel RGBA block. Only every fourth byte of rgba (the alpha) is written.
+void DecompressAlphaDxt5( u8* rgba, void const* block )
+{
+    u8 const* in = reinterpret_cast< u8 const* >( block );
+    int const alpha0 = in[0];
+    int const alpha1 = in[1];
+
+    // the endpoint order selects the codebook: alpha0 <= alpha1 gives four
+    // interpolated values plus explicit 0 and 255, otherwise six
+    // interpolated values
+    int const steps = ( alpha0 <= alpha1 ) ? 5 : 7;
+    u8 codes[8];
+    codes[0] = ( u8 )alpha0;
+    codes[1] = ( u8 )alpha1;
+    for( int i = 1; i < steps; ++i )
+        codes[1 + i] = ( u8 )( ( ( steps - i )*alpha0 + i*alpha1 )/steps );
+    if( steps == 5 )
+    {
+        codes[6] = 0;
+        codes[7] = 255;
+    }
+
+    // unpack the sixteen 3-bit indices; each run of three bytes holds eight
+    u8 indices[16];
+    for( int group = 0; group < 2; ++group )
+    {
+        // assemble 24 bits, least significant byte first
+        u8 const* src = in + 2 + 3*group;
+        int const bits = src[0] | ( src[1] << 8 ) | ( src[2] << 16 );
+
+        // peel the indices off from the low end
+        for( int k = 0; k < 8; ++k )
+            indices[8*group + k] = ( u8 )( ( bits >> ( 3*k ) ) & 0x7 );
+    }
+
+    // look every index up in the codebook
+    for( int pixel = 0; pixel < 16; ++pixel )
+        rgba[4*pixel + 3] = codes[indices[pixel]];
+}
+
+} // namespace squish
--- a/extern/libsquish-1.15/alpha.h
+++ b/extern/libsquish-1.15/alpha.h
@ -0,0 +1,41 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_ALPHA_H
+#define SQUISH_ALPHA_H
+
+#include "squish.h"
+
+namespace squish {
+
+// Alpha encoders. rgba holds 16 pixels of four bytes each with the alpha in
+// the fourth byte; bit i of mask marks pixel i as valid; 8 bytes of
+// compressed alpha are written to block.
+void CompressAlphaDxt3( u8 const* rgba, int mask, void* block );
+void CompressAlphaDxt5( u8 const* rgba, int mask, void* block );
+
+// Alpha decoders. The 8 bytes at block are expanded into the alpha byte of
+// each of the 16 pixels in rgba; the other three bytes of every pixel are
+// left untouched.
+void DecompressAlphaDxt3( u8* rgba, void const* block );
+void DecompressAlphaDxt5( u8* rgba, void const* block );
+
+} // namespace squish
+
+#endif // ndef SQUISH_ALPHA_H
--- a/extern/libsquish-1.15/clusterfit.cpp
+++ b/extern/libsquish-1.15/clusterfit.cpp
@ -0,0 +1,392 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+    Copyright (c) 2007 Ignacio Castano                   icastano@nvidia.com
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#include "clusterfit.h"
+#include "colourset.h"
+#include "colourblock.h"
+#include <cfloat>
+
+namespace squish {
+
+/*! @brief Prepares a cluster fit over a colour set.
+
+    @param colours  The colour set to fit (forwarded to ColourFit).
+    @param flags    Compression flags (forwarded to ColourFit);
+                    kColourIterativeClusterFit raises the number of ordering
+                    passes from 1 to kMaxIterations.
+    @param metric   Optional array of three per-channel error weights; when
+                    null every channel is weighted 1.
+*/
+ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric )
+  : ColourFit( colours, flags )
+{
+    // one ordering pass by default, more only when iterative fitting is on
+    m_iterationCount = 1;
+    if( ( m_flags & kColourIterativeClusterFit ) != 0 )
+        m_iterationCount = kMaxIterations;
+
+    // uniform metric unless the caller supplied per-channel weights
+    // (old perceptual = 0.2126f, 0.7152f, 0.0722f)
+    m_metric = VEC4_CONST( 1.0f );
+    if( metric )
+        m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
+
+    // nothing has been fitted yet, so any finite error is an improvement
+    m_besterror = VEC4_CONST( FLT_MAX );
+
+    // the principle component of the weighted covariance is the first axis
+    // the points get ordered along
+    Sym3x3 const weightedCovariance = ComputeWeightedCovariance(
+        m_colours->GetCount(), m_colours->GetPoints(), m_colours->GetWeights() );
+    m_principle = ComputePrincipleComponent( weightedCovariance );
+}
+
+/*! @brief Orders the points along an axis and caches their weighted values.
+
+    @param axis       Direction the points are projected onto.
+    @param iteration  Which 16-entry slot of m_order receives the ordering.
+
+    @return false if an earlier iteration already produced the same ordering
+            (the cached weighted points are then left untouched), true
+            otherwise.
+*/
+bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
+{
+    int const pointCount = m_colours->GetCount();
+    Vec3 const* points = m_colours->GetPoints();
+    float const* weights = m_colours->GetWeights();
+
+    // each iteration owns its own 16-entry permutation inside m_order
+    u8* const current = m_order + 16*iteration;
+
+    // project every point onto the axis, starting from the identity order
+    float projection[16];
+    for( int n = 0; n < pointCount; ++n )
+    {
+        projection[n] = Dot( points[n], axis );
+        current[n] = ( u8 )n;
+    }
+
+    // insertion sort on the projections; equal keys keep their input order
+    for( int n = 1; n < pointCount; ++n )
+    {
+        int slot = n;
+        while( slot > 0 && projection[slot] < projection[slot - 1] )
+        {
+            std::swap( projection[slot], projection[slot - 1] );
+            std::swap( current[slot], current[slot - 1] );
+            --slot;
+        }
+    }
+
+    // reject the ordering if any earlier iteration already produced it
+    for( int earlier = 0; earlier < iteration; ++earlier )
+    {
+        u8 const* const previous = m_order + 16*earlier;
+        int n = 0;
+        while( n < pointCount && current[n] == previous[n] )
+            ++n;
+        if( n == pointCount )
+            return false;
+    }
+
+    // cache (x*w, y*w, z*w, w) per point in sorted order, plus the total
+    m_xsum_wsum = VEC4_CONST( 0.0f );
+    for( int n = 0; n < pointCount; ++n )
+    {
+        int const source = current[n];
+        Vec3 const& colour = points[source];
+        Vec4 const position( colour.X(), colour.Y(), colour.Z(), 1.0f );
+        Vec4 const weight( weights[source] );
+        Vec4 const weighted = position*weight;
+        m_points_weights[n] = weighted;
+        m_xsum_wsum += weighted;
+    }
+    return true;
+}
+
+/*! @brief Three-colour (start, midpoint, end) cluster fit.
+
+    Tries every split of the ordered points into three consecutive clusters
+    [0,i), [i,j), [j,count), solves for the least-squares endpoints of each
+    split, snaps them to the 5:6:5 grid and keeps the split with the lowest
+    metric-weighted error. While m_iterationCount allows, the points are
+    re-ordered along the axis between the best endpoints and the search is
+    repeated, stopping when a pass brings no improvement or the ordering has
+    been seen before. The block is written, and m_besterror updated, only if
+    the result beats the current m_besterror.
+
+    @param block  Destination handed to WriteColourBlock3.
+*/
+void ClusterFit::Compress3( void* block )
+{
+    // declare variables
+    int const count = m_colours->GetCount();
+    // float literal, matching Compress4 and the other constants here
+    Vec4 const two = VEC4_CONST( 2.0f );
+    Vec4 const one = VEC4_CONST( 1.0f );
+    // the middle cluster sits half way: xyz lanes carry 1/2, the w lane its square
+    Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
+    Vec4 const zero = VEC4_CONST( 0.0f );
+    Vec4 const half = VEC4_CONST( 0.5f );
+    // number of steps per channel on the 5:6:5 endpoint grid, and reciprocals
+    Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
+    Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
+
+    // prepare an ordering using the principle axis
+    ConstructOrdering( m_principle, 0 );
+
+    // check all possible clusters and iterate on the total order
+    Vec4 beststart = VEC4_CONST( 0.0f );
+    Vec4 bestend = VEC4_CONST( 0.0f );
+    Vec4 besterror = m_besterror;
+    u8 bestindices[16];
+    int bestiteration = 0;
+    int besti = 0, bestj = 0;
+
+    // loop over iterations (we avoid the case that all points in first or last cluster)
+    for( int iterationIndex = 0;; )
+    {
+        // first cluster [0,i) is at the start
+        Vec4 part0 = VEC4_CONST( 0.0f );
+        for( int i = 0; i < count; ++i )
+        {
+            // second cluster [i,j) is half along
+            // (for i == 0 it starts with one point so the first split tried
+            // is never "everything in the last cluster")
+            Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
+            int jmin = ( i == 0 ) ? 1 : i;
+            for( int j = jmin;; )
+            {
+                // last cluster [j,count) is at the end
+                Vec4 part2 = m_xsum_wsum - part1 - part0;
+
+                // compute least squares terms directly
+                Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
+                Vec4 alpha2_sum = alphax_sum.SplatW();
+
+                Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
+                Vec4 beta2_sum = betax_sum.SplatW();
+
+                Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
+
+                // compute the least-squares optimal points
+                Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
+                Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
+                Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
+
+                // clamp to the grid
+                a = Min( one, Max( zero, a ) );
+                b = Min( one, Max( zero, b ) );
+                a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
+                b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
+
+                // compute the error (we skip the constant xxsum)
+                Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
+                Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
+                Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
+                Vec4 e4 = MultiplyAdd( two, e3, e1 );
+
+                // apply the metric to the error term
+                Vec4 e5 = e4*m_metric;
+                Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
+
+                // keep the solution if it wins
+                if( CompareAnyLessThan( error, besterror ) )
+                {
+                    beststart = a;
+                    bestend = b;
+                    besti = i;
+                    bestj = j;
+                    besterror = error;
+                    bestiteration = iterationIndex;
+                }
+
+                // advance
+                if( j == count )
+                    break;
+                part1 += m_points_weights[j];
+                ++j;
+            }
+
+            // advance
+            part0 += m_points_weights[i];
+        }
+
+        // stop if we didn't improve in this iteration
+        if( bestiteration != iterationIndex )
+            break;
+
+        // advance if possible
+        ++iterationIndex;
+        if( iterationIndex == m_iterationCount )
+            break;
+
+        // stop if a new iteration is an ordering that has already been tried
+        Vec3 axis = ( bestend - beststart ).GetVec3();
+        if( !ConstructOrdering( axis, iterationIndex ) )
+            break;
+    }
+
+    // save the block if necessary
+    if( CompareAnyLessThan( besterror, m_besterror ) )
+    {
+        // remap the indices
+        u8 const* order = ( u8* )m_order + 16*bestiteration;
+
+        // index 0 selects the start colour, 2 the midpoint, 1 the end colour;
+        // only the first count entries are written, the rest are never read
+        // because RemapIndices looks up point indices below count
+        u8 unordered[16];
+        for( int m = 0; m < besti; ++m )
+            unordered[order[m]] = 0;
+        for( int m = besti; m < bestj; ++m )
+            unordered[order[m]] = 2;
+        for( int m = bestj; m < count; ++m )
+            unordered[order[m]] = 1;
+
+        m_colours->RemapIndices( unordered, bestindices );
+
+        // save the block
+        WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
+
+        // save the error
+        m_besterror = besterror;
+    }
+}
+
+/*! @brief Four-colour (start, one third, two thirds, end) cluster fit.
+
+    Tries every split of the ordered points into four consecutive clusters
+    [0,i), [i,j), [j,k), [k,count), solves for the least-squares endpoints of
+    each split, snaps them to the 5:6:5 grid and keeps the split with the
+    lowest metric-weighted error. While m_iterationCount allows, the points
+    are re-ordered along the axis between the best endpoints and the search
+    is repeated, stopping when a pass brings no improvement or the ordering
+    has been seen before. The block is written, and m_besterror updated, only
+    if the result beats the current m_besterror.
+
+    @param block  Destination handed to WriteColourBlock4.
+*/
+void ClusterFit::Compress4( void* block )
+{
+    // declare variables
+    int const count = m_colours->GetCount();
+    Vec4 const two = VEC4_CONST( 2.0f );
+    Vec4 const one = VEC4_CONST( 1.0f );
+    // interpolation factors of the two inner clusters: xyz lanes carry the
+    // factor itself, the w lane its square (1/9 and 4/9)
+    Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
+    Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
+    // product of the two factors, (1/3)*(2/3)
+    Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
+    Vec4 const zero = VEC4_CONST( 0.0f );
+    Vec4 const half = VEC4_CONST( 0.5f );
+    // number of steps per channel on the 5:6:5 endpoint grid, and reciprocals
+    Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
+    Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
+
+    // prepare an ordering using the principle axis
+    ConstructOrdering( m_principle, 0 );
+
+    // check all possible clusters and iterate on the total order
+    Vec4 beststart = VEC4_CONST( 0.0f );
+    Vec4 bestend = VEC4_CONST( 0.0f );
+    Vec4 besterror = m_besterror;
+    u8 bestindices[16];
+    int bestiteration = 0;
+    int besti = 0, bestj = 0, bestk = 0;
+
+    // loop over iterations (we avoid the case that all points in first or last cluster)
+    for( int iterationIndex = 0;; )
+    {
+        // first cluster [0,i) is at the start
+        Vec4 part0 = VEC4_CONST( 0.0f );
+        for( int i = 0; i < count; ++i )
+        {
+            // second cluster [i,j) is one third along
+            Vec4 part1 = VEC4_CONST( 0.0f );
+            for( int j = i;; )
+            {
+                // third cluster [j,k) is two thirds along
+                // (for j == 0 it starts with one point so the first split
+                // tried is never "everything in the last cluster")
+                Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
+                int kmin = ( j == 0 ) ? 1 : j;
+                for( int k = kmin;; )
+                {
+                    // last cluster [k,count) is at the end
+                    Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
+
+                    // compute least squares terms directly
+                    Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
+                    Vec4 const alpha2_sum = alphax_sum.SplatW();
+
+                    Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
+                    Vec4 const beta2_sum = betax_sum.SplatW();
+
+                    Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
+
+                    // compute the least-squares optimal points
+                    Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
+                    Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
+                    Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
+
+                    // clamp to the grid
+                    a = Min( one, Max( zero, a ) );
+                    b = Min( one, Max( zero, b ) );
+                    a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
+                    b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
+
+                    // compute the error (we skip the constant xxsum)
+                    Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
+                    Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
+                    Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
+                    Vec4 e4 = MultiplyAdd( two, e3, e1 );
+
+                    // apply the metric to the error term
+                    Vec4 e5 = e4*m_metric;
+                    Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
+
+                    // keep the solution if it wins
+                    if( CompareAnyLessThan( error, besterror ) )
+                    {
+                        beststart = a;
+                        bestend = b;
+                        besterror = error;
+                        besti = i;
+                        bestj = j;
+                        bestk = k;
+                        bestiteration = iterationIndex;
+                    }
+
+                    // advance
+                    if( k == count )
+                        break;
+                    part2 += m_points_weights[k];
+                    ++k;
+                }
+
+                // advance
+                if( j == count )
+                    break;
+                part1 += m_points_weights[j];
+                ++j;
+            }
+
+            // advance
+            part0 += m_points_weights[i];
+        }
+
+        // stop if we didn't improve in this iteration
+        if( bestiteration != iterationIndex )
+            break;
+
+        // advance if possible
+        ++iterationIndex;
+        if( iterationIndex == m_iterationCount )
+            break;
+
+        // stop if a new iteration is an ordering that has already been tried
+        Vec3 axis = ( bestend - beststart ).GetVec3();
+        if( !ConstructOrdering( axis, iterationIndex ) )
+            break;
+    }
+
+    // save the block if necessary
+    if( CompareAnyLessThan( besterror, m_besterror ) )
+    {
+        // remap the indices
+        u8 const* order = ( u8* )m_order + 16*bestiteration;
+
+        // index 0 selects the start colour, 2 and 3 the two interpolated
+        // colours, 1 the end colour; only the first count entries are written
+        u8 unordered[16];
+        for( int m = 0; m < besti; ++m )
+            unordered[order[m]] = 0;
+        for( int m = besti; m < bestj; ++m )
+            unordered[order[m]] = 2;
+        for( int m = bestj; m < bestk; ++m )
+            unordered[order[m]] = 3;
+        for( int m = bestk; m < count; ++m )
+            unordered[order[m]] = 1;
+
+        m_colours->RemapIndices( unordered, bestindices );
+
+        // save the block
+        WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
+
+        // save the error
+        m_besterror = besterror;
+    }
+}
+
+} // namespace squish
--- a/extern/libsquish-1.15/clusterfit.h
+++ b/extern/libsquish-1.15/clusterfit.h
@ -0,0 +1,61 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+    Copyright (c) 2007 Ignacio Castano                   icastano@nvidia.com
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_CLUSTERFIT_H
+#define SQUISH_CLUSTERFIT_H
+
+#include "squish.h"
+#include "maths.h"
+#include "simd.h"
+#include "colourfit.h"
+
+namespace squish {
+
+/*! @brief Colour fit that orders the colour set along an axis and searches
+    the splits of that ordering into consecutive clusters.
+*/
+class ClusterFit : public ColourFit
+{
+public:
+    /*! @param colours  The colour set to fit.
+        @param flags    Compression flags; kColourIterativeClusterFit enables
+                        up to kMaxIterations ordering passes instead of one.
+        @param metric   Optional array of three per-channel error weights;
+                        null selects a weight of 1 for every channel.
+    */
+    ClusterFit( ColourSet const* colours, int flags, float* metric );
+
+private:
+    // Sorts the points along axis into slot `iteration` of m_order and
+    // refreshes m_points_weights / m_xsum_wsum; returns false when the
+    // ordering repeats one from an earlier iteration.
+    bool ConstructOrdering( Vec3 const& axis, int iteration );
+
+    virtual void Compress3( void* block );
+    virtual void Compress4( void* block );
+
+    // upper bound on ordering passes; also sizes m_order
+    enum { kMaxIterations = 8 };
+
+    int m_iterationCount;           // ordering passes to run: 1 or kMaxIterations
+    Vec3 m_principle;               // principle component of the weighted covariance
+    u8 m_order[16*kMaxIterations];  // one 16-entry point permutation per iteration
+    Vec4 m_points_weights[16];      // (x*w, y*w, z*w, w) per point, in sorted order
+    Vec4 m_xsum_wsum;               // sum of all m_points_weights entries in use
+    Vec4 m_metric;                  // per-channel weights applied to the error
+    Vec4 m_besterror;               // lowest error of any block written so far
+};
+
+} // namespace squish
+
+#endif // ndef SQUISH_CLUSTERFIT_H
--- a/extern/libsquish-1.15/colourblock.cpp
+++ b/extern/libsquish-1.15/colourblock.cpp
@ -0,0 +1,214 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#include "colourblock.h"
+
+namespace squish {
+
+/*! @brief Rounds a float to the nearest integer and clamps it to [0, limit].
+
+    The range checks are done in floating point before the conversion, so
+    NaN, infinities and values outside the range of int never reach the cast
+    (converting those is undefined behaviour). NaN maps to 0.
+
+    @param a      The value to convert.
+    @param limit  The largest value to return.
+    @return The rounded value, clamped to 0 below and to limit above.
+*/
+static int FloatToInt( float a, int limit )
+{
+    // bias by a half so that truncation towards zero rounds to nearest
+    float const biased = a + 0.5f;
+
+    // anything at or below -1 would truncate to a negative integer and clamp
+    // to zero; the negated comparison sends NaN down the same path
+    if( !( biased > -1.0f ) )
+        return 0;
+
+    // anything at or above limit + 1 would truncate past the limit, so
+    // clamp without converting (this also covers +infinity)
+    if( biased >= ( float )limit + 1.0f )
+        return limit;
+
+    // the value is now finite and representable, so the cast is well defined
+    int const i = ( int )biased;
+
+    // final clamp keeps the result exact when limit + 1 is not representable
+    // as a float or when limit is negative
+    return ( i > limit ) ? limit : i;
+}
+
+/*! @brief Packs a colour into a 16-bit 5:6:5 value.
+
+    @param colour  The colour; each component is scaled by 31 or 63 and then
+                   rounded and clamped by FloatToInt.
+    @return Red in bits 11-15, green in bits 5-10, blue in bits 0-4.
+*/
+static int FloatTo565( Vec3::Arg colour )
+{
+    // quantise each channel to its own bit depth
+    int const red = FloatToInt( 31.0f*colour.X(), 31 );
+    int const green = FloatToInt( 63.0f*colour.Y(), 63 );
+    int const blue = FloatToInt( 31.0f*colour.Z(), 31 );
+
+    // assemble the packed value from the low bits upwards
+    int packed = blue;
+    packed |= green << 5;
+    packed |= red << 11;
+    return packed;
+}
+
+/*! @brief Writes an 8-byte colour block.
+
+    @param a        First packed endpoint, stored in bytes 0-1.
+    @param b        Second packed endpoint, stored in bytes 2-3.
+    @param indices  16 indices, packed four to a byte into bytes 4-7.
+    @param block    Destination; 8 bytes are written.
+*/
+static void WriteColourBlock( int a, int b, u8* indices, void* block )
+{
+    u8* out = ( u8* )block;
+
+    // both endpoints go out low byte first
+    out[0] = ( u8 )( a & 0xff );
+    out[1] = ( u8 )( a >> 8 );
+    out[2] = ( u8 )( b & 0xff );
+    out[3] = ( u8 )( b >> 8 );
+
+    // each output byte holds four indices, the first one in the lowest bits
+    for( int row = 0; row < 4; ++row )
+    {
+        u8 const* src = indices + 4*row;
+        int packed = 0;
+        for( int n = 3; n >= 0; --n )
+            packed = ( packed << 2 ) | src[n];
+        out[4 + row] = ( u8 )packed;
+    }
+}
+
+/*! @brief Writes a colour block in three-colour form.
+
+    The endpoints are stored with the first packed value less than or equal
+    to the second. If they arrive the other way round they are exchanged and
+    indices 0 and 1 are exchanged with them; other indices pass through.
+
+    @param start    Colour selected by index 0.
+    @param end      Colour selected by index 1.
+    @param indices  16 per-pixel indices.
+    @param block    Destination; 8 bytes are written.
+*/
+void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
+{
+    int first = FloatTo565( start );
+    int second = FloatTo565( end );
+
+    // decide once whether the endpoints have to trade places
+    bool const flip = first > second;
+    if( flip )
+        std::swap( first, second );
+
+    // indices 0 and 1 follow their endpoints; everything else is untouched
+    u8 remapped[16];
+    for( int i = 0; i < 16; ++i )
+    {
+        u8 const index = indices[i];
+        if( flip && index == 0 )
+            remapped[i] = 1;
+        else if( flip && index == 1 )
+            remapped[i] = 0;
+        else
+            remapped[i] = index;
+    }
+
+    WriteColourBlock( first, second, remapped, block );
+}
+
+/*! @brief Writes a colour block in four-colour form.
+
+    The endpoints are stored with the first packed value greater than the
+    second. If they arrive the other way round they are exchanged and the low
+    bit of every index is flipped (0<->1, 2<->3). If both endpoints pack to
+    the same value every index is written as 0.
+
+    @param start    Colour selected by index 0.
+    @param end      Colour selected by index 1.
+    @param indices  16 per-pixel indices.
+    @param block    Destination; 8 bytes are written.
+*/
+void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
+{
+    int first = FloatTo565( start );
+    int second = FloatTo565( end );
+
+    u8 remapped[16];
+    if( first == second )
+    {
+        // identical endpoints: every pixel just refers to the first one
+        for( int i = 0; i < 16; ++i )
+            remapped[i] = 0;
+    }
+    else if( first > second )
+    {
+        // already in the stored order, so the indices are valid as given
+        for( int i = 0; i < 16; ++i )
+            remapped[i] = indices[i];
+    }
+    else
+    {
+        // exchange the endpoints and mirror every index to match
+        std::swap( first, second );
+        for( int i = 0; i < 16; ++i )
+            remapped[i] = ( indices[i] ^ 0x1 ) & 0x3;
+    }
+
+    WriteColourBlock( first, second, remapped, block );
+}
+
+/*! @brief Expands a packed 5:6:5 colour to four 8-bit components.
+
+    @param packed  Two bytes, low byte first.
+    @param colour  Receives red, green, blue and an alpha of 255.
+    @return The 16-bit packed value that was read.
+*/
+static int Unpack565( u8 const* packed, u8* colour )
+{
+    // reassemble the 16-bit value from its two bytes
+    int const value = packed[0] | ( packed[1] << 8 );
+
+    // pull out the individual bit fields
+    int const r5 = ( value >> 11 ) & 0x1f;
+    int const g6 = ( value >> 5 ) & 0x3f;
+    int const b5 = value & 0x1f;
+
+    // widen to 8 bits by repeating the top bits in the vacated low bits
+    colour[0] = ( u8 )( ( r5 << 3 ) | ( r5 >> 2 ) );
+    colour[1] = ( u8 )( ( g6 << 2 ) | ( g6 >> 4 ) );
+    colour[2] = ( u8 )( ( b5 << 3 ) | ( b5 >> 2 ) );
+    colour[3] = 255;
+
+    return value;
+}
+
+/*! @brief Decodes an 8-byte colour block into 16 RGBA pixels.
+
+    @param rgba    Receives 16 pixels of 4 bytes each.
+    @param block   The 8-byte colour block.
+    @param isDxt1  When true and the first packed endpoint is less than or
+                   equal to the second, the palette is start, end, their
+                   average and a fully zero entry; otherwise it is start,
+                   end and the two one-third interpolants.
+*/
+void DecompressColour( u8* rgba, void const* block, bool isDxt1 )
+{
+    u8 const* bytes = reinterpret_cast< u8 const* >( block );
+
+    // palette of four RGBA entries; the first two are the stored endpoints
+    u8 codes[16];
+    int const a = Unpack565( bytes, codes );
+    int const b = Unpack565( bytes + 2, codes + 4 );
+
+    // the endpoint order selects the palette layout for dxt1 blocks
+    bool const threeColour = isDxt1 && a <= b;
+
+    // derive the colour channels of the remaining two entries
+    for( int channel = 0; channel < 3; ++channel )
+    {
+        int const c = codes[channel];
+        int const d = codes[4 + channel];
+
+        if( threeColour )
+        {
+            codes[8 + channel] = ( u8 )( ( c + d )/2 );
+            codes[12 + channel] = 0;
+        }
+        else
+        {
+            codes[8 + channel] = ( u8 )( ( 2*c + d )/3 );
+            codes[12 + channel] = ( u8 )( ( c + 2*d )/3 );
+        }
+    }
+
+    // and their alpha: the last entry is transparent only in three-colour form
+    codes[8 + 3] = 255;
+    codes[12 + 3] = threeColour ? 0 : 255;
+
+    // read every 2-bit index before any pixel is written
+    u8 indices[16];
+    for( int i = 0; i < 16; ++i )
+        indices[i] = ( bytes[4 + i/4] >> ( 2*( i%4 ) ) ) & 0x3;
+
+    // copy the selected palette entry to each pixel
+    for( int i = 0; i < 16; ++i )
+    {
+        u8 const* entry = codes + 4*indices[i];
+        u8* pixel = rgba + 4*i;
+        for( int j = 0; j < 4; ++j )
+            pixel[j] = entry[j];
+    }
+}
+
+} // namespace squish
--- a/extern/libsquish-1.15/colourblock.h
+++ b/extern/libsquish-1.15/colourblock.h
@ -0,0 +1,41 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_COLOURBLOCK_H
+#define SQUISH_COLOURBLOCK_H
+
+#include "squish.h"
+#include "maths.h"
+
+namespace squish {
+
+// Pack two endpoint colours (quantised to 5:6:5) and 16 per-pixel indices
+// into the 8-byte colour block at `block`. Index 0 refers to `start` and
+// index 1 to `end`; both functions reorder the endpoints (and adjust the
+// indices to match) so that the stored order is first <= second for
+// WriteColourBlock3 and first >= second for WriteColourBlock4.
+void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
+void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
+
+// Decode the 8-byte colour block at `block` into 16 RGBA pixels (64 bytes)
+// at `rgba`. With isDxt1 set, a block whose first endpoint is <= the second
+// decodes with a three-colour palette plus a fully zero fourth entry.
+void DecompressColour( u8* rgba, void const* block, bool isDxt1 );
+
+} // namespace squish
+
+#endif // ndef SQUISH_COLOURBLOCK_H
--- a/extern/libsquish-1.15/colourfit.cpp
+++ b/extern/libsquish-1.15/colourfit.cpp
@ -0,0 +1,54 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#include "colourfit.h"
+#include "colourset.h"
+
+namespace squish {
+
+// Stores the colour set pointer and the flags for later use; no other work
+// is done at construction.
+ColourFit::ColourFit( ColourSet const* colours, int flags )
+  : m_colours( colours ),
+    m_flags( flags )
+{
+}
+
+// Empty destructor; in particular the colour set behind m_colours is not
+// released here.
+ColourFit::~ColourFit()
+{
+}
+
+/*! @brief Runs the fit variants that apply to the configured format.
+
+    Without kDxt1 only Compress4 runs. With kDxt1, Compress3 always runs and
+    Compress4 follows unless the colour set reports transparency.
+
+    @param block  Destination passed on to Compress3 / Compress4.
+*/
+void ColourFit::Compress( void* block )
+{
+    // every format other than dxt1 only uses the four-colour fit
+    if( ( m_flags & kDxt1 ) == 0 )
+    {
+        Compress4( block );
+        return;
+    }
+
+    // dxt1: the three-colour fit is always attempted
+    Compress3( block );
+
+    // the four-colour fit is skipped for sets with transparent pixels
+    if( m_colours->IsTransparent() )
+        return;
+    Compress4( block );
+}
+
+} // namespace squish
--- a/extern/libsquish-1.15/colourfit.h
+++ b/extern/libsquish-1.15/colourfit.h
@ -0,0 +1,56 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_COLOURFIT_H
+#define SQUISH_COLOURFIT_H
+
+#include "squish.h"
+#include "maths.h"
+
+#include <climits>
+
+namespace squish {
+
+class ColourSet;
+
+/*! @brief Abstract base for colour fitters: holds the colour set and flags
+    and dispatches to the three- and four-colour fits of the derived class.
+*/
+class ColourFit
+{
+public:
+    //! Keeps the colour set pointer and flags; the set is not copied.
+    ColourFit( ColourSet const* colours, int flags );
+    virtual ~ColourFit();
+
+    //! Runs Compress3 and/or Compress4 on block depending on the flags
+    //! and on whether the colour set is transparent.
+    void Compress( void* block );
+
+protected:
+    // implemented by each fitter; called from Compress
+    virtual void Compress3( void* block ) = 0;
+    virtual void Compress4( void* block ) = 0;
+
+    ColourSet const* m_colours;  // the colours being fitted (not released by this class)
+    int m_flags;                 // compression flags given at construction
+};
+
+} // namespace squish
+
+#endif // ndef SQUISH_COLOURFIT_H
--- a/extern/libsquish-1.15/colourset.cpp
+++ b/extern/libsquish-1.15/colourset.cpp
@ -0,0 +1,121 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#include "colourset.h"
+
+namespace squish {
+
+/*! @brief Builds the set of distinct colours used by a block of 16 pixels.
+
+    @param rgba   16 RGBA pixels, 4 bytes each; pixels whose mask bit is
+                  clear are never read.
+    @param mask   Bit i enables pixel i.
+    @param flags  kDxt1 makes pixels with alpha below 128 transparent (they
+                  are left out of the set); kWeightColourByAlpha weights each
+                  pixel by (alpha + 1)/256 instead of 1.
+*/
+ColourSet::ColourSet( u8 const* rgba, int mask, int flags )
+  : m_count( 0 ),
+    m_transparent( false )
+{
+    bool const isDxt1 = ( ( flags & kDxt1 ) != 0 );
+    bool const weightByAlpha = ( ( flags & kWeightColourByAlpha ) != 0 );
+
+    // create the minimal set
+    for( int i = 0; i < 16; ++i )
+    {
+        // disabled pixels get no point
+        if( ( mask & ( 1 << i ) ) == 0 )
+        {
+            m_remap[i] = -1;
+            continue;
+        }
+
+        u8 const* pixel = rgba + 4*i;
+
+        // neither do transparent pixels when using dxt1
+        if( isDxt1 && pixel[3] < 128 )
+        {
+            m_remap[i] = -1;
+            m_transparent = true;
+            continue;
+        }
+
+        // ensure there is always non-zero weight even for zero alpha
+        float const weight = weightByAlpha ? ( float )( pixel[3] + 1 ) / 256.0f : 1.0f;
+
+        // look for the first earlier pixel that is in the set with the same colour
+        int match = -1;
+        for( int j = 0; j < i && match < 0; ++j )
+        {
+            if( ( mask & ( 1 << j ) ) == 0 )
+                continue;
+
+            u8 const* other = rgba + 4*j;
+            bool const sameColour = ( pixel[0] == other[0] )
+                && ( pixel[1] == other[1] )
+                && ( pixel[2] == other[2] );
+            if( sameColour && ( other[3] >= 128 || !isDxt1 ) )
+                match = j;
+        }
+
+        if( match < 0 )
+        {
+            // first occurrence: add a point with coordinates normalised to [0,1]
+            float const x = ( float )pixel[0] / 255.0f;
+            float const y = ( float )pixel[1] / 255.0f;
+            float const z = ( float )pixel[2] / 255.0f;
+
+            m_points[m_count] = Vec3( x, y, z );
+            m_weights[m_count] = weight;
+            m_remap[i] = m_count;
+            ++m_count;
+        }
+        else
+        {
+            // repeat: share the existing point and add this pixel's weight
+            int const index = m_remap[match];
+            m_weights[index] += weight;
+            m_remap[i] = index;
+        }
+    }
+
+    // square root the weights
+    for( int n = 0; n < m_count; ++n )
+        m_weights[n] = std::sqrt( m_weights[n] );
+}
+
+/*! @brief Expands per-point indices to per-pixel indices.
+
+    @param source  One index per point of the set.
+    @param target  Receives 16 indices, one per pixel; pixels that have no
+                   point in the set receive index 3.
+*/
+void ColourSet::RemapIndices( u8 const* source, u8* target ) const
+{
+    for( int pixel = 0; pixel < 16; ++pixel )
+    {
+        int const point = m_remap[pixel];
+        target[pixel] = ( point == -1 ) ? ( u8 )3 : source[point];
+    }
+}
+
+} // namespace squish
--- a/extern/libsquish-1.15/colourset.h
+++ b/extern/libsquish-1.15/colourset.h
@ -0,0 +1,58 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_COLOURSET_H
+#define SQUISH_COLOURSET_H
+
+#include "squish.h"
+#include "maths.h"
+
+namespace squish {
+
+/*! @brief Represents a set of block colours
+
+    Built from the RGBA bytes of one block. A pixel whose colour matches an
+    earlier pixel is merged into that pixel's point and adds to its weight;
+    a remap table records which point each of the 16 pixels maps to.
+*/
+class ColourSet
+{
+public:
+    //! Builds the set from rgba (4 bytes per pixel), a mask with one bit per
+    //! pixel, and the squish flags.
+    ColourSet( u8 const* rgba, int mask, int flags );
+
+    //! Number of entries in use in the point and weight arrays.
+    int GetCount() const { return m_count; }
+    //! Points, with each channel normalised to [0,1].
+    Vec3 const* GetPoints() const { return m_points; }
+    //! Per-point weights (square root of the accumulated weight).
+    float const* GetWeights() const { return m_weights; }
+    //! Returns the transparency flag; NOTE(review): where it is set is not
+    //! visible from this header.
+    bool IsTransparent() const { return m_transparent; }
+
+    //! Maps 16 per-pixel indices out of the per-point indices in source.
+    void RemapIndices( u8 const* source, u8* target ) const;
+
+private:
+    int m_count;          // number of points stored
+    Vec3 m_points[16];    // point colours
+    float m_weights[16];  // weight of each point
+    int m_remap[16];      // point index for each pixel; -1 is treated as "no point" by RemapIndices
+    bool m_transparent;
+};
+
+} // namespace squish
+
+#endif // ndef SQUISH_COLOURSET_H
--- a/extern/libsquish-1.15/config
+++ b/extern/libsquish-1.15/config
@ -0,0 +1,38 @@
+# config file for GNUmake
+
+# define to 1 to use OpenMP parallelization
+USE_OPENMP ?= 0
+
+# define to 1 to install shared library
+USE_SHARED ?= 0
+
+# define to 1 to use Altivec instructions
+USE_ALTIVEC ?= 0
+
+# define to 1 to use SSE2 instructions
+USE_SSE ?= 0
+
+# default flags
+CXXFLAGS ?= -O2 -Wall
+ifeq ($(USE_OPENMP),1)
+   CPPFLAGS += -DSQUISH_USE_OPENMP
+   CXXFLAGS += -fopenmp
+endif
+ifeq ($(USE_ALTIVEC),1)
+   CPPFLAGS += -DSQUISH_USE_ALTIVEC=1
+   CXXFLAGS += -maltivec
+endif
+ifeq ($(USE_SSE),1)
+   # SQUISH_USE_SSE=2 selects the SSE2 code path, so the compiler has to be
+   # allowed to emit SSE2 as well; -msse on its own only enables SSE.
+   CPPFLAGS += -DSQUISH_USE_SSE=2
+   CXXFLAGS += -msse2
+endif
+
+# install options
+INSTALL = install
+INSTALL_FILE      = $(INSTALL) -p -m 644
+INSTALL_PROGRAM   = $(INSTALL) -p -m 755
+INSTALL_DIRECTORY = $(INSTALL) -d -m 755
+
+# where should we install to
+INSTALL_DIR ?= /usr/local
+LIB_PATH ?= lib
--- a/extern/libsquish-1.15/config.h
+++ b/extern/libsquish-1.15/config.h
@ -0,0 +1,49 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_CONFIG_H
+#define SQUISH_CONFIG_H
+
+// Set to 1 when building squish to use Altivec instructions.
+#ifndef SQUISH_USE_ALTIVEC
+#define SQUISH_USE_ALTIVEC 0
+#endif
+
+// Set to 1 or 2 when building squish to use SSE or SSE2 instructions.
+#ifndef SQUISH_USE_SSE
+#define SQUISH_USE_SSE 2
+#endif
+
+// Internally set SQUISH_USE_SIMD when either Altivec or SSE is available.
+#if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE
+#error "Cannot enable both Altivec and SSE!"
+#endif
+#if SQUISH_USE_ALTIVEC || SQUISH_USE_SSE
+#define SQUISH_USE_SIMD 1
+#else
+#define SQUISH_USE_SIMD 0
+#endif
+
+#endif // ndef SQUISH_CONFIG_H
--- a/extern/libsquish-1.15/extra/squishgen.cpp
+++ b/extern/libsquish-1.15/extra/squishgen.cpp
@ -0,0 +1,151 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#include <iostream>
+
+// One candidate endpoint pair for a target value: the start and end endpoint
+// values, and the error left between the code they produce and the target.
+struct SourceBlock
+{
+    int start;
+    int end;
+    int error;
+};
+
+// The endpoint pairs recorded for one 8-bit target value, one per codebook index.
+struct TargetValue
+{
+    SourceBlock sources[2];
+};
+
+/*! @brief Prints one single-colour lookup table as C++ source on stdout.
+
+    For each of the 256 possible 8-bit target values the table records, per
+    codebook index, a start/end endpoint pair whose code at that index lands
+    on (or as near as possible to) the target, plus the remaining error.
+
+    @param name     Identifier used for the emitted array.
+    @param bits     Bits per endpoint. The 8-bit expansion shifts by 8 - bits
+                    and 2*bits - 8, so this must lie in 4..8.
+    @param colours  3 uses ( a + b )/2 as the interpolated code, anything else
+                    uses ( 2*a + b )/3.
+*/
+static void GenerateData( std::string const& name, int bits, int colours )
+{
+    // number of codebook entries kept per target, i.e. the size of
+    // TargetValue::sources
+    int const sourceCount = 2;
+
+    TargetValue values[256];
+
+    // initialise the data; the bound has to be the size of the sources array,
+    // not the colour count, otherwise the writes run past the end of it
+    for( int target = 0; target < 256; ++target )
+        for( int index = 0; index < sourceCount; ++index )
+            values[target].sources[index].error = 255;
+
+    // loop over all possible source points
+    int count = ( 1 << bits );
+    for( int value1 = 0; value1 < count; ++value1 )
+    {
+        for( int value2 = 0; value2 < count; ++value2 )
+        {
+            // compute the 8-bit endpoints
+            int a = ( value1 << ( 8 - bits ) ) | ( value1 >> ( 2*bits - 8 ) );
+            int b = ( value2 << ( 8 - bits ) ) | ( value2 >> ( 2*bits - 8 ) );
+
+            // fill in the codebook with the first endpoint and the intermediate
+            int codes[sourceCount];
+            codes[0] = a;
+            if( colours == 3 )
+                codes[1] = ( a + b )/2;
+            else
+                codes[1] = ( 2*a + b )/3;
+
+            // mark each target point with the endpoints and index needed for it
+            for( int index = 0; index < sourceCount; ++index )
+            {
+                int target = codes[index];
+
+                // the first exact hit wins, later ones are ignored
+                SourceBlock& block = values[target].sources[index];
+                if( block.error != 0 )
+                {
+                    block.start = value1;
+                    block.end = value2;
+                    block.error = 0;
+                }
+            }
+        }
+    }
+
+    // iteratively fill in the missing values: a target with no exact hit
+    // borrows the endpoints of a neighbour at the cost of one more unit of error
+    for( ;; )
+    {
+        bool stable = true;
+        for( int index = 0; index < sourceCount; ++index )
+        {
+            for( int target = 0; target < 256; ++target )
+            {
+                if( target != 255 )
+                {
+                    SourceBlock& current = values[target].sources[index];
+                    SourceBlock& next = values[target + 1].sources[index];
+                    if( current.error > next.error + 1 )
+                    {
+                        current.start = next.start;
+                        current.end = next.end;
+                        current.error = next.error + 1;
+                        stable = false;
+                    }
+                }
+                if( target != 0 )
+                {
+                    SourceBlock& current = values[target].sources[index];
+                    SourceBlock& previous = values[target - 1].sources[index];
+                    if( current.error > previous.error + 1 )
+                    {
+                        current.start = previous.start;
+                        current.end = previous.end;
+                        current.error = previous.error + 1;
+                        stable = false;
+                    }
+                }
+            }
+        }
+        if( stable )
+            break;
+    }
+
+    // emit the table; separators are written between entries only, hence the
+    // loops that test for the end in the middle of the body
+    std::cout << "\nstatic SingleColourLookup const " << name << "[] = \n{\n";
+    for( int i = 0;; )
+    {
+        std::cout << "\t{ { ";
+        for( int j = 0;; )
+        {
+            SourceBlock const& block = values[i].sources[j];
+            if( j < colours )
+                std::cout << "{ " << block.start << ", " << block.end << ", " << block.error << " }";
+            else
+                std::cout << "{ 0, 0, 0 }";
+            if( ++j == sourceCount )
+                break;
+            std::cout << ", ";
+        }
+        std::cout << " } }";
+        if( ++i == 256 )
+            break;
+        std::cout << ",\n";
+    }
+    std::cout << "\n};\n";
+}
+
+// Emits the four lookup tables, in this order, on stdout.
+int main()
+{
+    struct Table
+    {
+        char const* name;
+        int bits;
+        int colours;
+    };
+    static Table const tables[] =
+    {
+        { "lookup_5_3", 5, 3 },
+        { "lookup_6_3", 6, 3 },
+        { "lookup_5_4", 5, 4 },
+        { "lookup_6_4", 6, 4 }
+    };
+
+    for( int i = 0; i < 4; ++i )
+        GenerateData( tables[i].name, tables[i].bits, tables[i].colours );
+}
--- a/extern/libsquish-1.15/extra/squishpng.cpp
+++ b/extern/libsquish-1.15/extra/squishpng.cpp
@ -0,0 +1,546 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+/*! @file
+
+    @brief Test program that compresses images loaded using the PNG format.
+
+    This program requires libpng for PNG input and output, and is designed to
+    test the RMS error for DXT compression for a set of test images.
+
+    This program uses the high-level image compression and decompression
+    functions that process an entire image at a time.
+*/
+
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <ctime>
+#include <cmath>
+#include <squish.h>
+#include <png.h>
+
+#ifdef _MSC_VER
+#pragma warning( disable: 4511 4512 )
+#endif // def _MSC_VER
+
+using namespace squish;
+
+//! Exception type that carries a human-readable message.
+class Error : public std::exception
+{
+public:
+    Error( std::string const& excuse )
+      : m_excuse( excuse )
+    {
+    }
+
+    ~Error() throw()
+    {
+    }
+
+    //! Returns the message given at construction.
+    virtual char const* what() const throw()
+    {
+        return m_excuse.c_str();
+    }
+
+private:
+    std::string m_excuse;
+};
+
+//! Base class to make derived classes non-copyable
+class NonCopyable
+{
+public:
+    NonCopyable() {}
+
+private:
+    // copy construction and copy assignment are declared private so that
+    // code outside the class cannot copy it or anything derived from it
+    NonCopyable( NonCopyable const& );
+    NonCopyable& operator=( NonCopyable const& );
+};
+
+//! Owning byte buffer; the storage is released with delete[] on destruction.
+class Mem : NonCopyable
+{
+public:
+    //! Creates an empty object holding no buffer.
+    Mem() : m_p( 0 ) {}
+    //! Allocates an uninitialised buffer of size bytes.
+    explicit Mem( int size ) : m_p( new u8[size] ) {}
+    ~Mem() { delete[] m_p; }
+
+    //! Replaces the buffer with a new uninitialised one of size bytes.
+    void Reset( int size )
+    {
+        // allocate first so the current buffer is untouched if new[] throws
+        u8 *p = new u8[size];
+
+        // the buffer always comes from new[], so it has to be freed with
+        // delete[]; a plain delete here is undefined behaviour
+        delete[] m_p;
+        m_p = p;
+    }
+
+    //! Returns the buffer, or null when none is held.
+    u8* Get() const { return m_p; }
+
+private:
+    u8* m_p;
+};
+
+//! Owns a C stdio stream and closes it on destruction.
+class File : NonCopyable
+{
+public:
+    explicit File( FILE* fp )
+      : m_fp( fp )
+    {
+    }
+
+    ~File()
+    {
+        // the stream may be null (for example after a failed fopen), in
+        // which case there is nothing to close
+        if( m_fp == 0 )
+            return;
+        fclose( m_fp );
+    }
+
+    //! True when a stream is held.
+    bool IsValid() const
+    {
+        return m_fp != 0;
+    }
+
+    //! Returns the stream without giving up ownership.
+    FILE* Get() const
+    {
+        return m_fp;
+    }
+
+private:
+    FILE* m_fp;
+};
+
+//! Owns a libpng read struct together with its info and end-info structs.
+class PngReadStruct : NonCopyable
+{
+public:
+    PngReadStruct()
+      : m_png( png_create_read_struct( PNG_LIBPNG_VER_STRING, 0, 0, 0 ) ),
+        m_info( 0 ),
+        m_end( 0 )
+    {
+        if( m_png == 0 )
+            throw Error( "failed to create png read struct" );
+
+        m_info = png_create_info_struct( m_png );
+        m_end = png_create_info_struct( m_png );
+        if( m_info != 0 && m_end != 0 )
+            return;
+
+        // the destructor does not run when the constructor throws, so free
+        // whatever was created before reporting the failure
+        png_destroy_read_struct( &m_png, m_info ? &m_info : 0, m_end ? &m_end : 0 );
+        throw Error( "failed to create png info structs" );
+    }
+
+    ~PngReadStruct()
+    {
+        png_destroy_read_struct( &m_png, &m_info, &m_end );
+    }
+
+    //! The read struct.
+    png_structp GetPng() const
+    {
+        return m_png;
+    }
+
+    //! The info struct (the end-info struct is not exposed).
+    png_infop GetInfo() const
+    {
+        return m_info;
+    }
+
+private:
+    png_structp m_png;
+    png_infop m_info, m_end;
+};
+
+//! Owns a libpng write struct together with its info struct.
+class PngWriteStruct : NonCopyable
+{
+public:
+    //! Creates both structs; throws Error if either cannot be created.
+    PngWriteStruct()
+      : m_png( 0 ),
+        m_info( 0 )
+    {
+        m_png = png_create_write_struct( PNG_LIBPNG_VER_STRING, 0, 0, 0 );
+        if( !m_png )
+            throw Error( "failed to create png write struct" );
+
+        m_info = png_create_info_struct( m_png );
+        if( !m_info )
+        {
+            // the destructor does not run when the constructor throws, so
+            // free the write struct here; there is no info struct to free
+            png_destroy_write_struct( &m_png, 0 );
+            throw Error( "failed to create png info structs" );
+        }
+    }
+
+    ~PngWriteStruct()
+    {
+        png_destroy_write_struct( &m_png, &m_info );
+    }
+
+    //! The write struct.
+    png_structp GetPng() const { return m_png; }
+    //! The info struct.
+    png_infop GetInfo() const { return m_info; }
+
+private:
+    png_structp m_png;
+    png_infop m_info;
+};
+
+//! Owns an array of height separately allocated rows of pitch bytes each.
+class PngRows : NonCopyable
+{
+public:
+    PngRows( int pitch, int height )
+      : m_rows( new png_bytep[height] ),
+        m_height( height )
+    {
+        for( int y = 0; y < m_height; ++y )
+            m_rows[y] = new png_byte[pitch];
+    }
+
+    ~PngRows()
+    {
+        // free every row before the table that points at them
+        png_bytep* const end = m_rows + m_height;
+        for( png_bytep* row = m_rows; row < end; ++row )
+            delete[] *row;
+        delete[] m_rows;
+    }
+
+    //! The row pointer table.
+    png_bytep* Get() const
+    {
+        return m_rows;
+    }
+
+    //! The start of row y.
+    png_bytep operator[](int y) const
+    {
+        return m_rows[y];
+    }
+
+private:
+    png_bytep* m_rows;
+    int m_height;
+};
+
+//! Represents a DXT compressed image in memory.
+//! The fields are not initialised on construction; Image::Compress fills them in.
+struct DxtData
+{
+    int width;     //!< Image width in pixels.
+    int height;    //!< Image height in pixels.
+    int format; //!< Either kDxt1, kDxt3 or kDxt5.
+    Mem data;      //!< The compressed blocks.
+    bool isColour; //!< Copied from the source image: colour rather than luminance.
+    bool isAlpha;  //!< Copied from the source image: has an alpha channel.
+};
+
+//! Represents an uncompressed RGBA image in memory.
+//! Pixels are stored as 4 bytes each, row after row.
+class Image
+{
+public:
+    //! Creates an empty image.
+    Image();
+
+    //! Replaces the image with the contents of a PNG file.
+    void LoadPng( std::string const& fileName );
+    //! Writes the image to a PNG file.
+    void SavePng( std::string const& fileName ) const;
+
+    //! Replaces the image with the decompressed contents of dxt.
+    void Decompress( DxtData const& dxt );
+    //! Compresses the image into dxt using the given squish flags.
+    void Compress( DxtData& dxt, int flags ) const;
+
+    //! RMS colour error against an image of the same dimensions.
+    double GetRmsError( Image const& image ) const;
+
+private:
+    int m_width;
+    int m_height;
+    bool m_isColour; //!< Either colour or luminance.
+    bool m_isAlpha; //!< Either alpha or not.
+    Mem m_pixels;
+};
+
+//! Creates a 0x0 luminance image without alpha; m_pixels is default-constructed.
+Image::Image()
+  : m_width( 0 ),
+    m_height( 0 ),
+    m_isColour( false ),
+    m_isAlpha( false )
+{
+}
+
+/*! @brief Loads an 8-bit PNG file into this image as RGBA pixels.
+
+    Colour and luminance images, with or without alpha, are accepted.
+    Luminance is replicated into all three colour channels and a missing
+    alpha channel is filled with 255.
+
+    @param fileName Path of the PNG file to read.
+
+    Throws Error when the file cannot be opened or read, does not start with
+    the PNG signature, reports a bit depth other than 8, or yields no rows.
+*/
+void Image::LoadPng( std::string const& fileName )
+{
+    // open the source file
+    File file( fopen( fileName.c_str(), "rb" ) );
+    if( !file.IsValid() )
+    {
+        std::ostringstream oss;
+        oss << "failed to open \"" << fileName << "\" for reading";
+        throw Error( oss.str() );
+    }
+
+    // check the signature bytes
+    png_byte header[8];
+    size_t check = fread( header, 1, 8, file.Get() );
+    if( check != 8 )
+        throw Error( "file read error" );
+    if( png_sig_cmp( header, 0, 8 ) )
+    {
+        std::ostringstream oss;
+        oss << "\"" << fileName << "\" does not look like a png file";
+        throw Error( oss.str() );
+    }
+
+    // read the image into memory
+    // (libpng is told that the 8 signature bytes were already consumed above)
+    PngReadStruct png;
+    png_init_io( png.GetPng(), file.Get() );
+    png_set_sig_bytes( png.GetPng(), 8 );
+    png_read_png( png.GetPng(), png.GetInfo(), PNG_TRANSFORM_EXPAND, 0 );
+
+    // get the image info
+    png_uint_32 width;
+    png_uint_32 height;
+    int bitDepth;
+    int colourType;
+    png_get_IHDR( png.GetPng(), png.GetInfo(), &width, &height, &bitDepth, &colourType, 0, 0, 0 );
+
+    // check the image is 8 bit
+    if( bitDepth != 8 )
+    {
+        std::ostringstream oss;
+        oss << "cannot process " << bitDepth << "-bit image (bit depth must be 8)";
+        throw Error( oss.str() );
+    }
+
+    // copy the data into a contiguous array
+    // NOTE(review): 4*width*height is computed in png_uint_32 and narrowed to
+    // the int that Reset takes, so very large images would wrap - confirm
+    // that this is acceptable for the images the tool is used on
+    m_width = width;
+    m_height = height;
+    m_isColour = ( ( colourType & PNG_COLOR_MASK_COLOR ) != 0 );
+    m_isAlpha = ( ( colourType & PNG_COLOR_MASK_ALPHA ) != 0 );
+    m_pixels.Reset(4*width*height);
+
+    // get the image rows
+    png_bytep const *rows = png_get_rows( png.GetPng(), png.GetInfo() );
+    if( !rows )
+        throw Error( "failed to get image rows" );
+
+    // copy the pixels into the storage
+    // (source pixels are 1 to 4 bytes wide depending on colour and alpha,
+    // destination pixels are always 4 bytes)
+    u8 *dest = m_pixels.Get();
+    for( int y = 0; y < m_height; ++y )
+    {
+        u8 const *src = rows[y];
+        for( int x = 0; x < m_width; ++x )
+        {
+            if( m_isColour )
+            {
+                dest[0] = src[0];
+                dest[1] = src[1];
+                dest[2] = src[2];
+                src += 3;
+            }
+            else
+            {
+                // luminance: replicate the single sample into r, g and b
+                u8 lum = *src++;
+                dest[0] = lum;
+                dest[1] = lum;
+                dest[2] = lum;
+            }
+
+            if( m_isAlpha )
+                dest[3] = *src++;
+            else
+                dest[3] = 255;
+
+            dest += 4;
+        }
+    }
+}
+
+/*! @brief Writes this image to an 8-bit PNG file.
+
+    The PNG gets colour and/or alpha channels according to m_isColour and
+    m_isAlpha; channels the image does not have are dropped.
+
+    @param fileName Path of the PNG file to write.
+
+    Throws Error when the target file cannot be opened for writing.
+*/
+void Image::SavePng( std::string const& fileName ) const
+{
+    // create the target rows
+    int const pixelSize = ( m_isColour ? 3 : 1 ) + ( m_isAlpha ? 1 : 0 );
+    PngRows rows( m_width*pixelSize, m_height );
+
+    // fill the rows with pixel data
+    u8 const *src = m_pixels.Get();
+    for( int y = 0; y < m_height; ++y )
+    {
+        u8 *dest = rows[y];
+        for( int x = 0; x < m_width; ++x )
+        {
+            if( m_isColour )
+            {
+                dest[0] = src[0];
+                dest[1] = src[1];
+                dest[2] = src[2];
+                dest += 3;
+            }
+            else
+                // luminance images are written from the green channel
+                *dest++ = src[1];
+
+            if( m_isAlpha )
+                *dest++ = src[3];
+
+            src += 4;
+        }
+    }
+
+    // set up the image
+    // (the colour type is assembled from the colour and alpha mask bits)
+    PngWriteStruct png;
+    png_set_IHDR(
+        png.GetPng(), png.GetInfo(), m_width, m_height,
+        8, ( m_isColour ? PNG_COLOR_MASK_COLOR : 0) | ( m_isAlpha ? PNG_COLOR_MASK_ALPHA : 0 ),
+        PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT
+    );
+
+    // open the target file
+    File file( fopen( fileName.c_str(), "wb" ) );
+    if( !file.IsValid() )
+    {
+        std::ostringstream oss;
+        oss << "failed to open \"" << fileName << "\" for writing";
+        throw Error( oss.str() );
+    }
+
+    // write the image
+    png_set_rows( png.GetPng(), png.GetInfo(), rows.Get() );
+    png_init_io( png.GetPng(), file.Get() );
+    png_write_png( png.GetPng(), png.GetInfo(), PNG_TRANSFORM_IDENTITY, 0 );
+}
+
+//! Replaces this image with the decompressed contents of dxt.
+void Image::Decompress( DxtData const& dxt )
+{
+    // take over the description of the compressed image
+    m_isAlpha = dxt.isAlpha;
+    m_isColour = dxt.isColour;
+    m_height = dxt.height;
+    m_width = dxt.width;
+
+    // make room for 4 bytes per pixel
+    int const byteCount = 4*m_width*m_height;
+    m_pixels.Reset( byteCount );
+
+    // the whole-image routine expands the blocks into the new storage
+    u8* const pixels = m_pixels.Get();
+    DecompressImage( pixels, m_width, m_height, dxt.data.Get(), dxt.format );
+}
+
+//! Compresses this image into dxt; flags carries the format and fit options.
+void Image::Compress( DxtData& dxt, int flags ) const
+{
+    // describe the image in the output structure, keeping only the format
+    // bits of the flags
+    dxt.isAlpha = m_isAlpha;
+    dxt.isColour = m_isColour;
+    dxt.height = m_height;
+    dxt.width = m_width;
+    dxt.format = flags & ( kDxt1 | kDxt3 | kDxt5 );
+
+    // size the block storage for these dimensions and flags
+    dxt.data.Reset( GetStorageRequirements( m_width, m_height, flags ) );
+
+    // the whole-image routine does the actual compression
+    CompressImage( m_pixels.Get(), m_width, m_height, dxt.data.Get(), flags );
+}
+
+/*! @brief Root of the mean squared colour difference per pixel against image.
+
+    Only the three colour channels take part; alpha is ignored. Throws Error
+    when the two images differ in width or height.
+*/
+double Image::GetRmsError( Image const& image ) const
+{
+    bool const sameSize = ( m_width == image.m_width && m_height == image.m_height );
+    if( !sameSize )
+        throw Error( "image dimensions mismatch when computing RMS error" );
+
+    // walk both pixel arrays in step, summing the squared channel differences
+    u8 const *mine = m_pixels.Get();
+    u8 const *theirs = image.m_pixels.Get();
+    double total = 0;
+    for( int row = 0; row < m_height; ++row )
+    {
+        for( int column = 0; column < m_width; ++column, mine += 4, theirs += 4 )
+        {
+            int squared = 0;
+            for( int channel = 0; channel < 3; ++channel )
+            {
+                int const delta = ( int )mine[channel] - ( int )theirs[channel];
+                squared += delta*delta;
+            }
+            total += ( double )squared;
+        }
+    }
+
+    double const pixelCount = ( double )( m_width*m_height );
+    return std::sqrt( total/pixelCount );
+}
+
+/*! @brief Compresses a PNG to DXT, decompresses it again and prints the RMS error.
+
+    Usage: squishpng [options] <source> [<target>]. When a target is given
+    the round-tripped image is saved to it. Returns 0 on success (or after
+    printing help) and -1 on a usage error or an exception.
+*/
+int main( int argc, char* argv[] )
+{
+    try
+    {
+        // parse the command-line
+        std::string sourceFileName;
+        std::string targetFileName;
+        int format = kDxt1;
+        int fit = kColourClusterFit;
+        int extra = 0;
+        bool help = false;
+        bool arguments = true;
+        bool error = false;
+        for( int i = 1; i < argc; ++i )
+        {
+            // check for options
+            // (several option letters may be combined in one word, e.g. -3r)
+            char const* word = argv[i];
+            if( arguments && word[0] == '-' )
+            {
+                for( int j = 1; word[j] != '\0'; ++j )
+                {
+                    switch( word[j] )
+                    {
+                    case 'h': help = true; break;
+                    case '1': format = kDxt1; break;
+                    case '3': format = kDxt3; break;
+                    case '5': format = kDxt5; break;
+                    case 'r': fit = kColourRangeFit; break;
+                    case 'i': fit = kColourIterativeClusterFit; break;
+                    case 'w': extra = kWeightColourByAlpha; break;
+                    // a second dash ends option parsing for all later words
+                    case '-': arguments = false; break;
+                    default:
+                        std::cerr << "squishpng error: unknown option '" << word[j] << "'" << std::endl;
+                        error = true;
+                    }
+                }
+            }
+            else
+            {
+                // positional arguments: the source first, then the optional target
+                if( sourceFileName.empty() )
+                    sourceFileName.assign( word );
+                else if( targetFileName.empty() )
+                    targetFileName.assign( word );
+                else
+                {
+                    std::cerr << "squishpng error: unexpected argument \"" << word << "\"" << std::endl;
+                    error = true;
+                }
+            }
+        }
+
+        // check arguments
+        if( sourceFileName.empty() )
+        {
+            std::cerr << "squishpng error: no source file given" << std::endl;
+            error = true;
+        }
+        if( help || error )
+        {
+            std::cout
+                << "SYNTAX" << std::endl
+                << "\tsquishpng [-135riw] <source> [<target>]" << std::endl
+                << "OPTIONS" << std::endl
+                << "\t-h\tPrint this help message" << std::endl
+                << "\t-135\tSpecifies whether to use DXT1 (default), DXT3 or DXT5 compression" << std::endl
+                << "\t-r\tUse the fast but inferior range-based colour compressor" << std::endl
+                << "\t-i\tUse the very slow but slightly better iterative colour compressor" << std::endl
+                << "\t-w\tWeight colour values by alpha in the cluster colour compressor" << std::endl
+                ;
+
+            return error ? -1 : 0;
+        }
+
+        // load the source image
+        Image sourceImage;
+        sourceImage.LoadPng( sourceFileName );
+
+        // compress to DXT
+        DxtData dxt;
+        sourceImage.Compress( dxt, format | fit | extra );
+
+        // decompress back
+        Image targetImage;
+        targetImage.Decompress( dxt );
+
+        // compare the images
+        double rmsError = sourceImage.GetRmsError( targetImage );
+        std::cout << sourceFileName << " " << rmsError << std::endl;
+
+        // save the target image if necessary
+        if( !targetFileName.empty() )
+            targetImage.SavePng( targetFileName );
+    }
+    catch( std::exception& excuse )
+    {
+        // complain
+        std::cerr << "squishpng error: " << excuse.what() << std::endl;
+        return -1;
+    }
+
+    // done
+    return 0;
+}
--- a/extern/libsquish-1.15/extra/squishtest.cpp
+++ b/extern/libsquish-1.15/extra/squishtest.cpp
@ -0,0 +1,206 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+/*! @file
+
+    @brief This program tests the error for 1 and 2-colour DXT compression.
+
+    This tests the effectiveness of the DXT compression algorithm for all
+    possible 1 and 2-colour blocks of pixels.
+*/
+
+#include <squish.h>
+#include <iostream>
+#include <cmath>
+#include <cfloat>
+#include <cstdlib>
+
+using namespace squish;
+
+// Mean squared colour error per pixel between two blocks of 16 RGBA pixels:
+// the squared differences of the three colour channels are summed over the
+// block and divided by 16. Alpha is ignored.
+double GetColourError( u8 const* a, u8 const* b )
+{
+    double total = 0.0;
+    for( int index = 0; index < 4*16; ++index )
+    {
+        // every fourth byte is alpha, which is not part of the colour error
+        if( ( index & 3 ) == 3 )
+            continue;
+
+        int const delta = ( int )a[index] - ( int )b[index];
+        total += ( double )( delta*delta );
+    }
+    return total / 16.0;
+}
+
+/*! @brief Measures the round-trip error of blocks filled with a single colour.
+
+    Every value of each colour channel is tried in turn, with the other two
+    channels held at 0 and alpha at 255. Prints the smallest and largest
+    per-block RMS error and the RMS over all blocks to stdout.
+
+    @param flags Squish flags passed to Compress and Decompress.
+*/
+void TestOneColour( int flags )
+{
+    u8 input[4*16];
+    u8 output[4*16];
+    u8 block[16];
+
+    double avg = 0.0, min = DBL_MAX, max = -DBL_MAX;
+    int counter = 0;
+
+    // start from opaque black
+    for( int i = 0; i < 16*4; ++i )
+        input[i] = ( ( i % 4 ) == 3 ) ? 255 : 0;
+
+    // test all single-channel colours
+    for( int channel = 0; channel < 3; ++channel )
+    {
+        // inclusive bound: 255 is a valid channel value and has to be
+        // covered as well
+        for( int value = 0; value <= 255; ++value )
+        {
+            // set the channel value
+            for( int i = 0; i < 16; ++i )
+                input[4*i + channel] = ( u8 )value;
+
+            // compress and decompress
+            Compress( input, block, flags );
+            Decompress( output, block, flags );
+
+            // test the results
+            double rm = GetColourError( input, output );
+            double rms = std::sqrt( rm );
+
+            // accumulate stats; the mean squared error is summed so that
+            // the final figure is an RMS over all blocks
+            min = std::min( min, rms );
+            max = std::max( max, rms );
+            avg += rm;
+            ++counter;
+        }
+
+        // reset the channel value
+        for( int i = 0; i < 16; ++i )
+            input[4*i + channel] = 0;
+    }
+
+    // finish stats
+    avg = std::sqrt( avg/counter );
+
+    // show stats
+    std::cout << "one colour error (min, max, avg): "
+        << min << ", " << max << ", " << avg << std::endl;
+}
+
+/*! @brief Measures the round-trip error of 1000 blocks of a random solid colour.
+
+    Each block is filled with one opaque colour whose three channels are drawn
+    from rand(). Prints the smallest and largest per-block RMS error and the
+    RMS over all blocks to stdout.
+
+    @param flags Squish flags passed to Compress and Decompress.
+*/
+void TestOneColourRandom( int flags )
+{
+    int const testCount = 1000;
+
+    u8 pixels[4*16];
+    u8 decoded[4*16];
+    u8 compressed[16];
+
+    // alpha is opaque in every test and is never overwritten below
+    for( int i = 0; i < 16; ++i )
+        pixels[4*i + 3] = 255;
+
+    double sumSquares = 0.0;
+    double lowest = DBL_MAX;
+    double highest = -DBL_MAX;
+    int samples = 0;
+    while( samples < testCount )
+    {
+        // draw one random value per colour channel and flood the block with it
+        for( int channel = 0; channel < 3; ++channel )
+        {
+            u8 const value = ( u8 )( rand() & 0xff );
+            for( int i = 0; i < 16; ++i )
+                pixels[4*i + channel] = value;
+        }
+
+        // round-trip the block
+        Compress( pixels, compressed, flags );
+        Decompress( decoded, compressed, flags );
+
+        // measure it
+        double const meanSquare = GetColourError( pixels, decoded );
+        double const rms = std::sqrt( meanSquare );
+
+        // fold it into the running statistics
+        lowest = std::min( lowest, rms );
+        highest = std::max( highest, rms );
+        sumSquares += meanSquare;
+        ++samples;
+    }
+
+    // RMS over all blocks
+    double const overall = std::sqrt( sumSquares/samples );
+
+    std::cout << "random one colour error (min, max, avg): "
+        << lowest << ", " << highest << ", " << overall << std::endl;
+}
+
+/*! @brief Measures the round-trip error of blocks made of two colours.
+
+    For each colour channel every pair of distinct values is tried, the
+    first 8 pixels taking the lower value and the last 8 the higher one,
+    with the other two channels held at 0 and alpha at 255. Prints the
+    smallest and largest per-block RMS error and the RMS over all blocks
+    to stdout.
+
+    @param flags Squish flags passed to Compress and Decompress.
+*/
+void TestTwoColour( int flags )
+{
+    u8 input[4*16];
+    u8 output[4*16];
+    u8 block[16];
+
+    double avg = 0.0, min = DBL_MAX, max = -DBL_MAX;
+    int counter = 0;
+
+    // start from opaque black
+    for( int i = 0; i < 16*4; ++i )
+        input[i] = ( ( i % 4 ) == 3 ) ? 255 : 0;
+
+    // test all two-value combinations of each channel
+    for( int channel = 0; channel < 3; ++channel )
+    {
+        // inclusive bounds: 255 is a valid channel value, so pairs that
+        // contain it have to be covered as well
+        for( int value1 = 0; value1 <= 255; ++value1 )
+        {
+            for( int value2 = value1 + 1; value2 <= 255; ++value2 )
+            {
+                // set the channel value
+                for( int i = 0; i < 16; ++i )
+                    input[4*i + channel] = ( u8 )( ( i < 8 ) ? value1 : value2 );
+
+                // compress and decompress
+                Compress( input, block, flags );
+                Decompress( output, block, flags );
+
+                // test the results
+                double rm = GetColourError( input, output );
+                double rms = std::sqrt( rm );
+
+                // accumulate stats; the mean squared error is summed so
+                // that the final figure is an RMS over all blocks
+                min = std::min( min, rms );
+                max = std::max( max, rms );
+                avg += rm;
+                ++counter;
+            }
+        }
+
+        // reset the channel value
+        for( int i = 0; i < 16; ++i )
+            input[4*i + channel] = 0;
+    }
+
+    // finish stats
+    avg = std::sqrt( avg/counter );
+
+    // show stats
+    std::cout << "two colour error (min, max, avg): "
+        << min << ", " << max << ", " << avg << std::endl;
+}
+
+// Runs the three error measurements for DXT1: the random single-colour test
+// with the range fit, then the exhaustive one- and two-colour tests with no
+// fit flag given.
+int main()
+{
+    TestOneColourRandom( kDxt1 | kColourRangeFit );
+    TestOneColour( kDxt1 );
+    TestTwoColour( kDxt1 );
+}
--- a/extern/libsquish-1.15/libSquish.png
+++ b/extern/libsquish-1.15/libSquish.png
--- a/extern/libsquish-1.15/libSquish.pri
+++ b/extern/libsquish-1.15/libSquish.pri
@ -0,0 +1,26 @@
+# File lists for the qmake build; libSquish.pro pulls this in with include().
+
+# Header files, plus the .inl file, which is not a stand-alone translation
+# unit (presumably it is #included by singlecolourfit.cpp - confirm).
+HEADERS += \
+   squish.h \
+   alpha.h \
+   clusterfit.h \
+   colourblock.h \
+   colourfit.h \
+   colourset.h \
+   maths.h \
+   rangefit.h \
+   simd.h \
+   simd_float.h \
+   simd_sse.h \
+   simd_ve.h \
+   singlecolourfit.h \
+   singlecolourlookup.inl
+
+# Translation units only; headers belong in HEADERS above.
+SOURCES += \
+   alpha.cpp \
+   clusterfit.cpp \
+   colourblock.cpp \
+   colourfit.cpp \
+   colourset.cpp \
+   maths.cpp \
+   rangefit.cpp \
+   singlecolourfit.cpp \
+   squish.cpp
--- a/extern/libsquish-1.15/libSquish.pro
+++ b/extern/libsquish-1.15/libSquish.pro
@ -0,0 +1,32 @@
+# qmake project for building squish as a library.
+TARGET = squish
+TEMPLATE = lib
+
+# HEADERS and SOURCES come from the shared file list.
+include(libSquish.pri)
+
+# Drop the Qt gui module from the build.
+QT -= gui
+
+# Static library with thread support, buildable in both debug and release.
+CONFIG += staticlib thread
+CONFIG += debug_and_release
+
+# On unix the debug build gets a "_debug" suffix on the target name.
+CONFIG(debug, debug|release) {
+   unix:TARGET = $$join(TARGET,,,_debug)
+}
+
+# Directories for generated and intermediate files.
+MOC_DIR = mocs
+OBJECTS_DIR = objs
+RCC_DIR = rccs
+UI_DIR = uics
+
+# Debug builds use separate intermediate directories:
+# suffix "_debug" on unix, suffix "d" on win32.
+CONFIG(debug, debug|release) {
+   unix:MOC_DIR = $$join(MOC_DIR,,,_debug)
+   unix:OBJECTS_DIR = $$join(OBJECTS_DIR,,,_debug)
+   unix:RCC_DIR = $$join(RCC_DIR,,,_debug)
+   unix:UI_DIR = $$join(UI_DIR,,,_debug)
+   win32:MOC_DIR = $$join(MOC_DIR,,,d)
+   win32:OBJECTS_DIR = $$join(OBJECTS_DIR,,,d)
+   win32:RCC_DIR = $$join(RCC_DIR,,,d)
+   win32:UI_DIR = $$join(UI_DIR,,,d)
+}
+
+# Compile with OpenMP on unix. Only compiler flags are set here; nothing in
+# this file adds OpenMP to the link flags.
+unix:QMAKE_CXXFLAGS += -DSQUISH_USE_OPENMP -fopenmp
+
--- a/extern/libsquish-1.15/libSquish.svg
+++ b/extern/libsquish-1.15/libSquish.svg
@ -0,0 +1,238 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="630"
+   height="230"
+   viewBox="0 0 630 230"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.0 r9654"
+   sodipodi:docname="libSquish.svg"
+   inkscape:export-filename="/Users/roettger/Projects/libsquish/libSquish.png"
+   inkscape:export-xdpi="119.99844"
+   inkscape:export-ydpi="119.99844">
+  <metadata
+     id="metadata26">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs24">
+    <marker
+       inkscape:stockid="DotL"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="DotL"
+       style="overflow:visible">
+      <path
+         id="path3691"
+         d="m -2.5,-1 c 0,2.76 -2.24,5 -5,5 -2.76,0 -5,-2.24 -5,-5 0,-2.76 2.24,-5 5,-5 2.76,0 5,2.24 5,5 z"
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none;marker-end:none"
+         transform="matrix(0.8,0,0,0.8,5.92,0.8)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <marker
+       inkscape:stockid="TriangleInM"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="TriangleInM"
+       style="overflow:visible">
+      <path
+         id="path3766"
+         d="m 5.77,0 -8.65,5 0,-10 8.65,5 z"
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
+         transform="scale(-0.4,-0.4)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <marker
+       inkscape:stockid="TriangleOutM"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="TriangleOutM"
+       style="overflow:visible">
+      <path
+         id="path3775"
+         d="m 5.77,0 -8.65,5 0,-10 8.65,5 z"
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
+         transform="scale(0.4,0.4)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <marker
+       inkscape:stockid="DotM"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="DotM"
+       style="overflow:visible">
+      <path
+         id="path3694"
+         d="m -2.5,-1 c 0,2.76 -2.24,5 -5,5 -2.76,0 -5,-2.24 -5,-5 0,-2.76 2.24,-5 5,-5 2.76,0 5,2.24 5,5 z"
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none;marker-end:none"
+         transform="matrix(0.4,0,0,0.4,2.96,0.4)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="Arrow1Mend"
+       style="overflow:visible">
+      <path
+         id="path3638"
+         d="M 0,0 5,-5 -12.5,0 5,5 0,0 z"
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <inkscape:perspective
+       sodipodi:type="inkscape:persp3d"
+       inkscape:vp_x="0 : 200 : 1"
+       inkscape:vp_y="0 : 1000 : 0"
+       inkscape:vp_z="420 : 200 : 1"
+       inkscape:persp3d-origin="210 : 133.33333 : 1"
+       id="perspective28" />
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="1436"
+     inkscape:window-height="856"
+     id="namedview22"
+     showgrid="false"
+     inkscape:zoom="0.79420663"
+     inkscape:cx="437.50383"
+     inkscape:cy="-3.1396505"
+     inkscape:window-x="4"
+     inkscape:window-y="22"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g3960" />
+  <text
+     xml:space="preserve"
+     style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Verdana;-inkscape-font-specification:Verdana"
+     x="102.93208"
+     y="-7.535553"
+     id="text3010"
+     sodipodi:linespacing="125%"><tspan
+       sodipodi:role="line"
+       id="tspan3012"
+       x="102.93208"
+       y="-7.535553" /><tspan
+       sodipodi:role="line"
+       x="102.93208"
+       y="4.964447"
+       id="tspan3014" /></text>
+  <g
+     id="g3805"
+     transform="matrix(1,0,0,0.38948748,-4,-80.62777)" />
+  <text
+     xml:space="preserve"
+     style="font-size:11.97706985px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000080;fill-opacity:1;stroke:none;font-family:Sans;-inkscape-font-specification:Terminal"
+     x="205.95784"
+     y="34.59861"
+     id="text3041-8-9-9"
+     sodipodi:linespacing="125%"
+     transform="scale(1.1185212,0.8940376)"><tspan
+       sodipodi:role="line"
+       id="tspan3043-42-2-8"
+       x="205.95784"
+       y="34.59861"
+       style="font-size:86.2348938px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#000080;font-family:Sans;-inkscape-font-specification:Terminal" /></text>
+  <g
+     id="g3960"
+     transform="matrix(1.2774265,0,0,1.2774265,-32.35617,-208.47432)">
+    <text
+       xml:space="preserve"
+       style="font-size:10.98347282px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000080;fill-opacity:1;stroke:none;font-family:Sans;-inkscape-font-specification:Terminal"
+       x="192.10129"
+       y="275.97144"
+       id="text3041-8-9"
+       sodipodi:linespacing="125%"
+       transform="scale(1.0257307,0.9749148)"><tspan
+         sodipodi:role="line"
+         id="tspan3043-42-2"
+         x="192.10129"
+         y="275.97144"
+         style="font-size:79.08100128px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#000080;font-family:Sans;-inkscape-font-specification:Terminal" /></text>
+    <text
+       transform="scale(1.0257307,0.97491477)"
+       sodipodi:linespacing="125%"
+       id="text3041-8-6"
+       y="248.2854"
+       x="87.743195"
+       style="font-size:85.09155273px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000080;fill-opacity:1;stroke:none;font-family:Bank Gothic;-inkscape-font-specification:Bank Gothic"
+       xml:space="preserve"><tspan
+         style="font-size:85.09155273px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000080;font-family:Bank Gothic;-inkscape-font-specification:Bank Gothic"
+         y="248.2854"
+         x="87.743195"
+         id="tspan3043-42-9"
+         sodipodi:role="line">lib</tspan></text>
+    <flowRoot
+       xml:space="preserve"
+       id="flowRoot3097"
+       style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial"
+       transform="matrix(0.7828239,0,0,0.7828239,25.329183,69.259813)"><flowRegion
+         id="flowRegion3099"><rect
+           id="rect3101"
+           width="33.49387"
+           height="62.420395"
+           x="534.37952"
+           y="241.90614" /></flowRegion><flowPara
+         id="flowPara3103"></flowPara></flowRoot>    <g
+       id="g3907"
+       transform="matrix(0.72690637,0,0,0.71996495,139.8595,-23.170483)">
+      <path
+         sodipodi:nodetypes="sscccssccsssssscss"
+         transform="matrix(0.7828239,0,0,0.7828239,25.329183,69.259813)"
+         inkscape:connector-curvature="0"
+         id="path3105"
+         d="m 339.50606,300.90773 c -1.95856,3.69785 -0.75722,15.26794 4.45166,22.34714 14.00779,19.03753 45.78914,51.10431 45.78914,51.10431 l 51.57631,56.40186 5.19946,85.81798 c 0,0 7.52237,4.52608 11.73442,5.45768 4.95508,1.09594 10.28542,1.16596 15.22449,0 4.41764,-1.04286 12.17959,-6.08979 12.17959,-6.08979 l 4.56735,-85.25713 c 0,0 64.30741,-68.15797 96.54638,-102.89439 3.40006,-3.66345 5.37777,-8.75489 6.0898,-13.70204 0.77539,-5.38744 1.0823,-11.8102 -2.15457,-16.18608 -5.64272,-7.62832 -16.74691,-9.28331 -25.88163,-11.85011 -20.66448,-5.80658 -42.50429,-6.55141 -63.94284,-7.61225 -25.85497,-1.27937 -51.84264,-0.56565 -77.64489,1.52245 -17.88376,1.44728 -53.2857,7.61224 -53.2857,7.61224 0,0 -13.56919,3.10259 -19.79183,6.0898 -3.87132,1.85844 -8.6472,3.44346 -10.65714,7.23833 z"
+         style="fill:#000000;fill-opacity:1;stroke:#8b8b8b;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
+      <path
+         transform="matrix(0.7828239,0,0,0.7828239,73.071791,66.123437)"
+         d="m 516.39198,310.38028 c 0,10.32604 -49.82684,18.69695 -111.29136,18.69695 -61.46453,0 -111.29137,-8.37091 -111.29137,-18.69695 0,-10.32604 49.82684,-18.69695 111.29137,-18.69695 61.46452,0 111.29136,8.37091 111.29136,18.69695 z"
+         sodipodi:ry="18.696951"
+         sodipodi:rx="111.29137"
+         sodipodi:cy="310.38028"
+         sodipodi:cx="405.10062"
+         id="path3905"
+         style="fill:#cccccc;fill-opacity:1;fill-rule:evenodd;stroke:#8b8b8b;stroke-opacity:1"
+         sodipodi:type="arc" />
+    </g>
+    <text
+       xml:space="preserve"
+       style="font-size:85.09155273px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000080;fill-opacity:1;stroke:none;font-family:Bank Gothic;-inkscape-font-specification:Bank Gothic"
+       x="40.24892"
+       y="316.10275"
+       id="text3041-8"
+       sodipodi:linespacing="125%"
+       transform="scale(1.0257307,0.97491477)"><tspan
+         sodipodi:role="line"
+         id="tspan3043-42"
+         x="40.24892"
+         y="316.10275"
+         style="font-size:85.09155273px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000080;font-family:Bank Gothic;-inkscape-font-specification:Bank Gothic">Squish</tspan></text>
+  </g>
+</svg>
--- a/extern/libsquish-1.15/libsquish.pc.in
+++ b/extern/libsquish-1.15/libsquish.pc.in
@ -0,0 +1,13 @@
+# pkg-config template for libsquish. @PREFIX@ and @LIB_PATH@ are placeholders
+# that are expected to be substituted when the build generates libsquish.pc.
+prefix=@PREFIX@
+exec_prefix=${prefix}
+libdir=${prefix}/@LIB_PATH@
+sharedlibdir=${libdir}
+includedir=${prefix}/include
+
+Name: libsquish
+Description: squish DXT library
+# Must match the release this file ships with (libsquish-1.15).
+Version: 1.15
+
+Requires:
+# The library target is named "squish", so the linker flag is -lsquish;
+# -llibsquish would make the linker search for "liblibsquish".
+Libs: -L${libdir} -L${sharedlibdir} -lsquish
+Cflags: -I${includedir}
--- a/extern/libsquish-1.15/maths.cpp
+++ b/extern/libsquish-1.15/maths.cpp
@ -0,0 +1,259 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+/*! @file
+
+    The symmetric eigensystem solver algorithm is from
+    http://www.geometrictools.com/Documentation/EigenSymmetric3x3.pdf
+*/
+
+#include "maths.h"
+#include "simd.h"
+#include <cfloat>
+
+namespace squish {
+
+// Returns the weighted covariance of n points about their weighted centroid.
+// points and weights are read at indices 0..n-1. The result holds the six
+// unique entries in the order xx, xy, xz, yy, yz, zz.
+Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights )
+{
+    // weighted sum of the points together with the sum of the weights
+    Vec3 centroid( 0.0f );
+    float weightSum = 0.0f;
+    for( int i = 0; i < n; ++i )
+    {
+        float const w = weights[i];
+        weightSum += w;
+        centroid += w*points[i];
+    }
+
+    // normalise to the centroid unless the total weight is negligible
+    if( weightSum > FLT_EPSILON )
+        centroid /= weightSum;
+
+    // accumulate the upper triangle of the symmetric matrix
+    Sym3x3 covariance( 0.0f );
+    for( int i = 0; i < n; ++i )
+    {
+        Vec3 const offset = points[i] - centroid;
+        Vec3 const scaled = weights[i]*offset;
+
+        covariance[0] += offset.X()*scaled.X();
+        covariance[1] += offset.X()*scaled.Y();
+        covariance[2] += offset.X()*scaled.Z();
+        covariance[3] += offset.Y()*scaled.Y();
+        covariance[4] += offset.Y()*scaled.Z();
+        covariance[5] += offset.Z()*scaled.Z();
+    }
+
+    return covariance;
+}
+
+#if 0
+
+// Helper for the closed-form solver (currently compiled out by the enclosing
+// #if 0). Forms M = matrix - evalue*I, evaluates the 2x2 cofactor products of
+// M into U, and returns the column of U that contains the entry of largest
+// magnitude.
+static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
+{
+    // compute M
+    Sym3x3 m;
+    m[0] = matrix[0] - evalue;
+    m[1] = matrix[1];
+    m[2] = matrix[2];
+    m[3] = matrix[3] - evalue;
+    m[4] = matrix[4];
+    m[5] = matrix[5] - evalue;
+
+    // compute U
+    Sym3x3 u;
+    u[0] = m[3]*m[5] - m[4]*m[4];
+    u[1] = m[2]*m[4] - m[1]*m[5];
+    u[2] = m[1]*m[4] - m[2]*m[3];
+    u[3] = m[0]*m[5] - m[2]*m[2];
+    u[4] = m[1]*m[2] - m[4]*m[0];
+    u[5] = m[0]*m[3] - m[1]*m[1];
+
+    // find the largest component
+    float mc = std::fabs( u[0] );
+    int mi = 0;
+    for( int i = 1; i < 6; ++i )
+    {
+        float c = std::fabs( u[i] );
+        if( c > mc )
+        {
+            mc = c;
+            mi = i;
+        }
+    }
+
+    // pick the column with this component
+    // (indices 0 -> first column, 1 and 3 -> second, 2, 4 and 5 -> third)
+    switch( mi )
+    {
+    case 0:
+        return Vec3( u[0], u[1], u[2] );
+
+    case 1:
+    case 3:
+        return Vec3( u[1], u[3], u[4] );
+
+    default:
+        return Vec3( u[2], u[4], u[5] );
+    }
+}
+
+// Helper for the closed-form solver (currently compiled out by the enclosing
+// #if 0). Forms M = matrix - evalue*I, locates the entry of M with the
+// largest magnitude and builds the returned vector from entries of M chosen
+// by that index.
+static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue )
+{
+    // compute M
+    Sym3x3 m;
+    m[0] = matrix[0] - evalue;
+    m[1] = matrix[1];
+    m[2] = matrix[2];
+    m[3] = matrix[3] - evalue;
+    m[4] = matrix[4];
+    m[5] = matrix[5] - evalue;
+
+    // find the largest component
+    float mc = std::fabs( m[0] );
+    int mi = 0;
+    for( int i = 1; i < 6; ++i )
+    {
+        float c = std::fabs( m[i] );
+        if( c > mc )
+        {
+            mc = c;
+            mi = i;
+        }
+    }
+
+    // pick the first eigenvector based on this index
+    switch( mi )
+    {
+    case 0:
+    case 1:
+        return Vec3( -m[1], m[0], 0.0f );
+
+    case 2:
+        return Vec3( m[2], 0.0f, -m[0] );
+
+    case 3:
+    case 4:
+        return Vec3( 0.0f, -m[4], m[3] );
+
+    default:
+        return Vec3( 0.0f, -m[5], m[4] );
+    }
+}
+
+// Closed-form variant (currently compiled out by the enclosing #if 0; the
+// #else branch provides the version that is built). Derives the eigenvalues
+// from the characteristic cubic of the matrix, keeps the one of largest
+// magnitude and returns a matching vector from the helpers above.
+Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
+{
+    // compute the cubic coefficients
+    // (c0 is the determinant, c2 the trace of the symmetric matrix)
+    float c0 = matrix[0]*matrix[3]*matrix[5]
+        + 2.0f*matrix[1]*matrix[2]*matrix[4]
+        - matrix[0]*matrix[4]*matrix[4]
+        - matrix[3]*matrix[2]*matrix[2]
+        - matrix[5]*matrix[1]*matrix[1];
+    float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5]
+        - matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4];
+    float c2 = matrix[0] + matrix[3] + matrix[5];
+
+    // compute the quadratic coefficients
+    float a = c1 - ( 1.0f/3.0f )*c2*c2;
+    float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0;
+
+    // compute the root count check
+    float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a;
+
+    // test the multiplicity
+    if( FLT_EPSILON < Q )
+    {
+        // only one root, which implies we have a multiple of the identity
+        return Vec3( 1.0f );
+    }
+    else if( Q < -FLT_EPSILON )
+    {
+        // three distinct roots
+        float theta = std::atan2( std::sqrt( -Q ), -0.5f*b );
+        float rho = std::sqrt( 0.25f*b*b - Q );
+
+        float rt = std::pow( rho, 1.0f/3.0f );
+        float ct = std::cos( theta/3.0f );
+        float st = std::sin( theta/3.0f );
+
+        float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct;
+        float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )sqrt( 3.0f )*st );
+        float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )sqrt( 3.0f )*st );
+
+        // pick the larger
+        // (compared by absolute value; l1 ends up holding the winner)
+        if( std::fabs( l2 ) > std::fabs( l1 ) )
+            l1 = l2;
+        if( std::fabs( l3 ) > std::fabs( l1 ) )
+            l1 = l3;
+
+        // get the eigenvector
+        return GetMultiplicity1Evector( matrix, l1 );
+    }
+    else // if( -FLT_EPSILON <= Q && Q <= FLT_EPSILON )
+    {
+        // two roots
+        // (rt is a cube root of b/2 taken according to the sign of b)
+        float rt;
+        if( b < 0.0f )
+            rt = -std::pow( -0.5f*b, 1.0f/3.0f );
+        else
+            rt = std::pow( 0.5f*b, 1.0f/3.0f );
+
+        float l1 = ( 1.0f/3.0f )*c2 + rt;        // repeated
+        float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt;
+
+        // get the eigenvector
+        // (the helper is chosen by which root has the larger magnitude)
+        if( std::fabs( l1 ) > std::fabs( l2 ) )
+            return GetMultiplicity2Evector( matrix, l1 );
+        else
+            return GetMultiplicity1Evector( matrix, l2 );
+    }
+}
+
+#else
+
+// number of multiply-and-rescale steps performed by ComputePrincipleComponent
+#define POWER_ITERATION_COUNT    8
+
+// Iterative variant that is actually built (the #else branch). Starts from a
+// vector of all ones and, POWER_ITERATION_COUNT times, multiplies it by the
+// matrix and rescales the result by its largest x/y/z component. The xyz part
+// of the final vector is returned; it is not normalised to unit length.
+// NOTE(review): if the product w is all zero (e.g. a zero matrix) the
+// rescale divides by zero; what Reciprocal returns then depends on the Vec4
+// implementation, which is not visible here - confirm callers tolerate it.
+Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
+{
+    // expand the six stored entries into the three rows of the full matrix
+    Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
+    Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
+    Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
+    Vec4 v = VEC4_CONST( 1.0f );
+    for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
+    {
+        // matrix multiply
+        Vec4 w = row0*v.SplatX();
+        w = MultiplyAdd(row1, v.SplatY(), w);
+        w = MultiplyAdd(row2, v.SplatZ(), w);
+
+        // get max component from xyz in all channels
+        // (presumably a signed maximum, not a maximum of magnitudes - confirm)
+        Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
+
+        // divide through and advance
+        v = w*Reciprocal(a);
+    }
+    return v.GetVec3();
+}
+
+#endif
+
+} // namespace squish
--- a/extern/libsquish-1.15/maths.h
+++ b/extern/libsquish-1.15/maths.h
@ -0,0 +1,233 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_MATHS_H
+#define SQUISH_MATHS_H
+
+#include <cmath>
+#include <algorithm>
+#include "config.h"
+
+namespace squish {
+
+// Three-component float vector with component-wise arithmetic.
+class Vec3
+{
+public:
+    // parameter type used by the operators and friends below
+    typedef Vec3 const& Arg;
+
+    // leaves the components uninitialised
+    Vec3() {}
+
+    // sets all three components to s
+    explicit Vec3( float s ) : m_x( s ), m_y( s ), m_z( s ) {}
+
+    Vec3( float x, float y, float z ) : m_x( x ), m_y( y ), m_z( z ) {}
+
+    float X() const { return m_x; }
+    float Y() const { return m_y; }
+    float Z() const { return m_z; }
+
+    Vec3 operator-() const
+    {
+        return Vec3( -m_x, -m_y, -m_z );
+    }
+
+    // compound assignment, all component-wise
+
+    Vec3& operator+=( Arg v )
+    {
+        m_x += v.m_x; m_y += v.m_y; m_z += v.m_z;
+        return *this;
+    }
+
+    Vec3& operator-=( Arg v )
+    {
+        m_x -= v.m_x; m_y -= v.m_y; m_z -= v.m_z;
+        return *this;
+    }
+
+    Vec3& operator*=( Arg v )
+    {
+        m_x *= v.m_x; m_y *= v.m_y; m_z *= v.m_z;
+        return *this;
+    }
+
+    Vec3& operator*=( float s )
+    {
+        m_x *= s; m_y *= s; m_z *= s;
+        return *this;
+    }
+
+    Vec3& operator/=( Arg v )
+    {
+        m_x /= v.m_x; m_y /= v.m_y; m_z /= v.m_z;
+        return *this;
+    }
+
+    // scalar division multiplies by the reciprocal of s
+    Vec3& operator/=( float s )
+    {
+        float const inv = 1.0f/s;
+        m_x *= inv; m_y *= inv; m_z *= inv;
+        return *this;
+    }
+
+    // binary operators, all component-wise
+
+    friend Vec3 operator+( Arg left, Arg right )
+    {
+        return Vec3( left.m_x + right.m_x, left.m_y + right.m_y, left.m_z + right.m_z );
+    }
+
+    friend Vec3 operator-( Arg left, Arg right )
+    {
+        return Vec3( left.m_x - right.m_x, left.m_y - right.m_y, left.m_z - right.m_z );
+    }
+
+    friend Vec3 operator*( Arg left, Arg right )
+    {
+        return Vec3( left.m_x*right.m_x, left.m_y*right.m_y, left.m_z*right.m_z );
+    }
+
+    friend Vec3 operator*( Arg left, float right )
+    {
+        return Vec3( left.m_x*right, left.m_y*right, left.m_z*right );
+    }
+
+    friend Vec3 operator*( float left, Arg right )
+    {
+        return Vec3( right.m_x*left, right.m_y*left, right.m_z*left );
+    }
+
+    friend Vec3 operator/( Arg left, Arg right )
+    {
+        return Vec3( left.m_x/right.m_x, left.m_y/right.m_y, left.m_z/right.m_z );
+    }
+
+    // like operator/=( float ), multiplies by the reciprocal
+    friend Vec3 operator/( Arg left, float right )
+    {
+        float const inv = 1.0f/right;
+        return Vec3( left.m_x*inv, left.m_y*inv, left.m_z*inv );
+    }
+
+    friend float Dot( Arg left, Arg right )
+    {
+        return left.m_x*right.m_x + left.m_y*right.m_y + left.m_z*right.m_z;
+    }
+
+    // component-wise minimum
+    friend Vec3 Min( Arg left, Arg right )
+    {
+        float const x = std::min( left.m_x, right.m_x );
+        float const y = std::min( left.m_y, right.m_y );
+        float const z = std::min( left.m_z, right.m_z );
+        return Vec3( x, y, z );
+    }
+
+    // component-wise maximum
+    friend Vec3 Max( Arg left, Arg right )
+    {
+        float const x = std::max( left.m_x, right.m_x );
+        float const y = std::max( left.m_y, right.m_y );
+        float const z = std::max( left.m_z, right.m_z );
+        return Vec3( x, y, z );
+    }
+
+    // rounds each component towards zero
+    friend Vec3 Truncate( Arg v )
+    {
+        return Vec3(
+            TowardsZero( v.m_x ),
+            TowardsZero( v.m_y ),
+            TowardsZero( v.m_z )
+        );
+    }
+
+private:
+    // floor for positive values, ceil for everything else
+    static float TowardsZero( float c )
+    {
+        return c > 0.0f ? std::floor( c ) : std::ceil( c );
+    }
+
+    float m_x;
+    float m_y;
+    float m_z;
+};
+
+// Squared length of v, computed as the dot product of v with itself.
+inline float LengthSquared( Vec3::Arg v )
+{
+    return Dot( v, v );
+}
+
+// Storage for six floats addressed by index 0..5. No bounds checking is done
+// on the index.
+class Sym3x3
+{
+public:
+    // leaves all six entries uninitialised
+    Sym3x3() {}
+
+    // sets all six entries to s
+    Sym3x3( float s )
+    {
+        std::fill( m_x, m_x + 6, s );
+    }
+
+    float operator[]( int index ) const { return m_x[index]; }
+
+    float& operator[]( int index ) { return m_x[index]; }
+
+private:
+    float m_x[6];
+};
+
+// Weighted covariance of n points; points and weights each supply n entries.
+Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights );
+
+// Direction of the principle component of the given matrix
+// (presumably not normalised to unit length - check the implementation).
+Vec3 ComputePrincipleComponent( Sym3x3 const& matrix );
+
+} // namespace squish
+
+#endif // ndef SQUISH_MATHS_H
--- a/extern/libsquish-1.15/rangefit.cpp
+++ b/extern/libsquish-1.15/rangefit.cpp
@ -0,0 +1,201 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#include "rangefit.h"
+#include "colourset.h"
+#include "colourblock.h"
+#include <cfloat>
+
+namespace squish {
+
+// Chooses the two endpoint colours for the fit: the points of the colour set
+// with the smallest and largest projection onto the principle component of
+// their weighted covariance, clamped to [0, 1] and snapped to the 31/63/31
+// grid. metric may be null, in which case all channels are weighted equally.
+RangeFit::RangeFit( ColourSet const* colours, int flags, float* metric )
+  : ColourFit( colours, flags )
+{
+    // per-channel error weights (old perceptual = 0.2126f, 0.7152f, 0.0722f)
+    m_metric = metric ? Vec3( metric[0], metric[1], metric[2] ) : Vec3( 1.0f );
+
+    // nothing has been written yet, so any candidate beats this
+    m_besterror = FLT_MAX;
+
+    // pull the point set out of the colour set
+    int const count = m_colours->GetCount();
+    Vec3 const* values = m_colours->GetPoints();
+    float const* weights = m_colours->GetWeights();
+
+    // axis onto which the points are projected
+    Sym3x3 const covariance = ComputeWeightedCovariance( count, values, weights );
+    Vec3 const axis = ComputePrincipleComponent( covariance );
+
+    // the extreme points along the axis become the codebook endpoints
+    Vec3 start( 0.0f );
+    Vec3 end( 0.0f );
+    if( count > 0 )
+    {
+        start = end = values[0];
+        float lowest = Dot( values[0], axis );
+        float highest = lowest;
+        for( int i = 1; i < count; ++i )
+        {
+            float const projection = Dot( values[i], axis );
+            if( projection < lowest )
+            {
+                start = values[i];
+                lowest = projection;
+            }
+            else if( projection > highest )
+            {
+                end = values[i];
+                highest = projection;
+            }
+        }
+    }
+
+    // clamp both endpoints to [0, 1]
+    Vec3 const upper( 1.0f );
+    Vec3 const lower( 0.0f );
+    start = Min( upper, Max( lower, start ) );
+    end = Min( upper, Max( lower, end ) );
+
+    // round to the nearest grid position and store
+    Vec3 const grid( 31.0f, 63.0f, 31.0f );
+    Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
+    Vec3 const half( 0.5f );
+    m_start = Truncate( grid*start + half )*gridrcp;
+    m_end = Truncate( grid*end + half )*gridrcp;
+}
+
+// Fits the colour set with a three-entry codebook (both endpoints plus their
+// midpoint) and writes the block if the total error beats the best so far.
+void RangeFit::Compress3( void* block )
+{
+    int const count = m_colours->GetCount();
+    Vec3 const* values = m_colours->GetPoints();
+
+    // the codebook: start, end and the point halfway between them
+    Vec3 const codes[3] = {
+        m_start,
+        m_end,
+        0.5f*m_start + 0.5f*m_end
+    };
+
+    // assign every point its nearest code under the metric; ties keep the
+    // lower code index
+    u8 closest[16];
+    float total = 0.0f;
+    for( int i = 0; i < count; ++i )
+    {
+        float nearest = FLT_MAX;
+        int code = 0;
+        for( int j = 0; j < 3; ++j )
+        {
+            float const d = LengthSquared( m_metric*( values[i] - codes[j] ) );
+            if( d < nearest )
+            {
+                nearest = d;
+                code = j;
+            }
+        }
+
+        closest[i] = ( u8 )code;
+        total += nearest;
+    }
+
+    // leave the block untouched unless this scheme wins
+    if( !( total < m_besterror ) )
+        return;
+
+    // remap the indices and write the block
+    u8 indices[16];
+    m_colours->RemapIndices( closest, indices );
+    WriteColourBlock3( m_start, m_end, indices, block );
+
+    // remember the new best error
+    m_besterror = total;
+}
+
+// Fits the colour set with a four-entry codebook (both endpoints plus the two
+// points one third and two thirds of the way between them) and writes the
+// block if the total error beats the best so far.
+void RangeFit::Compress4( void* block )
+{
+    int const count = m_colours->GetCount();
+    Vec3 const* values = m_colours->GetPoints();
+
+    // the codebook: start, end and the two interior thirds
+    Vec3 const codes[4] = {
+        m_start,
+        m_end,
+        ( 2.0f/3.0f )*m_start + ( 1.0f/3.0f )*m_end,
+        ( 1.0f/3.0f )*m_start + ( 2.0f/3.0f )*m_end
+    };
+
+    // assign every point its nearest code under the metric; ties keep the
+    // lower code index
+    u8 closest[16];
+    float total = 0.0f;
+    for( int i = 0; i < count; ++i )
+    {
+        float nearest = FLT_MAX;
+        int code = 0;
+        for( int j = 0; j < 4; ++j )
+        {
+            float const d = LengthSquared( m_metric*( values[i] - codes[j] ) );
+            if( d < nearest )
+            {
+                nearest = d;
+                code = j;
+            }
+        }
+
+        closest[i] = ( u8 )code;
+        total += nearest;
+    }
+
+    // leave the block untouched unless this scheme wins
+    if( !( total < m_besterror ) )
+        return;
+
+    // remap the indices and write the block
+    u8 indices[16];
+    m_colours->RemapIndices( closest, indices );
+    WriteColourBlock4( m_start, m_end, indices, block );
+
+    // remember the new best error
+    m_besterror = total;
+}
+
+} // namespace squish
--- a/extern/libsquish-1.15/rangefit.h
+++ b/extern/libsquish-1.15/rangefit.h
@ -0,0 +1,54 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_RANGEFIT_H
+#define SQUISH_RANGEFIT_H
+
+#include "squish.h"
+#include "colourfit.h"
+#include "maths.h"
+
+namespace squish {
+
+class ColourSet;
+
+// Colour fitter that scores one fixed pair of end-points (m_start/m_end)
+// against every point of the colour set.
+class RangeFit : public ColourFit
+{
+public:
+    // NOTE(review): the constructor body is not in this header; presumably it
+    // derives m_metric, m_start and m_end from its arguments - confirm in
+    // rangefit.cpp.
+    RangeFit( ColourSet const* colours, int flags, float* metric );
+
+private:
+    // Each of these writes `block` only when its total error is lower than
+    // m_besterror, and then records that error as the new best.
+    virtual void Compress3( void* block );
+    virtual void Compress4( void* block );
+
+    // per-channel scale applied to colour differences before the squared
+    // length is taken
+    Vec3 m_metric;
+    // the two end-points the codebook is built from
+    Vec3 m_start;
+    Vec3 m_end;
+    // lowest total error of any block written so far
+    float m_besterror;
+};
+
+} // squish
+
+#endif // ndef SQUISH_RANGEFIT_H
--- a/extern/libsquish-1.15/simd.h
+++ b/extern/libsquish-1.15/simd.h
@ -0,0 +1,40 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_SIMD_H
+#define SQUISH_SIMD_H
+
+#include "maths.h"
+
+#if SQUISH_USE_ALTIVEC
+#include "simd_ve.h"
+#elif SQUISH_USE_SSE
+#include "simd_sse.h"
+#else
+#include "simd_float.h"
+#endif
+
+
+#endif // ndef SQUISH_SIMD_H
--- a/extern/libsquish-1.15/simd_float.h
+++ b/extern/libsquish-1.15/simd_float.h
@ -0,0 +1,183 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_SIMD_FLOAT_H
+#define SQUISH_SIMD_FLOAT_H
+
+#include <algorithm>
+
+namespace squish {
+
+//! Wraps a value as a Vec4 constant.
+#define VEC4_CONST( X ) Vec4( X )
+
+//! Plain-float implementation of the four-component vector.
+class Vec4
+{
+public:
+    //! Form in which Vec4 arguments are passed to the operations below.
+    typedef Vec4 const& Arg;
+
+    //! Leaves all four components uninitialised.
+    Vec4() {}
+
+    //! Sets every component to s.
+    explicit Vec4( float s )
+    {
+        m_x = s;
+        m_y = s;
+        m_z = s;
+        m_w = s;
+    }
+
+    //! Sets the four components individually.
+    Vec4( float x, float y, float z, float w )
+    {
+        m_x = x;
+        m_y = y;
+        m_z = z;
+        m_w = w;
+    }
+
+    //! Returns the first three components.
+    Vec3 GetVec3() const
+    {
+        Vec3 const xyz( m_x, m_y, m_z );
+        return xyz;
+    }
+
+    //! Each Splat replicates one component across all four.
+    Vec4 SplatX() const
+    {
+        return Vec4( m_x, m_x, m_x, m_x );
+    }
+
+    Vec4 SplatY() const
+    {
+        return Vec4( m_y, m_y, m_y, m_y );
+    }
+
+    Vec4 SplatZ() const
+    {
+        return Vec4( m_z, m_z, m_z, m_z );
+    }
+
+    Vec4 SplatW() const
+    {
+        return Vec4( m_w, m_w, m_w, m_w );
+    }
+
+    //! Component-wise sum.
+    friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right  )
+    {
+        return Vec4(
+            left.m_x + right.m_x,
+            left.m_y + right.m_y,
+            left.m_z + right.m_z,
+            left.m_w + right.m_w
+        );
+    }
+
+    //! Component-wise difference.
+    friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right  )
+    {
+        return Vec4(
+            left.m_x - right.m_x,
+            left.m_y - right.m_y,
+            left.m_z - right.m_z,
+            left.m_w - right.m_w
+        );
+    }
+
+    //! Component-wise product.
+    friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
+    {
+        return Vec4(
+            left.m_x*right.m_x,
+            left.m_y*right.m_y,
+            left.m_z*right.m_z,
+            left.m_w*right.m_w
+        );
+    }
+
+    // the compound forms are expressed through the binary operators above
+    Vec4& operator+=( Arg v )
+    {
+        return *this = *this + v;
+    }
+
+    Vec4& operator-=( Arg v )
+    {
+        return *this = *this - v;
+    }
+
+    Vec4& operator*=( Arg v )
+    {
+        return *this = *this * v;
+    }
+
+    //! Returns a*b + c
+    friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
+    {
+        Vec4 const product = a*b;
+        return product + c;
+    }
+
+    //! Returns -( a*b - c )
+    friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
+    {
+        Vec4 const product = a*b;
+        return c - product;
+    }
+
+    //! Component-wise 1/v.
+    friend Vec4 Reciprocal( Vec4::Arg v )
+    {
+        float const rx = 1.0f/v.m_x;
+        float const ry = 1.0f/v.m_y;
+        float const rz = 1.0f/v.m_z;
+        float const rw = 1.0f/v.m_w;
+        return Vec4( rx, ry, rz, rw );
+    }
+
+    //! Component-wise minimum.
+    friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
+    {
+        float const x = std::min( left.m_x, right.m_x );
+        float const y = std::min( left.m_y, right.m_y );
+        float const z = std::min( left.m_z, right.m_z );
+        float const w = std::min( left.m_w, right.m_w );
+        return Vec4( x, y, z, w );
+    }
+
+    //! Component-wise maximum.
+    friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
+    {
+        float const x = std::max( left.m_x, right.m_x );
+        float const y = std::max( left.m_y, right.m_y );
+        float const z = std::max( left.m_z, right.m_z );
+        float const w = std::max( left.m_w, right.m_w );
+        return Vec4( x, y, z, w );
+    }
+
+    //! Rounds each component towards zero: floor for positive values,
+    //! ceil for everything else.
+    friend Vec4 Truncate( Vec4::Arg v )
+    {
+        float const x = v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x );
+        float const y = v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y );
+        float const z = v.m_z > 0.0f ? std::floor( v.m_z ) : std::ceil( v.m_z );
+        float const w = v.m_w > 0.0f ? std::floor( v.m_w ) : std::ceil( v.m_w );
+        return Vec4( x, y, z, w );
+    }
+
+    //! True when at least one component of left is below its counterpart in right.
+    friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
+    {
+        if( left.m_x < right.m_x )
+            return true;
+        if( left.m_y < right.m_y )
+            return true;
+        if( left.m_z < right.m_z )
+            return true;
+        return left.m_w < right.m_w;
+    }
+
+private:
+    float m_x;
+    float m_y;
+    float m_z;
+    float m_w;
+};
+
+} // namespace squish
+
+#endif // ndef SQUISH_SIMD_FLOAT_H
+
--- a/extern/libsquish-1.15/simd_sse.h
+++ b/extern/libsquish-1.15/simd_sse.h
@ -0,0 +1,180 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_SIMD_SSE_H
+#define SQUISH_SIMD_SSE_H
+
+#include <xmmintrin.h>
+#if ( SQUISH_USE_SSE > 1 )
+#include <emmintrin.h>
+#endif
+
+// Packs the lane number a into all four 2-bit fields of an 8-bit shuffle
+// immediate; used below with _mm_shuffle_ps to replicate one component.
+#define SQUISH_SSE_SPLAT( a )                                        \
+    ( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
+
+// Packs four lane numbers into an 8-bit shuffle immediate, two bits each,
+// with x in the lowest bits and w in the highest.
+#define SQUISH_SSE_SHUF( x, y, z, w )                                \
+    ( ( x ) | ( ( y ) << 2 ) | ( ( z ) << 4 ) | ( ( w ) << 6 ) )
+
+namespace squish {
+
+//! Wraps a value as a Vec4 constant.
+#define VEC4_CONST( X ) Vec4( X )
+
+//! SSE implementation of the four-component vector; holds a single __m128.
+class Vec4
+{
+public:
+    //! Form in which Vec4 arguments are passed to the operations below.
+    typedef Vec4 const& Arg;
+
+    //! Leaves the register value uninitialised.
+    Vec4() {}
+
+    //! Wraps an existing SSE register value.
+    explicit Vec4( __m128 v ) : m_v( v ) {}
+
+    Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
+
+    Vec4& operator=( Vec4 const& arg )
+    {
+        m_v = arg.m_v;
+        return *this;
+    }
+
+    //! Sets every component to s.
+    explicit Vec4( float s ) : m_v( _mm_set1_ps( s ) ) {}
+
+    //! Sets the four components individually; x goes in the lowest lane.
+    Vec4( float x, float y, float z, float w ) : m_v( _mm_setr_ps( x, y, z, w ) ) {}
+
+    //! Returns the first three components.
+    Vec3 GetVec3() const
+    {
+        // _mm_store_ps needs a 16-byte aligned destination, so the scratch
+        // buffer is aligned with whichever attribute the compiler supports
+#ifdef __GNUC__
+        __attribute__ ((__aligned__ (16))) float c[4];
+#else
+        __declspec(align(16)) float c[4];
+#endif
+        _mm_store_ps( c, m_v );
+        return Vec3( c[0], c[1], c[2] );
+    }
+
+    //! Each Splat replicates one component across all four lanes.
+    Vec4 SplatX() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); }
+    Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); }
+    Vec4 SplatZ() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 2 ) ) ); }
+    Vec4 SplatW() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 3 ) ) ); }
+
+    Vec4& operator+=( Arg v )
+    {
+        m_v = _mm_add_ps( m_v, v.m_v );
+        return *this;
+    }
+
+    Vec4& operator-=( Arg v )
+    {
+        m_v = _mm_sub_ps( m_v, v.m_v );
+        return *this;
+    }
+
+    Vec4& operator*=( Arg v )
+    {
+        m_v = _mm_mul_ps( m_v, v.m_v );
+        return *this;
+    }
+
+    //! Component-wise sum.
+    friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right  )
+    {
+        return Vec4( _mm_add_ps( left.m_v, right.m_v ) );
+    }
+
+    //! Component-wise difference.
+    friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right  )
+    {
+        return Vec4( _mm_sub_ps( left.m_v, right.m_v ) );
+    }
+
+    //! Component-wise product.
+    friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
+    {
+        return Vec4( _mm_mul_ps( left.m_v, right.m_v ) );
+    }
+
+    //! Returns a*b + c
+    friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
+    {
+        return Vec4( _mm_add_ps( _mm_mul_ps( a.m_v, b.m_v ), c.m_v ) );
+    }
+
+    //! Returns -( a*b - c )
+    friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
+    {
+        return Vec4( _mm_sub_ps( c.m_v, _mm_mul_ps( a.m_v, b.m_v ) ) );
+    }
+
+    //! Approximate component-wise 1/v: hardware estimate plus one refinement step.
+    friend Vec4 Reciprocal( Vec4::Arg v )
+    {
+        // get the reciprocal estimate
+        __m128 estimate = _mm_rcp_ps( v.m_v );
+
+        // one round of Newton-Raphson refinement:
+        // result = estimate + estimate*( 1 - estimate*v )
+        __m128 diff = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v.m_v ) );
+        return Vec4( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) );
+    }
+
+    //! Component-wise minimum.
+    friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
+    {
+        return Vec4( _mm_min_ps( left.m_v, right.m_v ) );
+    }
+
+    //! Component-wise maximum.
+    friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
+    {
+        return Vec4( _mm_max_ps( left.m_v, right.m_v ) );
+    }
+
+    //! Rounds each component towards zero via a float -> int32 -> float round trip.
+    friend Vec4 Truncate( Vec4::Arg v )
+    {
+#if ( SQUISH_USE_SSE == 1 )
+        // convert to ints: lo holds lanes 0-1, hi holds lanes 2-3 (moved
+        // down into the low half first, since only the low pair is converted)
+        __m128 input = v.m_v;
+        __m64 lo = _mm_cvttps_pi32( input );
+        __m64 hi = _mm_cvttps_pi32( _mm_movehl_ps( input, input ) );
+
+        // convert to floats: hi is converted into the low lanes and moved up
+        // to lanes 2-3 of part, then lo is converted into lanes 0-1
+        __m128 part = _mm_movelh_ps( input, _mm_cvtpi32_ps( input, hi ) );
+        __m128 truncated = _mm_cvtpi32_ps( part, lo );
+
+        // clear out the MMX multimedia state to allow FP calls later
+        _mm_empty();
+        return Vec4( truncated );
+#else
+        // use SSE2 instructions
+        return Vec4( _mm_cvtepi32_ps( _mm_cvttps_epi32( v.m_v ) ) );
+#endif
+    }
+
+    //! True when at least one component of left is below its counterpart in right.
+    friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
+    {
+        // the compare sets a lane to all ones where left < right; movemask
+        // gathers the lanes' top bits, so any set bit means a lane matched
+        __m128 bits = _mm_cmplt_ps( left.m_v, right.m_v );
+        int value = _mm_movemask_ps( bits );
+        return value != 0;
+    }
+
+private:
+    __m128 m_v;
+};
+
+} // namespace squish
+
+#endif // ndef SQUISH_SIMD_SSE_H
--- a/extern/libsquish-1.15/simd_ve.h
+++ b/extern/libsquish-1.15/simd_ve.h
@ -0,0 +1,166 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_SIMD_VE_H
+#define SQUISH_SIMD_VE_H
+
+#include <altivec.h>
+#undef bool
+
+namespace squish {
+
+//! Wraps a value as a Vec4 constant.
+// NOTE(review): relies on the single-element initialiser filling all four
+// lanes with X - confirm for the target compiler.
+#define VEC4_CONST( X ) Vec4( ( vector float ){ X } )
+
+//! AltiVec implementation of the four-component vector; holds a single vector float.
+class Vec4
+{
+public:
+    //! Form in which Vec4 arguments are passed to the operations below
+    //! (by value in this implementation).
+    typedef Vec4 Arg;
+
+    //! Leaves the vector value uninitialised.
+    Vec4() {}
+
+    //! Wraps an existing AltiVec vector value.
+    explicit Vec4( vector float v ) : m_v( v ) {}
+
+    Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
+
+    Vec4& operator=( Vec4 const& arg )
+    {
+        m_v = arg.m_v;
+        return *this;
+    }
+
+    //! Sets every component to s.
+    explicit Vec4( float s )
+    {
+        // fill the lanes through a union of the vector and a float array
+        union { vector float v; float c[4]; } u;
+        u.c[0] = s;
+        u.c[1] = s;
+        u.c[2] = s;
+        u.c[3] = s;
+        m_v = u.v;
+    }
+
+    //! Sets the four components individually.
+    Vec4( float x, float y, float z, float w )
+    {
+        union { vector float v; float c[4]; } u;
+        u.c[0] = x;
+        u.c[1] = y;
+        u.c[2] = z;
+        u.c[3] = w;
+        m_v = u.v;
+    }
+
+    //! Returns the first three components.
+    Vec3 GetVec3() const
+    {
+        // read the lanes back through the same vector/float-array union
+        union { vector float v; float c[4]; } u;
+        u.v = m_v;
+        return Vec3( u.c[0], u.c[1], u.c[2] );
+    }
+
+    //! Each Splat replicates one component across all four lanes.
+    Vec4 SplatX() const { return Vec4( vec_splat( m_v, 0 ) ); }
+    Vec4 SplatY() const { return Vec4( vec_splat( m_v, 1 ) ); }
+    Vec4 SplatZ() const { return Vec4( vec_splat( m_v, 2 ) ); }
+    Vec4 SplatW() const { return Vec4( vec_splat( m_v, 3 ) ); }
+
+    Vec4& operator+=( Arg v )
+    {
+        m_v = vec_add( m_v, v.m_v );
+        return *this;
+    }
+
+    Vec4& operator-=( Arg v )
+    {
+        m_v = vec_sub( m_v, v.m_v );
+        return *this;
+    }
+
+    Vec4& operator*=( Arg v )
+    {
+        // the product is formed as a multiply-add with a -0.0f addend
+        m_v = vec_madd( m_v, v.m_v, ( vector float ){ -0.0f } );
+        return *this;
+    }
+
+    //! Component-wise sum.
+    friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right  )
+    {
+        return Vec4( vec_add( left.m_v, right.m_v ) );
+    }
+
+    //! Component-wise difference.
+    friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right  )
+    {
+        return Vec4( vec_sub( left.m_v, right.m_v ) );
+    }
+
+    //! Component-wise product.
+    friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
+    {
+        // the product is formed as a multiply-add with a -0.0f addend
+        return Vec4( vec_madd( left.m_v, right.m_v, ( vector float ){ -0.0f } ) );
+    }
+
+    //! Returns a*b + c
+    friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
+    {
+        return Vec4( vec_madd( a.m_v, b.m_v, c.m_v ) );
+    }
+
+    //! Returns -( a*b - c )
+    friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
+    {
+        return Vec4( vec_nmsub( a.m_v, b.m_v, c.m_v ) );
+    }
+
+    //! Approximate component-wise 1/v: hardware estimate plus one refinement step.
+    friend Vec4 Reciprocal( Vec4::Arg v )
+    {
+        // get the reciprocal estimate
+        vector float estimate = vec_re( v.m_v );
+
+        // one round of Newton-Raphson refinement:
+        // result = estimate + estimate*( 1 - estimate*v )
+        // NOTE(review): assumes ( vector float ){ 1.0f } yields 1.0f in every
+        // lane - confirm for the target compiler
+        vector float diff = vec_nmsub( estimate, v.m_v, ( vector float ){ 1.0f } );
+        return Vec4( vec_madd( diff, estimate, estimate ) );
+    }
+
+    //! Component-wise minimum.
+    friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
+    {
+        return Vec4( vec_min( left.m_v, right.m_v ) );
+    }
+
+    //! Component-wise maximum.
+    friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
+    {
+        return Vec4( vec_max( left.m_v, right.m_v ) );
+    }
+
+    //! Rounds each component towards zero.
+    friend Vec4 Truncate( Vec4::Arg v )
+    {
+        return Vec4( vec_trunc( v.m_v ) );
+    }
+
+    //! True when at least one component of left is below its counterpart in right.
+    friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
+    {
+        return vec_any_lt( left.m_v, right.m_v ) != 0;
+    }
+
+private:
+    vector float m_v;
+};
+
+} // namespace squish
+
+#endif // ndef SQUISH_SIMD_VE_H
--- a/extern/libsquish-1.15/singlecolourfit.cpp
+++ b/extern/libsquish-1.15/singlecolourfit.cpp
@ -0,0 +1,172 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#include "singlecolourfit.h"
+
+#include <climits>
+
+#include "colourset.h"
+#include "colourblock.h"
+
+namespace squish {
+
+// One precomputed way of encoding a single 8-bit channel value.
+struct SourceBlock
+{
+    // quantised end-point values; ComputeEndPoints divides them by 31 or 63
+    // to obtain the floating-point end-points
+    u8 start;
+    u8 end;
+    // error of this encoding; ComputeEndPoints squares it and sums it over
+    // the three channels
+    u8 error;
+};
+
+// Table row for one channel value: the two candidate encodings that
+// ComputeEndPoints compares. Candidate 0 is stored as block index 0 and
+// candidate 1 as block index 2.
+struct SingleColourLookup
+{
+    SourceBlock sources[2];
+};
+
+#include "singlecolourlookup.inl"
+
+// Rounds a to the nearest integer and clamps the result to [0, limit].
+static int FloatToInt( float a, int limit )
+{
+    // adding a half before the truncating conversion gives round-to-nearest
+    int const rounded = static_cast< int >( a + 0.5f );
+
+    // negative results clamp to zero before the upper limit is considered
+    if( rounded < 0 )
+        return 0;
+    return rounded > limit ? limit : rounded;
+}
+
+// Records the first point of the colour set as an 8-bit-per-channel colour
+// and resets the best error so that the first Compress call always writes.
+//
+// colours  the colour set to fit; only its first point is read here
+// flags    forwarded unchanged to ColourFit
+SingleColourFit::SingleColourFit( ColourSet const* colours, int flags )
+  : ColourFit( colours, flags )
+{
+    // grab the single colour, scaling each component by 255 and rounding
+    // to the nearest value in [0, 255]
+    Vec3 const* values = m_colours->GetPoints();
+    m_colour[0] = ( u8 )FloatToInt( 255.0f*values->X(), 255 );
+    m_colour[1] = ( u8 )FloatToInt( 255.0f*values->Y(), 255 );
+    m_colour[2] = ( u8 )FloatToInt( 255.0f*values->Z(), 255 );
+
+    // initialise the best error to the largest int (INT_MAX is declared in
+    // <climits>), which any computed error is below
+    m_besterror = INT_MAX;
+}
+
+// Finds the best three-colour-mode encoding of the single colour and writes
+// it to block if its error is lower than the best so far.
+void SingleColourFit::Compress3( void* block )
+{
+    // channels 0 and 2 use the 5-bit table, channel 1 the 6-bit table
+    SingleColourLookup const* const tables[3] =
+    {
+        lookup_5_3,
+        lookup_6_3,
+        lookup_5_3
+    };
+
+    // choose end-points and index; the outcome lands in m_start, m_end,
+    // m_index and m_error
+    ComputeEndPoints( tables );
+
+    // keep whatever was written before unless this result is better
+    if( m_error >= m_besterror )
+        return;
+
+    // remap the chosen index across the block and emit it
+    u8 remapped[16];
+    m_colours->RemapIndices( &m_index, remapped );
+    WriteColourBlock3( m_start, m_end, remapped, block );
+
+    // this is now the error to beat
+    m_besterror = m_error;
+}
+
+// Finds the best four-colour-mode encoding of the single colour and writes
+// it to block if its error is lower than the best so far.
+void SingleColourFit::Compress4( void* block )
+{
+    // channels 0 and 2 use the 5-bit table, channel 1 the 6-bit table
+    SingleColourLookup const* const tables[3] =
+    {
+        lookup_5_4,
+        lookup_6_4,
+        lookup_5_4
+    };
+
+    // choose end-points and index; the outcome lands in m_start, m_end,
+    // m_index and m_error
+    ComputeEndPoints( tables );
+
+    // keep whatever was written before unless this result is better
+    if( m_error >= m_besterror )
+        return;
+
+    // remap the chosen index across the block and emit it
+    u8 remapped[16];
+    m_colours->RemapIndices( &m_index, remapped );
+    WriteColourBlock4( m_start, m_end, remapped, block );
+
+    // this is now the error to beat
+    m_besterror = m_error;
+}
+
+// Picks whichever of the two table candidates has the lower summed squared
+// error over the three channels and stores its end-points, block index and
+// error in m_start, m_end, m_index and m_error.
+//
+// lookups  three per-channel tables, each indexed by the 8-bit channel value
+void SingleColourFit::ComputeEndPoints( SingleColourLookup const* const* lookups )
+{
+    // the table row for this colour in each of the three channels
+    SingleColourLookup const& row0 = lookups[0][m_colour[0]];
+    SingleColourLookup const& row1 = lookups[1][m_colour[1]];
+    SingleColourLookup const& row2 = lookups[2][m_colour[2]];
+
+    // start from the largest int so the first candidate is always taken
+    m_error = INT_MAX;
+    for( int candidate = 0; candidate < 2; ++candidate )
+    {
+        SourceBlock const& s0 = row0.sources[candidate];
+        SourceBlock const& s1 = row1.sources[candidate];
+        SourceBlock const& s2 = row2.sources[candidate];
+
+        // sum of the squared per-channel errors
+        int const e0 = s0.error;
+        int const e1 = s1.error;
+        int const e2 = s2.error;
+        int const total = e0*e0 + e1*e1 + e2*e2;
+
+        // only a strictly lower error replaces the current choice
+        if( total >= m_error )
+            continue;
+
+        // channels 0 and 2 are scaled by 31, channel 1 by 63
+        m_start = Vec3(
+            ( float )s0.start/31.0f,
+            ( float )s1.start/63.0f,
+            ( float )s2.start/31.0f
+        );
+        m_end = Vec3(
+            ( float )s0.end/31.0f,
+            ( float )s1.end/63.0f,
+            ( float )s2.end/31.0f
+        );
+
+        // candidate 0 is stored as index 0, candidate 1 as index 2
+        m_index = ( u8 )( 2*candidate );
+        m_error = total;
+    }
+}
+
+} // namespace squish
--- a/extern/libsquish-1.15/singlecolourfit.h
+++ b/extern/libsquish-1.15/singlecolourfit.h
@ -0,0 +1,58 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_SINGLECOLOURFIT_H
+#define SQUISH_SINGLECOLOURFIT_H
+
+#include "squish.h"
+#include "colourfit.h"
+
+namespace squish {
+
+class ColourSet;
+struct SingleColourLookup;
+
+// Colour fitter that encodes one colour (the first point of the colour set)
+// using precomputed per-channel lookup tables.
+class SingleColourFit : public ColourFit
+{
+public:
+    SingleColourFit( ColourSet const* colours, int flags );
+
+private:
+    // Each of these writes `block` only when the error found by
+    // ComputeEndPoints is lower than m_besterror.
+    virtual void Compress3( void* block );
+    virtual void Compress4( void* block );
+
+    // Chooses the better of the two table candidates and fills m_start,
+    // m_end, m_index and m_error; `lookups` holds one table per channel.
+    void ComputeEndPoints( SingleColourLookup const* const* lookups );
+
+    // the colour being fitted, rounded to 8 bits per channel
+    u8 m_colour[3];
+    // end-points chosen by the last ComputeEndPoints call
+    Vec3 m_start;
+    Vec3 m_end;
+    // block index chosen by the last ComputeEndPoints call (0 or 2)
+    u8 m_index;
+    // summed squared error of the last ComputeEndPoints result
+    int m_error;
+    // lowest error of any block written so far
+    int m_besterror;
+};
+
+} // namespace squish
+
+#endif // ndef SQUISH_SINGLECOLOURFIT_H
--- a/extern/libsquish-1.15/singlecolourlookup.inl
+++ b/extern/libsquish-1.15/singlecolourlookup.inl
--- a/extern/libsquish-1.15/squish.cpp
+++ b/extern/libsquish-1.15/squish.cpp
@ -0,0 +1,403 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#include <string.h>
+#include "squish.h"
+#include "colourset.h"
+#include "maths.h"
+#include "rangefit.h"
+#include "clusterfit.h"
+#include "colourblock.h"
+#include "alpha.h"
+#include "singlecolourfit.h"
+
+namespace squish {
+
+// Sanitises a caller-supplied flag word.
+//
+// Exactly one compression method and one colour-fit mode survive: when the
+// method bits are missing or name more than one method the result falls back
+// to kDxt1, and when the fit bits are not exactly kColourRangeFit or
+// kColourIterativeClusterFit the result falls back to kColourClusterFit.
+// Option bits that are independent of method and fit are passed through.
+static int FixFlags( int flags )
+{
+    // grab the flag bits
+    int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
+    int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
+
+    // kSourceBGRA must be preserved as well: CopyRGBA tests it on flags that
+    // have already been through FixFlags, so masking it off here would make
+    // the BGRA swizzle unreachable from the image-level entry points
+    int extra = flags & ( kWeightColourByAlpha | kSourceBGRA );
+
+    // set defaults
+    if ( method != kDxt3
+    &&   method != kDxt5
+    &&   method != kBc4
+    &&   method != kBc5 )
+    {
+        method = kDxt1;
+    }
+    if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
+        fit = kColourClusterFit;
+
+    // done
+    return method | fit | extra;
+}
+
+// Compresses a single block described by 16 RGBA quads (rgba) into block.
+// mask is forwarded unchanged to the colour set and the alpha compressors;
+// metric is forwarded to the range / cluster fitters.
+void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
+{
+    // normalise the flags before inspecting them
+    flags = FixFlags( flags );
+
+    u8* const out = reinterpret_cast< u8* >( block );
+
+    // BC4 / BC5: each source channel is moved into the alpha slot of a
+    // scratch block and run through the DXT5 alpha compressor; BC4 encodes
+    // R only, BC5 encodes R followed by G eight bytes later
+    if( ( flags & ( kBc4 | kBc5 ) ) != 0 )
+    {
+        u8 scratch[16*4];
+        int const channelCount = ( ( flags & kBc5 ) != 0 ) ? 2 : 1;
+        for( int channel = 0; channel < channelCount; ++channel )
+        {
+            for( int pixel = 0; pixel < 16; ++pixel )
+                scratch[pixel*4 + 3] = rgba[pixel*4 + channel];
+
+            CompressAlphaDxt5( scratch, mask, out + 8*channel );
+        }
+        return;
+    }
+
+    // DXT3 / DXT5 place the alpha data first and the colour data 8 bytes in;
+    // DXT1 has colour only, at the start of the block
+    bool const hasAlphaBlock = ( flags & ( kDxt3 | kDxt5 ) ) != 0;
+    void* const alphaOut = block;
+    void* const colourOut = hasAlphaBlock ? static_cast< void* >( out + 8 ) : block;
+
+    // build the minimal point set
+    ColourSet colours( rgba, mask, flags );
+
+    // pick the fitter from the number of distinct colours and the flags
+    if( colours.GetCount() == 1 )
+    {
+        // one colour: the single colour fit is always used
+        SingleColourFit single( &colours, flags );
+        single.Compress( colourOut );
+    }
+    else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
+    {
+        // range fit requested, or nothing to fit at all
+        RangeFit range( &colours, flags, metric );
+        range.Compress( colourOut );
+    }
+    else
+    {
+        // otherwise cluster fit (iterative or not, as the flags say)
+        ClusterFit cluster( &colours, flags, metric );
+        cluster.Compress( colourOut );
+    }
+
+    // alpha is encoded separately for the formats that carry it
+    if( ( flags & kDxt3 ) != 0 )
+    {
+        CompressAlphaDxt3( rgba, mask, alphaOut );
+    }
+    else if( ( flags & kDxt5 ) != 0 )
+    {
+        CompressAlphaDxt5( rgba, mask, alphaOut );
+    }
+}
+
+// Decompresses one block into 16 RGBA quads written to rgba.
+// NOTE(review): there is no dedicated BC4 / BC5 path here; with those flags
+// the block is handed to DecompressColour as a non-DXT1 colour block.
+void Decompress( u8* rgba, void const* block, int flags )
+{
+    // normalise the flags before inspecting them
+    flags = FixFlags( flags );
+
+    bool const isDxt1 = ( flags & kDxt1 ) != 0;
+    bool const isDxt3 = ( flags & kDxt3 ) != 0;
+    bool const isDxt5 = ( flags & kDxt5 ) != 0;
+
+    // with DXT3 / DXT5 the colour data sits 8 bytes in, after the alpha data
+    void const* const alphaIn = block;
+    void const* colourIn = block;
+    if( isDxt3 || isDxt5 )
+    {
+        colourIn = reinterpret_cast< u8 const* >( block ) + 8;
+    }
+
+    // colour first
+    DecompressColour( rgba, colourIn, isDxt1 );
+
+    // then alpha, for the formats that carry it
+    if( isDxt3 )
+    {
+        DecompressAlphaDxt3( rgba, alphaIn );
+    }
+    else if( isDxt5 )
+    {
+        DecompressAlphaDxt5( rgba, alphaIn );
+    }
+}
+
+// Returns the number of bytes needed to hold a width x height image once it
+// has been split into 4x4 blocks (partial blocks at the edges count as whole
+// blocks). kDxt1 and kBc4 use 8 bytes per block, everything else 16.
+int GetStorageRequirements( int width, int height, int flags )
+{
+    // normalise the flags before inspecting them
+    flags = FixFlags( flags );
+
+    int const blocksAcross = ( width + 3 )/4;
+    int const blocksDown = ( height + 3 )/4;
+    int const bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
+
+    return ( blocksAcross*blocksDown )*bytesPerBlock;
+}
+
+// Copies one 4-byte pixel from source to dest. When kSourceBGRA is set in
+// flags the first and third bytes are exchanged on the way; otherwise the
+// four bytes are copied in order.
+void CopyRGBA( u8 const* source, u8* dest, int flags )
+{
+    if( ( flags & kSourceBGRA ) == 0 )
+    {
+        // straight copy, lowest byte first
+        for( int channel = 0; channel < 4; ++channel )
+            dest[channel] = source[channel];
+        return;
+    }
+
+    // bgra <-> rgba: swap bytes 0 and 2, leave 1 and 3 where they are
+    dest[0] = source[2];
+    dest[1] = source[1];
+    dest[2] = source[0];
+    dest[3] = source[3];
+}
+
+// Compresses a width x height image whose rows are pitch bytes apart into
+// consecutive blocks written to blocks. The image is walked in 4x4 tiles;
+// pixels of a tile that fall outside the image are left out of the mask
+// handed to CompressMasked.
+void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric )
+{
+    // normalise the flags before inspecting them
+    flags = FixFlags( flags );
+
+    // output geometry, the same for every row of blocks
+    int const blockBytes = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
+    int const blocksPerRow = ( width + 3 )/4;
+    u8* const output = reinterpret_cast< u8* >( blocks );
+
+    // one iteration per row of blocks
+#ifdef SQUISH_USE_OPENMP
+#   pragma omp parallel for
+#endif
+    for( int y = 0; y < height; y += 4 )
+    {
+        // first output block of this row
+        u8* rowOut = output + ( ( y/4 )*blocksPerRow )*blockBytes;
+
+        for( int x = 0; x < width; x += 4 )
+        {
+            // gather the tile; bit i of the mask marks pixel i as present
+            u8 tile[16*4];
+            int mask = 0;
+            for( int i = 0; i < 16; ++i )
+            {
+                int const sx = x + ( i & 3 );
+                int const sy = y + ( i >> 2 );
+                if( sx >= width || sy >= height )
+                    continue;
+
+                CopyRGBA( rgba + pitch*sy + 4*sx, tile + 4*i, flags );
+                mask |= ( 1 << i );
+            }
+
+            // encode the tile and step to the next output block
+            CompressMasked( tile, mask, rowOut, flags, metric );
+            rowOut += blockBytes;
+        }
+    }
+}
+
+// Overload without an explicit pitch: rows are taken to be width*4 bytes
+// apart and the call is forwarded to the pitch-taking overload.
+void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
+{
+    int const packedPitch = width*4;
+    CompressImage( rgba, width, height, packedPitch, blocks, flags, metric );
+}
+
+// Decompresses consecutive blocks read from blocks into a width x height
+// image whose rows are pitch bytes apart. Each block is decoded into a
+// scratch tile and only the pixels that lie inside the image are copied out.
+void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags )
+{
+    // normalise the flags before inspecting them
+    flags = FixFlags( flags );
+
+    // input geometry, the same for every row of blocks
+    int const blockBytes = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
+    int const blocksPerRow = ( width + 3 )/4;
+    u8 const* const input = reinterpret_cast< u8 const* >( blocks );
+
+    // one iteration per row of blocks
+#ifdef SQUISH_USE_OPENMP
+#   pragma omp parallel for
+#endif
+    for( int y = 0; y < height; y += 4 )
+    {
+        // first input block of this row
+        u8 const* rowIn = input + ( ( y/4 )*blocksPerRow )*blockBytes;
+
+        for( int x = 0; x < width; x += 4 )
+        {
+            // decode into a scratch tile
+            u8 tile[4*16];
+            Decompress( tile, rowIn, flags );
+
+            // scatter the pixels that land inside the image
+            for( int i = 0; i < 16; ++i )
+            {
+                int const sx = x + ( i & 3 );
+                int const sy = y + ( i >> 2 );
+                if( sx >= width || sy >= height )
+                    continue;
+
+                CopyRGBA( tile + 4*i, rgba + pitch*sy + 4*sx, flags );
+            }
+
+            // step to the next input block
+            rowIn += blockBytes;
+        }
+    }
+}
+
+// Overload without an explicit pitch: rows are taken to be width*4 bytes
+// apart and the call is forwarded to the pitch-taking overload.
+void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
+{
+    int const packedPitch = width*4;
+    DecompressImage( rgba, width, height, packedPitch, blocks, flags );
+}
+
+// Squared difference of two values.
+static double ErrorSq(double x, double y)
+{
+    double const delta = x - y;
+    return delta * delta;
+}
+
+static void ComputeBlockWMSE(u8 const *original, u8 const *compressed, unsigned int w, unsigned int h, double &cmse, double &amse)
+{
+    // Computes the MSE for the block and weights it by the variance of the original block.
+    // If the variance of the original block is less than 4 (i.e. a standard deviation of 1 per channel)
+    // then the block is close to being a single colour. Quantisation errors in single colour blocks
+    // are easier to see than similar errors in blocks that contain more colours, particularly when there
+    // are many such blocks in a large area (eg a blue sky background) as they cause banding.  Given that
+    // banding is easier to see than small errors in "complex" blocks, we weight the errors by a factor
+    // of 5. This implies that images with large, single colour areas will have a higher potential WMSE
+    // than images with lots of detail.
+
+    cmse = amse = 0;
+    unsigned int sum_p[4];  // per channel sum of pixels
+    unsigned int sum_p2[4]; // per channel sum of pixels squared
+    memset(sum_p, 0, sizeof(sum_p));
+    memset(sum_p2, 0, sizeof(sum_p2));
+    for( unsigned int py = 0; py < 4; ++py )
+    {
+        for( unsigned int px = 0; px < 4; ++px )
+        {
+            if( px < w && py < h )
+            {
+                double pixelCMSE = 0;
+                for( int i = 0; i < 3; ++i )
+                {
+                    pixelCMSE += ErrorSq(original[i], compressed[i]);
+                    sum_p[i] += original[i];
+                    sum_p2[i] += (unsigned int)original[i]*original[i];
+                }
+                if( original[3] == 0 && compressed[3] == 0 )
+                    pixelCMSE = 0; // transparent in both, so colour is inconsequential
+                amse += ErrorSq(original[3], compressed[3]);
+                cmse += pixelCMSE;
+                sum_p[3] += original[3];
+                sum_p2[3] += (unsigned int)original[3]*original[3];
+            }
+            original += 4;
+            compressed += 4;
+        }
+    }
+    unsigned int variance = 0;
+    for( int i = 0; i < 4; ++i )
+        variance += w*h*sum_p2[i] - sum_p[i]*sum_p[i];
+    if( variance < 4 * w * w * h * h )
+    {
+        amse *= 5;
+        cmse *= 5;
+    }
+}
+
+void ComputeMSE( u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
+{
+    // fix any bad flags
+    flags = FixFlags( flags );
+    colourMSE = alphaMSE = 0;
+
+    // initialise the block input
+    squish::u8 const* sourceBlock = dxt;
+    int bytesPerBlock = ( ( flags & squish::kDxt1 ) != 0 ) ? 8 : 16;
+
+    // loop over blocks
+    for( int y = 0; y < height; y += 4 )
+    {
+        for( int x = 0; x < width; x += 4 )
+        {
+            // decompress the block
+            u8 targetRgba[4*16];
+            Decompress( targetRgba, sourceBlock, flags );
+            u8 const* sourcePixel = targetRgba;
+
+            // copy across to a similar pixel block
+            u8 originalRgba[4*16];
+            u8* originalPixel = originalRgba;
+
+            for( int py = 0; py < 4; ++py )
+            {
+                for( int px = 0; px < 4; ++px )
+                {
+                    int sx = x + px;
+                    int sy = y + py;
+                    if( sx < width && sy < height )
+                    {
+                        u8 const* targetPixel = rgba + pitch*sy + 4*sx;
+                        CopyRGBA(targetPixel, originalPixel, flags);
+                    }
+                    sourcePixel += 4;
+                    originalPixel += 4;
+                }
+            }
+
+            // compute the weighted MSE of the block
+            double blockCMSE, blockAMSE;
+            ComputeBlockWMSE(originalRgba, targetRgba, std::min(4, width - x), std::min(4, height - y), blockCMSE, blockAMSE);
+            colourMSE += blockCMSE;
+            alphaMSE += blockAMSE;
+            // advance
+            sourceBlock += bytesPerBlock;
+        }
+    }
+    colourMSE /= (width * height * 3);
+    alphaMSE /= (width * height);
+}
+
+// Overload without an explicit pitch: rows are taken to be width*4 bytes
+// apart and the call is forwarded to the pitch-taking overload.
+void ComputeMSE( u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
+{
+    int const packedPitch = width*4;
+    ComputeMSE(rgba, width, height, packedPitch, dxt, flags, colourMSE, alphaMSE);
+}
+
+} // namespace squish
--- a/src/nvtt/tests/CMakeLists.txt
+++ b/src/nvtt/tests/CMakeLists.txt
@ -28,6 +28,9 @@ TARGET_LINK_LIBRARIES(cubemaptest nvcore nvmath nvimage nvtt)
 ADD_EXECUTABLE(nvhdrtest hdrtest.cpp)
 TARGET_LINK_LIBRARIES(nvhdrtest nvcore nvimage nvtt bc6h nvmath)

+ADD_EXECUTABLE(bc1enc bc1enc.cpp)
+TARGET_LINK_LIBRARIES(bc1enc nvcore nvimage nvmath nvtt squish CMP_Core)
+
 INSTALL(TARGETS nvtestsuite nvhdrtest DESTINATION bin)
 
 #include_directories("/usr/include/ffmpeg/")