From 9a16bebf8f87b8cbf19c0e6fec6faa73e108b09a Mon Sep 17 00:00:00 2001 From: Ignacio Date: Mon, 23 Mar 2020 10:07:38 -0700 Subject: [PATCH] Add external libs for comparisons and benchmarks. --- extern/CMP_Core/CMP_Core.def | 56 + extern/CMP_Core/CMakeLists.txt | 33 + extern/CMP_Core/shaders/BC1_Encode_kernel.cpp | 582 ++ extern/CMP_Core/shaders/BC1_Encode_kernel.h | 48 + extern/CMP_Core/shaders/BC2_Encode_kernel.cpp | 261 + extern/CMP_Core/shaders/BC2_Encode_kernel.h | 34 + extern/CMP_Core/shaders/BC3_Encode_kernel.cpp | 218 + extern/CMP_Core/shaders/BC3_Encode_kernel.h | 31 + extern/CMP_Core/shaders/BC4_Encode_kernel.cpp | 200 + extern/CMP_Core/shaders/BC4_Encode_kernel.h | 31 + extern/CMP_Core/shaders/BC5_Encode_kernel.cpp | 264 + extern/CMP_Core/shaders/BC5_Encode_kernel.h | 31 + extern/CMP_Core/shaders/BC6_Encode_kernel.cpp | 3990 ++++++++++++ extern/CMP_Core/shaders/BC6_Encode_kernel.h | 480 ++ extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp | 5489 +++++++++++++++++ extern/CMP_Core/shaders/BC7_Encode_Kernel.h | 1580 +++++ extern/CMP_Core/shaders/BCn_Common_Kernel.h | 2360 +++++++ extern/CMP_Core/shaders/Common_Def.h | 300 + extern/CMP_Core/shaders/CopyFiles.bat | 50 + extern/CMP_Core/source/CMP_Core.h | 153 + extern/CMP_Core/source/cmp_math_vec4.h | 417 ++ extern/CMP_Core/test/BlockConstants.h | 228 + extern/CMP_Core/test/CMakeLists.txt | 13 + extern/CMP_Core/test/CompressonatorTests.cpp | 1143 ++++ extern/CMP_Core/test/CompressonatorTests.h | 6 + extern/CMP_Core/test/TestsMain.cpp | 10 + extern/CMakeLists.txt | 6 +- extern/libsquish-1.15/CMakeLists.txt | 117 + .../CMakeModules/FindlibSquish.cmake | 14 + extern/libsquish-1.15/ChangeLog.txt | 66 + extern/libsquish-1.15/Doxyfile | 214 + extern/libsquish-1.15/LICENSE.txt | 20 + extern/libsquish-1.15/Makefile | 65 + extern/libsquish-1.15/README.txt | 18 + extern/libsquish-1.15/alpha.cpp | 350 ++ extern/libsquish-1.15/alpha.h | 41 + extern/libsquish-1.15/clusterfit.cpp | 392 ++ extern/libsquish-1.15/clusterfit.h | 61 
+ extern/libsquish-1.15/colourblock.cpp | 214 + extern/libsquish-1.15/colourblock.h | 41 + extern/libsquish-1.15/colourfit.cpp | 54 + extern/libsquish-1.15/colourfit.h | 56 + extern/libsquish-1.15/colourset.cpp | 121 + extern/libsquish-1.15/colourset.h | 58 + extern/libsquish-1.15/config | 38 + extern/libsquish-1.15/config.h | 49 + extern/libsquish-1.15/extra/squishgen.cpp | 151 + extern/libsquish-1.15/extra/squishpng.cpp | 546 ++ extern/libsquish-1.15/extra/squishtest.cpp | 206 + extern/libsquish-1.15/libSquish.png | Bin 0 -> 17907 bytes extern/libsquish-1.15/libSquish.pri | 26 + extern/libsquish-1.15/libSquish.pro | 32 + extern/libsquish-1.15/libSquish.svg | 238 + extern/libsquish-1.15/libsquish.pc.in | 13 + extern/libsquish-1.15/maths.cpp | 259 + extern/libsquish-1.15/maths.h | 233 + extern/libsquish-1.15/rangefit.cpp | 201 + extern/libsquish-1.15/rangefit.h | 54 + extern/libsquish-1.15/simd.h | 40 + extern/libsquish-1.15/simd_float.h | 183 + extern/libsquish-1.15/simd_sse.h | 180 + extern/libsquish-1.15/simd_ve.h | 166 + extern/libsquish-1.15/singlecolourfit.cpp | 172 + extern/libsquish-1.15/singlecolourfit.h | 58 + extern/libsquish-1.15/singlecolourlookup.inl | 1064 ++++ extern/libsquish-1.15/squish.cpp | 403 ++ src/nvtt/tests/CMakeLists.txt | 3 + 67 files changed, 24230 insertions(+), 1 deletion(-) create mode 100644 extern/CMP_Core/CMP_Core.def create mode 100644 extern/CMP_Core/CMakeLists.txt create mode 100644 extern/CMP_Core/shaders/BC1_Encode_kernel.cpp create mode 100644 extern/CMP_Core/shaders/BC1_Encode_kernel.h create mode 100644 extern/CMP_Core/shaders/BC2_Encode_kernel.cpp create mode 100644 extern/CMP_Core/shaders/BC2_Encode_kernel.h create mode 100644 extern/CMP_Core/shaders/BC3_Encode_kernel.cpp create mode 100644 extern/CMP_Core/shaders/BC3_Encode_kernel.h create mode 100644 extern/CMP_Core/shaders/BC4_Encode_kernel.cpp create mode 100644 extern/CMP_Core/shaders/BC4_Encode_kernel.h create mode 100644 
extern/CMP_Core/shaders/BC5_Encode_kernel.cpp create mode 100644 extern/CMP_Core/shaders/BC5_Encode_kernel.h create mode 100644 extern/CMP_Core/shaders/BC6_Encode_kernel.cpp create mode 100644 extern/CMP_Core/shaders/BC6_Encode_kernel.h create mode 100644 extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp create mode 100644 extern/CMP_Core/shaders/BC7_Encode_Kernel.h create mode 100644 extern/CMP_Core/shaders/BCn_Common_Kernel.h create mode 100644 extern/CMP_Core/shaders/Common_Def.h create mode 100644 extern/CMP_Core/shaders/CopyFiles.bat create mode 100644 extern/CMP_Core/source/CMP_Core.h create mode 100644 extern/CMP_Core/source/cmp_math_vec4.h create mode 100644 extern/CMP_Core/test/BlockConstants.h create mode 100644 extern/CMP_Core/test/CMakeLists.txt create mode 100644 extern/CMP_Core/test/CompressonatorTests.cpp create mode 100644 extern/CMP_Core/test/CompressonatorTests.h create mode 100644 extern/CMP_Core/test/TestsMain.cpp create mode 100644 extern/libsquish-1.15/CMakeLists.txt create mode 100644 extern/libsquish-1.15/CMakeModules/FindlibSquish.cmake create mode 100644 extern/libsquish-1.15/ChangeLog.txt create mode 100644 extern/libsquish-1.15/Doxyfile create mode 100644 extern/libsquish-1.15/LICENSE.txt create mode 100644 extern/libsquish-1.15/Makefile create mode 100644 extern/libsquish-1.15/README.txt create mode 100644 extern/libsquish-1.15/alpha.cpp create mode 100644 extern/libsquish-1.15/alpha.h create mode 100644 extern/libsquish-1.15/clusterfit.cpp create mode 100644 extern/libsquish-1.15/clusterfit.h create mode 100644 extern/libsquish-1.15/colourblock.cpp create mode 100644 extern/libsquish-1.15/colourblock.h create mode 100644 extern/libsquish-1.15/colourfit.cpp create mode 100644 extern/libsquish-1.15/colourfit.h create mode 100644 extern/libsquish-1.15/colourset.cpp create mode 100644 extern/libsquish-1.15/colourset.h create mode 100644 extern/libsquish-1.15/config create mode 100644 extern/libsquish-1.15/config.h create mode 100644 
extern/libsquish-1.15/extra/squishgen.cpp create mode 100644 extern/libsquish-1.15/extra/squishpng.cpp create mode 100644 extern/libsquish-1.15/extra/squishtest.cpp create mode 100644 extern/libsquish-1.15/libSquish.png create mode 100644 extern/libsquish-1.15/libSquish.pri create mode 100644 extern/libsquish-1.15/libSquish.pro create mode 100644 extern/libsquish-1.15/libSquish.svg create mode 100644 extern/libsquish-1.15/libsquish.pc.in create mode 100644 extern/libsquish-1.15/maths.cpp create mode 100644 extern/libsquish-1.15/maths.h create mode 100644 extern/libsquish-1.15/rangefit.cpp create mode 100644 extern/libsquish-1.15/rangefit.h create mode 100644 extern/libsquish-1.15/simd.h create mode 100644 extern/libsquish-1.15/simd_float.h create mode 100644 extern/libsquish-1.15/simd_sse.h create mode 100644 extern/libsquish-1.15/simd_ve.h create mode 100644 extern/libsquish-1.15/singlecolourfit.cpp create mode 100644 extern/libsquish-1.15/singlecolourfit.h create mode 100644 extern/libsquish-1.15/singlecolourlookup.inl create mode 100644 extern/libsquish-1.15/squish.cpp diff --git a/extern/CMP_Core/CMP_Core.def b/extern/CMP_Core/CMP_Core.def new file mode 100644 index 0000000..baa5bc1 --- /dev/null +++ b/extern/CMP_Core/CMP_Core.def @@ -0,0 +1,56 @@ +; Core def : Declares the module parameters for the DLL. 
+ +EXPORTS +CreateOptionsBC1 +CreateOptionsBC2 +CreateOptionsBC3 +CreateOptionsBC4 +CreateOptionsBC5 +CreateOptionsBC6 +CreateOptionsBC7 + +DestroyOptionsBC1 +DestroyOptionsBC2 +DestroyOptionsBC3 +DestroyOptionsBC4 +DestroyOptionsBC5 +DestroyOptionsBC6 +DestroyOptionsBC7 + +SetDecodeChannelMapping + +SetChannelWeightsBC1 +SetChannelWeightsBC2 +SetChannelWeightsBC3 + +SetQualityBC1 +SetQualityBC2 +SetQualityBC3 +SetQualityBC4 +SetQualityBC5 +SetQualityBC6 +SetQualityBC7 + +SetAlphaThresholdBC1 + +SetMaskBC6 +SetMaskBC7 + +SetErrorThresholdBC7 +SetAlphaOptionsBC7 + +CompressBlockBC1 +CompressBlockBC2 +CompressBlockBC3 +CompressBlockBC4 +CompressBlockBC5 +CompressBlockBC6 +CompressBlockBC7 + +DecompressBlockBC1 +DecompressBlockBC2 +DecompressBlockBC3 +DecompressBlockBC4 +DecompressBlockBC5 +DecompressBlockBC6 +DecompressBlockBC7 diff --git a/extern/CMP_Core/CMakeLists.txt b/extern/CMP_Core/CMakeLists.txt new file mode 100644 index 0000000..e89ea3d --- /dev/null +++ b/extern/CMP_Core/CMakeLists.txt @@ -0,0 +1,33 @@ +cmake_minimum_required(VERSION 3.10) + +add_library(CMP_Core STATIC "") + +target_sources(CMP_Core + PRIVATE + shaders/BC1_Encode_kernel.h + shaders/BC1_Encode_kernel.cpp + shaders/BC2_Encode_kernel.h + shaders/BC2_Encode_kernel.cpp + shaders/BC3_Encode_kernel.h + shaders/BC3_Encode_kernel.cpp + shaders/BC4_Encode_kernel.h + shaders/BC4_Encode_kernel.cpp + shaders/BC5_Encode_kernel.h + shaders/BC5_Encode_kernel.cpp + shaders/BC6_Encode_kernel.h + shaders/BC6_Encode_kernel.cpp + shaders/BC7_Encode_Kernel.h + shaders/BC7_Encode_Kernel.cpp + shaders/BCn_Common_Kernel.h + shaders/Common_Def.h + ) + +target_include_directories(CMP_Core + PRIVATE + shaders + source) +#add_subdirectory(test) + +if (UNIX) +target_compile_definitions(CMP_Core PRIVATE _LINUX ASPM_GPU) +endif() diff --git a/extern/CMP_Core/shaders/BC1_Encode_kernel.cpp b/extern/CMP_Core/shaders/BC1_Encode_kernel.cpp new file mode 100644 index 0000000..4c68e42 --- /dev/null +++ 
b/extern/CMP_Core/shaders/BC1_Encode_kernel.cpp @@ -0,0 +1,582 @@ +//===================================================================== +// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +//===================================================================== +#include "BC1_Encode_kernel.h" + +//============================================== BC1 INTERFACES ======================================================= +void CompressBlockBC1_Fast( + CMP_Vec4uc srcBlockTemp[16], + CMP_GLOBAL CGU_UINT32 compressedBlock[2]) +{ + int i, k; + + CMP_Vec3f rgb; + CMP_Vec3f average_rgb; // The centrepoint of the axis + CMP_Vec3f v_rgb; // The axis + CMP_Vec3f uniques[16]; // The list of unique colours + int unique_pixels; // The number of unique pixels + CGU_FLOAT unique_recip; // Reciprocal of the above for fast multiplication + int index_map[16]; // The map of source pixels to unique indices + + CGU_FLOAT pos_on_axis[16]; // The distance each unique falls along the compression axis + CGU_FLOAT dist_from_axis[16]; // The distance each unique falls from the compression axis + CGU_FLOAT left = 0, right = 0, centre = 0; // The extremities and centre (average of left/right) of uniques along the compression axis + CGU_FLOAT axis_mapping_error = 0; // The total computed error in mapping pixels to the axis + + int swap; // Indicator if the RGB values need swapping to generate an opaque result + + // ------------------------------------------------------------------------------------- + // (3) Find the array of unique pixel values and sum them to find their average position + // ------------------------------------------------------------------------------------- + { + // Find the array of unique pixel values and sum them to find their average position + int current_pixel, firstdiff; + current_pixel = unique_pixels = 0; + average_rgb = 0.0f; + firstdiff = -1; + for (i = 0; i<16; i++) + { + for (k = 0; k 0) { rg_pos += rgb.y; rb_pos += rgb.z; } + if (rgb.z > 0) bg_pos += rgb.y; + } + v_rgb = v_rgb*unique_recip; + if (rg_pos < 0) v_rgb.x = -v_rgb.x; + if (bg_pos < 0) v_rgb.z = -v_rgb.z; + if ((rg_pos == bg_pos) && (rg_pos == 0)) + if (rb_pos < 0) v_rgb.z = -v_rgb.z; + 
} + + // ------------------------------------------------------------------------------------- + // (5) Axis projection and remapping + // ------------------------------------------------------------------------------------- + { + CGU_FLOAT v2_recip; + // Normalise the axis for simplicity of future calculation + v2_recip = (v_rgb.x*v_rgb.x + v_rgb.y*v_rgb.y + v_rgb.z*v_rgb.z); + if (v2_recip > 0) + v2_recip = 1.0f / (CGU_FLOAT)sqrt(v2_recip); + else + v2_recip = 1.0f; + v_rgb = v_rgb*v2_recip; + } + + // ------------------------------------------------------------------------------------- + // (6) Map the axis + // ------------------------------------------------------------------------------------- + // the line joining (and extended on either side of) average and axis + // defines the axis onto which the points will be projected + // Project all the points onto the axis, calculate the distance along + // the axis from the centre of the axis (average) + // From Foley & Van Dam: Closest point of approach of a line (P + v) to a point (R) is + // P + ((R-P).v) / (v.v))v + // The distance along v is therefore (R-P).v / (v.v) + // (v.v) is 1 if v is a unit vector. 
+ // + // Calculate the extremities at the same time - these need to be reasonably accurately + // represented in all cases + // + // In this first calculation, also find the error of mapping the points to the axis - this + // is our major indicator of whether or not the block has compressed well - if the points + // map well onto the axis then most of the noise introduced is high-frequency noise + { + left = 10000.0f; + right = -10000.0f; + axis_mapping_error = 0; + for (i = 0; i < unique_pixels; i++) + { + // Compute the distance along the axis of the point of closest approach + CMP_Vec3f temp = (uniques[i] - average_rgb); + pos_on_axis[i] = (temp.x * v_rgb.x) + (temp.y * v_rgb.y) + (temp.z * v_rgb.z); + + // Compute the actual point and thence the mapping error + rgb = uniques[i] - (average_rgb + (v_rgb * pos_on_axis[i])); + dist_from_axis[i] = rgb.x*rgb.x + rgb.y*rgb.y + rgb.z*rgb.z; + axis_mapping_error += dist_from_axis[i]; + + // Work out the extremities + if (pos_on_axis[i] < left) + left = pos_on_axis[i]; + if (pos_on_axis[i] > right) + right = pos_on_axis[i]; + } + } + + // ------------------------------------------------------------------------------------- + // (7) Now we have a good axis and the basic information about how the points are mapped + // to it + // Our initial guess is to represent the endpoints accurately, by moving the average + // to the centre and recalculating the point positions along the line + // ------------------------------------------------------------------------------------- + { + centre = (left + right) / 2; + average_rgb = average_rgb + (v_rgb*centre); + for (i = 0; i> 3); + + rgb = average_rgb + (v_rgb * right); + rd = ( CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z); + gd = ( CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z); + bd = ( CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z); + ROUND_AND_CLAMP(rd, 5); + ROUND_AND_CLAMP(gd, 6); + ROUND_AND_CLAMP(bd, 5); + c1 = (((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3)); + + // Force to be 
a 4-colour opaque block - in which case, c0 is greater than c1 + // blocktype == 4 + { + if (c0 < c1) + { + t = c0; + c0 = c1; + c1 = t; + swap = 1; + } + else if (c0 == c1) + { + // This block will always be encoded in 3-colour mode + // Need to ensure that only one of the two points gets used, + // avoiding accidentally setting some transparent pixels into the block + for (i = 0; i average) are 0 and 1, while + // interpolants are 2 and 3 + if (fabs(rgb.z) >= division) + bit = 0; + else + bit = 2; + // Positive is in the latter half of the block + if (rgb.z >= centre) + bit += 1; + // Set the output, taking swapping into account + compressedBlock[1] |= ((bit^swap) << (2 * i)); + + // Average the X and Y locations for each cluster + cluster_x[bit] += (CGU_FLOAT)(i & 3); + cluster_y[bit] += (CGU_FLOAT)(i >> 2); + cluster_count[bit]++; + } + + for (i = 0; i<4; i++) + { + CGU_FLOAT cr; + if (cluster_count[i]) + { + cr = 1.0f / cluster_count[i]; + cluster_x[i] *= cr; + cluster_y[i] *= cr; + } + else + { + cluster_x[i] = cluster_y[i] = -1; + } + } + + // patterns in axis position detection + // (same algorithm as used in the SSE version) + if ((compressedBlock[0] & 0xffff) != (compressedBlock[0] >> 16)) + { + CGU_UINT32 i1, k1; + CGU_UINT32 x = 0, y = 0; + int xstep = 0, ystep = 0; + + // Find a corner to search from + for (k1 = 0; k1<4; k1++) + { + switch (k1) + { + case 0: + x = 0; y = 0; xstep = 1; ystep = 1; + break; + case 1: + x = 0; y = 3; xstep = 1; ystep = -1; + break; + case 2: + x = 3; y = 0; xstep = -1; ystep = 1; + break; + case 3: + x = 3; y = 3; xstep = -1; ystep = -1; + break; + } + + for (i1 = 0; i1<4; i1++) + { + if ((POS(x, y + ystep*i1) < POS(x + xstep, y + ystep*i1)) || + (POS(x + xstep, y + ystep*i1) < POS(x + 2 * xstep, y + ystep*i1)) || + (POS(x + 2 * xstep, y + ystep*i1) < POS(x + 3 * xstep, y + ystep*i1)) + ) + break; + if ((POS(x + xstep*i1, y) < POS(x + xstep*i1, y + ystep)) || + (POS(x + xstep*i1, y + ystep) < POS(x + xstep*i1, y + 2 * 
ystep)) || + (POS(x + xstep*i1, y + 2 * ystep) < POS(x + xstep*i1, y + 3 * ystep)) + ) + break; + } + if (i1 == 4) + break; + } + } + } + + } + // done +} + +INLINE void store_uint8(CMP_GLOBAL CGU_UINT8 u_dstptr[8], CGU_UINT32 data[2]) +{ + int shift = 0; + for (CGU_INT k=0; k<4; k++) + { + u_dstptr[k] = (data[0] >> shift)&0xFF; + shift += 8; + } + shift = 0; + for (CGU_INT k=4; k<8; k++) + { + u_dstptr[k] = (data[1] >> shift)&0xFF; + shift += 8; + } +} + +void CompressBlockBC1_Internal( + const CMP_Vec4uc srcBlockTemp[16], + CMP_GLOBAL CGU_UINT32 compressedBlock[2], + CMP_GLOBAL const CMP_BC15Options *BC15options) +{ + CGU_UINT8 blkindex = 0; + CGU_UINT8 srcindex = 0; + CGU_UINT8 rgbBlock[64]; + for ( CGU_INT32 j = 0; j < 4; j++) { + for ( CGU_INT32 i = 0; i < 4; i++) { + rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z; // B + rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // G + rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // R + rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w; // A + srcindex++; + } + } + + CMP_BC15Options internalOptions = *BC15options; + CalculateColourWeightings(rgbBlock, &internalOptions); + + CompressRGBBlock(rgbBlock, + compressedBlock, + &internalOptions, + TRUE, + FALSE, + internalOptions.m_nAlphaThreshold); +} + +//============================================== USER INTERFACES ======================================================== +#ifndef ASPM_GPU +int CMP_CDECL CreateOptionsBC1(void **options) +{ + CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options; + if (BC15optionsDefault) { + SetDefaultBC15Options(BC15optionsDefault); + (*options) = BC15optionsDefault; + } + else { + (*options) = NULL; + return CGU_CORE_ERR_NEWMEM; + } + return CGU_CORE_OK; +} + +int CMP_CDECL DestroyOptionsBC1(void *options) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + CMP_BC15Options *BCOptions = reinterpret_cast (options); + delete BCOptions; + return CGU_CORE_OK; +} + +int CMP_CDECL 
SetQualityBC1(void *options, + CGU_FLOAT fquality) +{ + if (!options) return CGU_CORE_ERR_NEWMEM; + CMP_BC15Options *BC15optionsDefault = reinterpret_cast (options); + if (fquality < 0.0f) fquality = 0.0f; + else + if (fquality > 1.0f) fquality = 1.0f; + BC15optionsDefault->m_fquality = fquality; + return CGU_CORE_OK; +} + + +int CMP_CDECL SetAlphaThresholdBC1(void *options, + CGU_UINT8 alphaThreshold) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + CMP_BC15Options *BC15optionsDefault = reinterpret_cast (options); + BC15optionsDefault->m_nAlphaThreshold = alphaThreshold; + return CGU_CORE_OK; +} + +int CMP_CDECL SetDecodeChannelMapping(void *options, + CGU_BOOL mapRGBA) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + CMP_BC15Options *BC15optionsDefault = reinterpret_cast (options); + BC15optionsDefault->m_mapDecodeRGBA = mapRGBA; + return CGU_CORE_OK; +} + +int CMP_CDECL SetChannelWeightsBC1(void *options, + CGU_FLOAT WeightRed, + CGU_FLOAT WeightGreen, + CGU_FLOAT WeightBlue) { + if (!options) return CGU_CORE_ERR_INVALIDPTR; + CMP_BC15Options *BC15optionsDefault = (CMP_BC15Options *)options; + + if ((WeightRed < 0.0f) || (WeightRed > 1.0f)) return CGU_CORE_ERR_RANGERED; + if ((WeightGreen < 0.0f) || (WeightGreen > 1.0f)) return CGU_CORE_ERR_RANGEGREEN; + if ((WeightBlue < 0.0f) || (WeightBlue > 1.0f)) return CGU_CORE_ERR_RANGEBLUE; + + BC15optionsDefault->m_bUseChannelWeighting = true; + BC15optionsDefault->m_fChannelWeights[0] = WeightRed; + BC15optionsDefault->m_fChannelWeights[1] = WeightGreen; + BC15optionsDefault->m_fChannelWeights[2] = WeightBlue; + return CGU_CORE_OK; +} + +int CMP_CDECL CompressBlockBC1(const unsigned char *srcBlock, + unsigned int srcStrideInBytes, + CMP_GLOBAL unsigned char cmpBlock[8], + const void *options = NULL) { + CMP_Vec4uc inBlock[16]; + + //---------------------------------- + // Fill the inBlock with source data + //---------------------------------- + CGU_INT srcpos = 0; + CGU_INT dstptr = 0; + for (CGU_UINT8 row=0; 
row < 4; row++) + { + srcpos = row * srcStrideInBytes; + for (CGU_UINT8 col = 0; col < 4; col++) + { + inBlock[dstptr].x = CGU_UINT8(srcBlock[srcpos++]); + inBlock[dstptr].y = CGU_UINT8(srcBlock[srcpos++]); + inBlock[dstptr].z = CGU_UINT8(srcBlock[srcpos++]); + inBlock[dstptr].w = CGU_UINT8(srcBlock[srcpos++]); + dstptr++; + } + } + + CMP_BC15Options *BC15options = (CMP_BC15Options *)options; + CMP_BC15Options BC15optionsDefault; + if (BC15options == NULL) + { + BC15options = &BC15optionsDefault; + SetDefaultBC15Options(BC15options); + } + + CompressBlockBC1_Internal(inBlock, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options); + return CGU_CORE_OK; +} + +int CMP_CDECL DecompressBlockBC1(const unsigned char cmpBlock[8], + CMP_GLOBAL unsigned char srcBlock[64], + const void *options = NULL) { + CMP_BC15Options *BC15options = (CMP_BC15Options *)options; + CMP_BC15Options BC15optionsDefault; + if (BC15options == NULL) + { + BC15options = &BC15optionsDefault; + SetDefaultBC15Options(BC15options); + } + DecompressDXTRGB_Internal(srcBlock, ( CGU_UINT32 *)cmpBlock, BC15options); + + + return CGU_CORE_OK; +} +#endif + +//============================================== OpenCL USER INTERFACE ======================================================== +#ifdef ASPM_GPU +CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( + CMP_GLOBAL const CMP_Vec4uc* ImageSource, + CMP_GLOBAL CGU_UINT8* ImageDestination, + CMP_GLOBAL Source_Info* SourceInfo, + CMP_GLOBAL CMP_BC15Options* BC15options +) +{ + CGU_UINT32 xID; + CGU_UINT32 yID; + +//printf("SourceInfo: (H:%d,W:%d) Quality %1.2f \n", SourceInfo->m_src_height, SourceInfo->m_src_width, SourceInfo->m_fquality); +#ifdef ASPM_GPU + xID = get_global_id(0); + yID = get_global_id(1); +#else + xID = 0; + yID = 0; +#endif + + if (xID >= (SourceInfo->m_src_width / BlockX)) return; + if (yID >= (SourceInfo->m_src_height / BlockX)) return; + int srcWidth = SourceInfo->m_src_width; + + CGU_UINT32 destI = (xID*BC1CompBlockSize) + (yID*(srcWidth / 
BlockX)*BC1CompBlockSize); + int srcindex = 4 * (yID * srcWidth + xID); + int blkindex = 0; + CMP_Vec4uc srcData[16]; + srcWidth = srcWidth - 4; + + for ( CGU_INT32 j = 0; j < 4; j++) { + for ( CGU_INT32 i = 0; i < 4; i++) { + srcData[blkindex++] = ImageSource[srcindex++]; + } + srcindex += srcWidth; + } + + // fast low quality mode that matches v3.1 code + if (SourceInfo->m_fquality <= 0.04f) + CompressBlockBC1_Fast(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI]); + else + CompressBlockBC1_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options); +} +#endif diff --git a/extern/CMP_Core/shaders/BC1_Encode_kernel.h b/extern/CMP_Core/shaders/BC1_Encode_kernel.h new file mode 100644 index 0000000..73a0acf --- /dev/null +++ b/extern/CMP_Core/shaders/BC1_Encode_kernel.h @@ -0,0 +1,48 @@ +//===================================================================== +// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +//===================================================================== +#ifndef BC1_ENCODE_KERNEL_H +#define BC1_ENCODE_KERNEL_H + +#include "Common_Def.h" +#include "BCn_Common_Kernel.h" + +#define CS_RED(r, g, b) (r) +#define CS_GREEN(r, g, b) (g) +#define CS_BLUE(r, g, b) ((b+g)*0.5f) +#define DCS_RED(r, g, b) (r) +#define DCS_GREEN(r, g, b) (g) +#define DCS_BLUE(r, g, b) ((2.0f*b)-g) +#define BYTEPP 4 +#define BC1CompBlockSize 8 + + +#define ROUND_AND_CLAMP(v, shift) \ +{\ + if (v < 0) v = 0;\ + else if (v > 255) v = 255;\ + else v += (0x80>>shift) - (v>>shift);\ +} + +#define POS(x,y) (pos_on_axis[(x)+(y)*4]) + +#endif \ No newline at end of file diff --git a/extern/CMP_Core/shaders/BC2_Encode_kernel.cpp b/extern/CMP_Core/shaders/BC2_Encode_kernel.cpp new file mode 100644 index 0000000..a8b355b --- /dev/null +++ b/extern/CMP_Core/shaders/BC2_Encode_kernel.cpp @@ -0,0 +1,261 @@ +//===================================================================== +// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +//===================================================================== +#include "BC2_Encode_kernel.h" + +//============================================== BC2 INTERFACES ======================================================= + +void DXTCV11CompressExplicitAlphaBlock(const CGU_UINT8 block_8[16], CMP_GLOBAL CGU_UINT32 block_dxtc[2]) +{ + CGU_UINT8 i; + block_dxtc[0] = block_dxtc[1] = 0; + for (i = 0; i < 16; i++) + { + int v = block_8[i]; + v = (v + 7 - (v >> 4)); + v >>= 4; + if (v < 0) + v = 0; + if (v > 0xf) + v = 0xf; + if (i < 8) + block_dxtc[0] |= v << (4 * i); + else + block_dxtc[1] |= v << (4 * (i - 8)); + } +} + +#define EXPLICIT_ALPHA_PIXEL_MASK 0xf +#define EXPLICIT_ALPHA_PIXEL_BPP 4 + +CGU_INT CompressExplicitAlphaBlock(const CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4], + CMP_GLOBAL CGU_UINT32 compressedBlock[2]) +{ + DXTCV11CompressExplicitAlphaBlock(alphaBlock, compressedBlock); + return CGU_CORE_OK; +} + +void CompressBlockBC2_Internal(const CMP_Vec4uc srcBlockTemp[16], + CMP_GLOBAL CGU_UINT32 compressedBlock[4], + CMP_GLOBAL const CMP_BC15Options *BC15options) +{ + CGU_UINT8 blkindex = 0; + CGU_UINT8 srcindex = 0; + CGU_UINT8 rgbaBlock[64]; + for (CGU_INT32 j = 0; j < 4; j++) { + for (CGU_INT32 i = 0; i < 4; i++) { + rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z; // B + rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // G + rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // R + rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w; // A + srcindex++; + } + } + + CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; + for (CGU_INT32 i = 0; i < 16; i++) + alphaBlock[i] = (CGU_UINT8)(((CGU_INT32*)rgbaBlock)[i] >> RGBA8888_OFFSET_A); + + // Need a copy, as CalculateColourWeightings sets variables in the BC15options + CMP_BC15Options internalOptions = *BC15options; + CalculateColourWeightings(rgbaBlock, &internalOptions); + + CGU_INT err = CompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]); + if (err 
!= 0) + return; + + CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], &internalOptions,FALSE,FALSE,0); +} + +//============================================== USER INTERFACES ======================================================== +#ifndef ASPM_GPU + +int CMP_CDECL CreateOptionsBC2(void **options) +{ + CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options; + if (BC15optionsDefault) { + SetDefaultBC15Options(BC15optionsDefault); + (*options) = BC15optionsDefault; + } + else { + (*options) = NULL; + return CGU_CORE_ERR_NEWMEM; + } + return CGU_CORE_OK; +} + +int CMP_CDECL DestroyOptionsBC2(void *options) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + CMP_BC15Options *BCOptions = reinterpret_cast (options); + delete BCOptions; + return CGU_CORE_OK; +} + +int CMP_CDECL SetQualityBC2(void *options, + CGU_FLOAT fquality) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + CMP_BC15Options *BC15optionsDefault = reinterpret_cast (options); + if (fquality < 0.0f) fquality = 0.0f; + else + if (fquality > 1.0f) fquality = 1.0f; + BC15optionsDefault->m_fquality = fquality; + return CGU_CORE_OK; +} + +int CMP_CDECL SetChannelWeightsBC2(void *options, + CGU_FLOAT WeightRed, + CGU_FLOAT WeightGreen, + CGU_FLOAT WeightBlue) { + if (!options) return CGU_CORE_ERR_INVALIDPTR; + CMP_BC15Options *BC15optionsDefault = (CMP_BC15Options *)options; + + if ((WeightRed < 0.0f) || (WeightRed > 1.0f)) return CGU_CORE_ERR_RANGERED; + if ((WeightGreen < 0.0f) || (WeightGreen > 1.0f)) return CGU_CORE_ERR_RANGEGREEN; + if ((WeightBlue < 0.0f) || (WeightBlue > 1.0f)) return CGU_CORE_ERR_RANGEBLUE; + + BC15optionsDefault->m_bUseChannelWeighting = true; + BC15optionsDefault->m_fChannelWeights[0] = WeightRed; + BC15optionsDefault->m_fChannelWeights[1] = WeightGreen; + BC15optionsDefault->m_fChannelWeights[2] = WeightBlue; + return CGU_CORE_OK; +} + +// Decompresses an explicit alpha block (DXT3) +void DecompressExplicitAlphaBlock(CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4], + const 
CGU_UINT32 compressedBlock[2]) +{ + for (int i = 0; i < 16; i++) + { + int nBlock = i < 8 ? 0 : 1; + CGU_UINT8 cAlpha = (CGU_UINT8)((compressedBlock[nBlock] >> ((i % 8) * EXPLICIT_ALPHA_PIXEL_BPP)) & EXPLICIT_ALPHA_PIXEL_MASK); + alphaBlock[i] = (CGU_UINT8)((cAlpha << EXPLICIT_ALPHA_PIXEL_BPP) | cAlpha); + } +} + +void DecompressBC2_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[BLOCK_SIZE_4X4X4], + const CGU_UINT32 compressedBlock[4], + const CMP_BC15Options *BC15options) +{ + CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; + + DecompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]); + DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],BC15options); + + for (CGU_UINT32 i = 0; i < 16; i++) + ((CMP_GLOBAL CGU_UINT32*)rgbaBlock)[i] = (alphaBlock[i] << RGBA8888_OFFSET_A) | (((CMP_GLOBAL CGU_UINT32*)rgbaBlock)[i] & ~(BYTE_MASK << RGBA8888_OFFSET_A)); +} + +int CMP_CDECL CompressBlockBC2(const unsigned char *srcBlock, + unsigned int srcStrideInBytes, + CMP_GLOBAL unsigned char cmpBlock[16], + CMP_GLOBAL const void *options = NULL) { + + CMP_Vec4uc inBlock[16]; + + //---------------------------------- + // Fill the inBlock with source data + //---------------------------------- + CGU_INT srcpos = 0; + CGU_INT dstptr = 0; + for (CGU_UINT8 row = 0; row < 4; row++) + { + srcpos = row * srcStrideInBytes; + for (CGU_UINT8 col = 0; col < 4; col++) + { + inBlock[dstptr].x = CGU_UINT8(srcBlock[srcpos++]); + inBlock[dstptr].y = CGU_UINT8(srcBlock[srcpos++]); + inBlock[dstptr].z = CGU_UINT8(srcBlock[srcpos++]); + inBlock[dstptr].w = CGU_UINT8(srcBlock[srcpos++]); + dstptr++; + } + } + + CMP_BC15Options *BC15options = (CMP_BC15Options *)options; + CMP_BC15Options BC15optionsDefault; + if (BC15options == NULL) + { + BC15options = &BC15optionsDefault; + SetDefaultBC15Options(BC15options); + } + CompressBlockBC2_Internal(inBlock, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options); + return CGU_CORE_OK; +} + +int CMP_CDECL DecompressBlockBC2(const unsigned char 
cmpBlock[16], + CMP_GLOBAL unsigned char srcBlock[64], + const void *options = NULL) { + CMP_BC15Options *BC15options = (CMP_BC15Options *)options; + CMP_BC15Options BC15optionsDefault; + if (BC15options == NULL) + { + BC15options = &BC15optionsDefault; + SetDefaultBC15Options(BC15options); + } + DecompressBC2_Internal(srcBlock, (CGU_UINT32 *)cmpBlock,BC15options); + + return CGU_CORE_OK; +} +#endif + +//============================================== OpenCL USER INTERFACE ======================================================== +#ifdef ASPM_GPU +CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( + CMP_GLOBAL const CMP_Vec4uc* ImageSource, + CMP_GLOBAL CGU_UINT8* ImageDestination, + CMP_GLOBAL Source_Info* SourceInfo, + CMP_GLOBAL CMP_BC15Options* BC15options +) +{ + CGU_UINT32 xID; + CGU_UINT32 yID; + +#ifdef ASPM_GPU + xID = get_global_id(0); + yID = get_global_id(1); +#else + xID = 0; + yID = 0; +#endif + + if (xID >= (SourceInfo->m_src_width / BlockX)) return; + if (yID >= (SourceInfo->m_src_height / BlockX)) return; + int srcWidth = SourceInfo->m_src_width; + + CGU_UINT32 destI = (xID*BC2CompBlockSize) + (yID*(srcWidth / BlockX)*BC2CompBlockSize); + int srcindex = 4 * (yID * srcWidth + xID); + int blkindex = 0; + CMP_Vec4uc srcData[16]; + srcWidth = srcWidth - 4; + + for ( CGU_INT32 j = 0; j < 4; j++) { + for ( CGU_INT32 i = 0; i < 4; i++) { + srcData[blkindex++] = ImageSource[srcindex++]; + } + srcindex += srcWidth; + } + + CompressBlockBC2_Internal(srcData,(CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options); +} +#endif + diff --git a/extern/CMP_Core/shaders/BC2_Encode_kernel.h b/extern/CMP_Core/shaders/BC2_Encode_kernel.h new file mode 100644 index 0000000..a152751 --- /dev/null +++ b/extern/CMP_Core/shaders/BC2_Encode_kernel.h @@ -0,0 +1,34 @@ +//===================================================================== +// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +//===================================================================== +#ifndef BC2_ENCODE_KERNEL_H +#define BC2_ENCODE_KERNEL_H + +#include "Common_Def.h" +#include "BCn_Common_Kernel.h" + +#define BC2CompBlockSize 16 +#define NUM_CHANNELS 4 +#define NUM_ENDPOINTS 2 + + +#endif \ No newline at end of file diff --git a/extern/CMP_Core/shaders/BC3_Encode_kernel.cpp b/extern/CMP_Core/shaders/BC3_Encode_kernel.cpp new file mode 100644 index 0000000..8fc30e6 --- /dev/null +++ b/extern/CMP_Core/shaders/BC3_Encode_kernel.cpp @@ -0,0 +1,218 @@ +//===================================================================== +// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +//===================================================================== +#include "BC3_Encode_kernel.h" + +//============================================== BC3 INTERFACES ======================================================= + +void CompressBlockBC3_Internal(const CMP_Vec4uc srcBlockTemp[16], + CMP_GLOBAL CGU_UINT32 compressedBlock[4], + CMP_GLOBAL const CMP_BC15Options *BC15options) { + CGU_UINT8 blkindex = 0; + CGU_UINT8 srcindex = 0; + CGU_UINT8 rgbaBlock[64]; + for (CGU_INT32 j = 0; j < 4; j++) { + for (CGU_INT32 i = 0; i < 4; i++) { + rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z; // B + rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // G + rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // R + rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w; // A + srcindex++; + } + } + + CMP_BC15Options internalOptions = *BC15options; + CalculateColourWeightings(rgbaBlock, &internalOptions); + + CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; + for (CGU_INT32 i = 0; i < 16; i++) + alphaBlock[i] = + (CGU_UINT8)(((CGU_INT32 *)rgbaBlock)[i] >> RGBA8888_OFFSET_A); + + CGU_INT err = CompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]); + if (err != 0) return; + + CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], &internalOptions, + FALSE, FALSE, 0); +} + +//============================================== USER INTERFACES ======================================================== +#ifndef ASPM_GPU + +int CMP_CDECL CreateOptionsBC3(void **options) +{ + CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options; + if (BC15optionsDefault) { + SetDefaultBC15Options(BC15optionsDefault); + (*options) = BC15optionsDefault; + } + else { + (*options) = NULL; + return CGU_CORE_ERR_NEWMEM; + } + return CGU_CORE_OK; +} + + +int CMP_CDECL DestroyOptionsBC3(void *options) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + CMP_BC15Options *BCOptions = reinterpret_cast (options); + delete BCOptions; + return CGU_CORE_OK; 
+} + +int CMP_CDECL SetQualityBC3(void *options, + CGU_FLOAT fquality) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + CMP_BC15Options *BC15optionsDefault = reinterpret_cast (options); + if (fquality < 0.0f) fquality = 0.0f; + else + if (fquality > 1.0f) fquality = 1.0f; + BC15optionsDefault->m_fquality = fquality; + return CGU_CORE_OK; +} + +int CMP_CDECL SetChannelWeightsBC3(void *options, + CGU_FLOAT WeightRed, + CGU_FLOAT WeightGreen, + CGU_FLOAT WeightBlue) { + if (!options) return 1; + CMP_BC15Options *BC15optionsDefault = (CMP_BC15Options *)options; + + if ((WeightRed < 0.0f) || (WeightRed > 1.0f)) return CGU_CORE_ERR_RANGERED; + if ((WeightGreen < 0.0f) || (WeightGreen > 1.0f)) return CGU_CORE_ERR_RANGEGREEN; + if ((WeightBlue < 0.0f) || (WeightBlue > 1.0f)) return CGU_CORE_ERR_RANGEBLUE; + + BC15optionsDefault->m_bUseChannelWeighting = true; + BC15optionsDefault->m_fChannelWeights[0] = WeightRed; + BC15optionsDefault->m_fChannelWeights[1] = WeightGreen; + BC15optionsDefault->m_fChannelWeights[2] = WeightBlue; + return CGU_CORE_OK; +} + + +void DecompressBC3_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], + const CGU_UINT32 compressedBlock[4], + const CMP_BC15Options *BC15options) { + CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; + + DecompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]); + DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],BC15options); + + for (CGU_UINT32 i = 0; i < 16; i++) + ((CMP_GLOBAL CGU_UINT32 *)rgbaBlock)[i] = + (alphaBlock[i] << RGBA8888_OFFSET_A) | + (((CMP_GLOBAL CGU_UINT32 *)rgbaBlock)[i] & + ~(BYTE_MASK << RGBA8888_OFFSET_A)); +} + +int CMP_CDECL CompressBlockBC3( const unsigned char *srcBlock, + unsigned int srcStrideInBytes, + CMP_GLOBAL unsigned char cmpBlock[16], + const void *options = NULL) { + CMP_Vec4uc inBlock[16]; + + //---------------------------------- + // Fill the inBlock with source data + //---------------------------------- + CGU_INT srcpos = 0; + CGU_INT dstptr = 0; + for 
(CGU_UINT8 row = 0; row < 4; row++) + { + srcpos = row * srcStrideInBytes; + for (CGU_UINT8 col = 0; col < 4; col++) + { + inBlock[dstptr].x = CGU_UINT8(srcBlock[srcpos++]); + inBlock[dstptr].y = CGU_UINT8(srcBlock[srcpos++]); + inBlock[dstptr].z = CGU_UINT8(srcBlock[srcpos++]); + inBlock[dstptr].w = CGU_UINT8(srcBlock[srcpos++]); + dstptr++; + } + } + + CMP_BC15Options *BC15options = (CMP_BC15Options *)options; + CMP_BC15Options BC15optionsDefault; + if (BC15options == NULL) { + BC15options = &BC15optionsDefault; + SetDefaultBC15Options(BC15options); + } + + CompressBlockBC3_Internal(inBlock,(CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options); + return CGU_CORE_OK; +} + +int CMP_CDECL DecompressBlockBC3(const unsigned char cmpBlock[16], + CMP_GLOBAL unsigned char srcBlock[64], + const void *options = NULL) { + CMP_BC15Options *BC15options = (CMP_BC15Options *)options; + CMP_BC15Options BC15optionsDefault; + if (BC15options == NULL) + { + BC15options = &BC15optionsDefault; + SetDefaultBC15Options(BC15options); + } + DecompressBC3_Internal(srcBlock, (CGU_UINT32 *)cmpBlock,BC15options); + return CGU_CORE_OK; +} +#endif + +//============================================== OpenCL USER INTERFACE ==================================================== +#ifdef ASPM_GPU +CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( + CMP_GLOBAL const CMP_Vec4uc *ImageSource, + CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo, + CMP_GLOBAL CMP_BC15Options *BC15options) { + CGU_UINT32 xID; + CGU_UINT32 yID; + +#ifdef ASPM_GPU + xID = get_global_id(0); + yID = get_global_id(1); +#else + xID = 0; + yID = 0; +#endif + + if (xID >= (SourceInfo->m_src_width / BlockX)) return; + if (yID >= (SourceInfo->m_src_height / BlockX)) return; + int srcWidth = SourceInfo->m_src_width; + + CGU_UINT32 destI = + (xID * BC3CompBlockSize) + (yID * (srcWidth / BlockX) * BC3CompBlockSize); + int srcindex = 4 * (yID * srcWidth + xID); + int blkindex = 0; + CMP_Vec4uc srcData[16]; + srcWidth = 
srcWidth - 4; + + for (CGU_INT32 j = 0; j < 4; j++) { + for (CGU_INT32 i = 0; i < 4; i++) { + srcData[blkindex++] = ImageSource[srcindex++]; + } + srcindex += srcWidth; + } + + CompressBlockBC3_Internal( + srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options); +} +#endif diff --git a/extern/CMP_Core/shaders/BC3_Encode_kernel.h b/extern/CMP_Core/shaders/BC3_Encode_kernel.h new file mode 100644 index 0000000..9e97da1 --- /dev/null +++ b/extern/CMP_Core/shaders/BC3_Encode_kernel.h @@ -0,0 +1,31 @@ +//===================================================================== +// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +//===================================================================== +#ifndef BC3_ENCODE_KERNEL_H +#define BC3_ENCODE_KERNEL_H + +#include "Common_Def.h" +#include "BCn_Common_Kernel.h" + +#define BC3CompBlockSize 16 + +#endif \ No newline at end of file diff --git a/extern/CMP_Core/shaders/BC4_Encode_kernel.cpp b/extern/CMP_Core/shaders/BC4_Encode_kernel.cpp new file mode 100644 index 0000000..6242cf8 --- /dev/null +++ b/extern/CMP_Core/shaders/BC4_Encode_kernel.cpp @@ -0,0 +1,200 @@ +//===================================================================== +// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +//===================================================================== +#include "BC4_Encode_kernel.h" + +//============================================== BC4 INTERFACES ======================================================= + +void CompressBlockBC4_Internal(const CMP_Vec4uc srcBlockTemp[16], + CMP_GLOBAL CGU_UINT32 compressedBlock[2], + CMP_GLOBAL const CMP_BC15Options *BC15options) { + if (BC15options->m_fquality) { + // Reserved! + } + CGU_UINT8 blkindex = 0; + CGU_UINT8 srcindex = 0; + CGU_UINT8 alphaBlock[16]; + for (CGU_INT32 j = 0; j < 4; j++) { + for (CGU_INT32 i = 0; i < 4; i++) { + alphaBlock[blkindex++] = + (CGU_UINT8)srcBlockTemp[srcindex].x; // Red channel + srcindex++; + } + } + CompressAlphaBlock(alphaBlock, (CMP_GLOBAL CGU_UINT32 *)compressedBlock); +} + +void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], + const CGU_UINT32 compressedBlock[2], + const CMP_BC15Options *BC15options) { + if (BC15options) {} + CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; + DecompressAlphaBlock(alphaBlock, compressedBlock); + + CGU_UINT8 blkindex = 0; + CGU_UINT8 srcindex = 0; + for (CGU_INT32 j = 0; j < 4; j++) { + for (CGU_INT32 i = 0; i < 4; i++) { + rgbaBlock[blkindex++] = (CGU_UINT8)alphaBlock[srcindex]; // R + rgbaBlock[blkindex++] = (CGU_UINT8)alphaBlock[srcindex]; // G + rgbaBlock[blkindex++] = (CGU_UINT8)alphaBlock[srcindex]; // B + rgbaBlock[blkindex++] = (CGU_UINT8)alphaBlock[srcindex]; // A + srcindex++; + } + } +} + +void CompressBlockBC4_SingleChannel(const CGU_UINT8 srcBlockTemp[16], + CMP_GLOBAL CGU_UINT32 compressedBlock[2], + CMP_GLOBAL const CMP_BC15Options *BC15options) { + if (BC15options) {} + CompressAlphaBlock(srcBlockTemp, (CMP_GLOBAL CGU_UINT32 *)compressedBlock); +} + +void DecompressBlockBC4_SingleChannel(CGU_UINT8 srcBlockTemp[16], + const CGU_UINT32 compressedBlock[2], + const CMP_BC15Options *BC15options) { + if (BC15options) {} + DecompressAlphaBlock(srcBlockTemp, compressedBlock); +} + 
+//============================================== USER INTERFACES ======================================================== +#ifndef ASPM_GPU + +int CMP_CDECL CreateOptionsBC4(void **options) +{ + CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options; + if (BC15optionsDefault) { + SetDefaultBC15Options(BC15optionsDefault); + (*options) = BC15optionsDefault; + } + else { + (*options) = NULL; + return CGU_CORE_ERR_NEWMEM; + } + return CGU_CORE_OK; +} + +int CMP_CDECL DestroyOptionsBC4(void *options) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + CMP_BC15Options *BCOptions = reinterpret_cast (options); + delete BCOptions; + return CGU_CORE_OK; +} + +int CMP_CDECL SetQualityBC4(void *options, + CGU_FLOAT fquality) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + CMP_BC15Options *BC15optionsDefault = reinterpret_cast (options); + if (fquality < 0.0f) fquality = 0.0f; + else + if (fquality > 1.0f) fquality = 1.0f; + BC15optionsDefault->m_fquality = fquality; + return CGU_CORE_OK; +} + +int CMP_CDECL CompressBlockBC4(const unsigned char *srcBlock, + unsigned int srcStrideInBytes, + CMP_GLOBAL unsigned char cmpBlock[8], + const void *options = NULL) { + + unsigned char inBlock[16]; + //---------------------------------- + // Fill the inBlock with source data + //---------------------------------- + CGU_INT srcpos = 0; + CGU_INT dstptr = 0; + for (CGU_UINT8 row = 0; row < 4; row++) + { + srcpos = row * srcStrideInBytes; + for (CGU_UINT8 col = 0; col < 4; col++) + { + inBlock[dstptr++] = CGU_UINT8(srcBlock[srcpos++]); + } + } + + CMP_BC15Options *BC15options = (CMP_BC15Options *)options; + if (BC15options == NULL) { + CMP_BC15Options BC15optionsDefault; + BC15options = &BC15optionsDefault; + SetDefaultBC15Options(BC15options); + } + + CompressBlockBC4_SingleChannel(inBlock,(CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options); + return CGU_CORE_OK; +} + +int CMP_CDECL DecompressBlockBC4(const unsigned char cmpBlock[8], + CMP_GLOBAL unsigned char srcBlock[16], + const 
void *options = NULL) { + CMP_BC15Options *BC15options = (CMP_BC15Options *)options; + CMP_BC15Options BC15optionsDefault; + if (BC15options == NULL) + { + BC15options = &BC15optionsDefault; + SetDefaultBC15Options(BC15options); + } + DecompressBlockBC4_SingleChannel(srcBlock, (CGU_UINT32 *)cmpBlock,BC15options); + return CGU_CORE_OK; +} +#endif + +//============================================== OpenCL USER INTERFACE ==================================================== +#ifdef ASPM_GPU +CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( + CMP_GLOBAL const CMP_Vec4uc *ImageSource, + CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo, + CMP_GLOBAL CMP_BC15Options *BC15options) { + CGU_UINT32 xID; + CGU_UINT32 yID; + +#ifdef ASPM_GPU + xID = get_global_id(0); + yID = get_global_id(1); +#else + xID = 0; + yID = 0; +#endif + + if (xID >= (SourceInfo->m_src_width / BlockX)) return; + if (yID >= (SourceInfo->m_src_height / BlockX)) return; + int srcWidth = SourceInfo->m_src_width; + + CGU_UINT32 destI = + (xID * BC4CompBlockSize) + (yID * (srcWidth / BlockX) * BC4CompBlockSize); + int srcindex = 4 * (yID * srcWidth + xID); + int blkindex = 0; + CMP_Vec4uc srcData[16]; + srcWidth = srcWidth - 4; + + for (CGU_INT32 j = 0; j < 4; j++) { + for (CGU_INT32 i = 0; i < 4; i++) { + srcData[blkindex++] = ImageSource[srcindex++]; + } + srcindex += srcWidth; + } + + CompressBlockBC4_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options); +} +#endif diff --git a/extern/CMP_Core/shaders/BC4_Encode_kernel.h b/extern/CMP_Core/shaders/BC4_Encode_kernel.h new file mode 100644 index 0000000..65af4a7 --- /dev/null +++ b/extern/CMP_Core/shaders/BC4_Encode_kernel.h @@ -0,0 +1,31 @@ +//===================================================================== +// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +//===================================================================== +#ifndef BC4_ENCODE_KERNEL_H +#define BC4_ENCODE_KERNEL_H + +#include "Common_Def.h" +#include "BCn_Common_Kernel.h" + +#define BC4CompBlockSize 8 + +#endif \ No newline at end of file diff --git a/extern/CMP_Core/shaders/BC5_Encode_kernel.cpp b/extern/CMP_Core/shaders/BC5_Encode_kernel.cpp new file mode 100644 index 0000000..d4784dd --- /dev/null +++ b/extern/CMP_Core/shaders/BC5_Encode_kernel.cpp @@ -0,0 +1,264 @@ +//===================================================================== +// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +//===================================================================== +#include "BC5_Encode_kernel.h" + +//============================================== BC5 INTERFACES ======================================================= + +void CompressBlockBC5_Internal(CMP_Vec4uc srcBlockTemp[16], + CMP_GLOBAL CGU_UINT32 compressedBlock[4], + CMP_GLOBAL CMP_BC15Options *BC15options) +{ + if (BC15options->m_fquality) { + // Resreved + } + CGU_UINT8 blkindex = 0; + CGU_UINT8 srcindex = 0; + CGU_UINT8 alphaBlock[16]; + for (CGU_INT32 j = 0; j < 4; j++) { + for (CGU_INT32 i = 0; i < 4; i++) { + alphaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // Red channel + srcindex++; + } + } + CompressAlphaBlock(alphaBlock,&compressedBlock[0]); + + blkindex = 0; + srcindex = 0; + for (CGU_INT32 j = 0; j < 4; j++) { + for (CGU_INT32 i = 0; i < 4; i++) { + alphaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // Green channel + srcindex++; + } + } + CompressAlphaBlock(alphaBlock,&compressedBlock[2]); + +} + +void DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], + CGU_UINT32 compressedBlock[4], + CMP_BC15Options *BC15options) +{ + CGU_UINT8 alphaBlockR[BLOCK_SIZE_4X4]; + CGU_UINT8 alphaBlockG[BLOCK_SIZE_4X4]; + + DecompressAlphaBlock(alphaBlockR, &compressedBlock[0]); + DecompressAlphaBlock(alphaBlockG, &compressedBlock[2]); + + CGU_UINT8 blkindex = 0; + CGU_UINT8 srcindex = 0; + + if (BC15options->m_mapDecodeRGBA) + { + for (CGU_INT32 j = 0; j < 4; j++) { + for (CGU_INT32 i = 0; i < 4; i++) { + rgbaBlock[blkindex++] = (CGU_UINT8)alphaBlockR[srcindex]; + rgbaBlock[blkindex++] = (CGU_UINT8)alphaBlockG[srcindex]; + rgbaBlock[blkindex++] = 0; + rgbaBlock[blkindex++] = 255; + srcindex++; + } + } + } + else + { + for (CGU_INT32 j = 0; j < 4; j++) { + for (CGU_INT32 i = 0; i < 4; i++) { + rgbaBlock[blkindex++] = 0; + rgbaBlock[blkindex++] = (CGU_UINT8)alphaBlockG[srcindex]; + rgbaBlock[blkindex++] = (CGU_UINT8)alphaBlockR[srcindex]; + rgbaBlock[blkindex++] = 255; 
+ srcindex++; + } + } + } + +} + + +void CompressBlockBC5_DualChannel_Internal(const CGU_UINT8 srcBlockR[16], + const CGU_UINT8 srcBlockG[16], + CMP_GLOBAL CGU_UINT32 compressedBlock[4], + CMP_GLOBAL const CMP_BC15Options *BC15options) +{ + if (BC15options) {} + CompressAlphaBlock(srcBlockR,&compressedBlock[0]); + CompressAlphaBlock(srcBlockG,&compressedBlock[2]); +} + +void DecompressBC5_DualChannel_Internal(CMP_GLOBAL CGU_UINT8 srcBlockR[16], + CMP_GLOBAL CGU_UINT8 srcBlockG[16], + const CGU_UINT32 compressedBlock[4], + const CMP_BC15Options *BC15options) +{ + if (BC15options) {} + DecompressAlphaBlock(srcBlockR, &compressedBlock[0]); + DecompressAlphaBlock(srcBlockG, &compressedBlock[2]); +} + + +//============================================== USER INTERFACES ======================================================== +#ifndef ASPM_GPU + +int CMP_CDECL CreateOptionsBC5(void **options) +{ + CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options; + if (BC15optionsDefault) { + SetDefaultBC15Options(BC15optionsDefault); + (*options) = BC15optionsDefault; + } + else { + (*options) = NULL; + return CGU_CORE_ERR_NEWMEM; + } + return CGU_CORE_OK; +} + +int CMP_CDECL DestroyOptionsBC5(void *options) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + CMP_BC15Options *BCOptions = reinterpret_cast (options); + delete BCOptions; + return CGU_CORE_OK; +} + +int CMP_CDECL SetQualityBC5(void *options, + CGU_FLOAT fquality) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + CMP_BC15Options *BC15optionsDefault = reinterpret_cast (options); + if (fquality < 0.0f) fquality = 0.0f; + else + if (fquality > 1.0f) fquality = 1.0f; + BC15optionsDefault->m_fquality = fquality; + return CGU_CORE_OK; +} + + +int CMP_CDECL CompressBlockBC5(const CGU_UINT8 *srcBlockR, + unsigned int srcStrideInBytes1, + const CGU_UINT8 *srcBlockG, + unsigned int srcStrideInBytes2, + CMP_GLOBAL CGU_UINT8 cmpBlock[16], + const void *options = NULL) { + CGU_UINT8 inBlockR[16]; + + 
//---------------------------------- + // Fill the inBlock with source data + //---------------------------------- + CGU_INT srcpos = 0; + CGU_INT dstptr = 0; + for (CGU_UINT8 row = 0; row < 4; row++) + { + srcpos = row * srcStrideInBytes1; + for (CGU_UINT8 col = 0; col < 4; col++) + { + inBlockR[dstptr++] = CGU_UINT8(srcBlockR[srcpos++]); + } + } + + + CGU_UINT8 inBlockG[16]; + //---------------------------------- + // Fill the inBlock with source data + //---------------------------------- + srcpos = 0; + dstptr = 0; + for (CGU_UINT8 row = 0; row < 4; row++) + { + srcpos = row * srcStrideInBytes2; + for (CGU_UINT8 col = 0; col < 4; col++) + { + inBlockG[dstptr++] = CGU_UINT8(srcBlockG[srcpos++]); + } + } + + + CMP_BC15Options *BC15options = (CMP_BC15Options *)options; + CMP_BC15Options BC15optionsDefault; + if (BC15options == NULL) + { + BC15options = &BC15optionsDefault; + SetDefaultBC15Options(BC15options); + } + + CompressBlockBC5_DualChannel_Internal(inBlockR,inBlockG, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options); + return CGU_CORE_OK; +} + +int CMP_CDECL DecompressBlockBC5(const CGU_UINT8 cmpBlock[16], + CMP_GLOBAL CGU_UINT8 srcBlockR[16], + CMP_GLOBAL CGU_UINT8 srcBlockG[16], + const void *options = NULL) { + CMP_BC15Options *BC15options = (CMP_BC15Options *)options; + CMP_BC15Options BC15optionsDefault; + if (BC15options == NULL) + { + BC15options = &BC15optionsDefault; + SetDefaultBC15Options(BC15options); + } + DecompressBC5_DualChannel_Internal(srcBlockR,srcBlockG,(CGU_UINT32 *)cmpBlock,BC15options); + + return CGU_CORE_OK; +} + +#endif + +//============================================== OpenCL USER INTERFACE ==================================================== +#ifdef ASPM_GPU +CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(CMP_GLOBAL const CMP_Vec4uc* ImageSource, + CMP_GLOBAL CGU_UINT8* ImageDestination, + CMP_GLOBAL Source_Info* SourceInfo, + CMP_GLOBAL CMP_BC15Options* BC15options +) +{ + CGU_UINT32 xID; + CGU_UINT32 yID; + +#ifdef ASPM_GPU + xID = 
get_global_id(0); + yID = get_global_id(1); +#else + xID = 0; + yID = 0; +#endif + + if (xID >= (SourceInfo->m_src_width / BlockX)) return; + if (yID >= (SourceInfo->m_src_height / BlockX)) return; + int srcWidth = SourceInfo->m_src_width; + + CGU_UINT32 destI = (xID*BC5CompBlockSize) + (yID*(srcWidth / BlockX)*BC5CompBlockSize); + int srcindex = 4 * (yID * srcWidth + xID); + int blkindex = 0; + CMP_Vec4uc srcData[16]; + srcWidth = srcWidth - 4; + + for ( CGU_INT32 j = 0; j < 4; j++) { + for ( CGU_INT32 i = 0; i < 4; i++) { + srcData[blkindex++] = ImageSource[srcindex++]; + } + srcindex += srcWidth; + } + + CompressBlockBC5_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options); +} +#endif diff --git a/extern/CMP_Core/shaders/BC5_Encode_kernel.h b/extern/CMP_Core/shaders/BC5_Encode_kernel.h new file mode 100644 index 0000000..89cffcc --- /dev/null +++ b/extern/CMP_Core/shaders/BC5_Encode_kernel.h @@ -0,0 +1,31 @@ +//===================================================================== +// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +//===================================================================== +#ifndef BC5_ENCODE_KERNEL_H +#define BC5_ENCODE_KERNEL_H + +#include "Common_Def.h" +#include "BCn_Common_Kernel.h" + +#define BC5CompBlockSize 16 + +#endif \ No newline at end of file diff --git a/extern/CMP_Core/shaders/BC6_Encode_kernel.cpp b/extern/CMP_Core/shaders/BC6_Encode_kernel.cpp new file mode 100644 index 0000000..f131583 --- /dev/null +++ b/extern/CMP_Core/shaders/BC6_Encode_kernel.cpp @@ -0,0 +1,3990 @@ +//===================================================================== +// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +//===================================================================== +#include "BC6_Encode_kernel.h" + +#ifdef ASPM_GPU +void memset(CGU_UINT8 *srcdata, CGU_UINT8 value, CGU_INT size) +{ + for (CGU_INT i = 0; i < size; i++) + *srcdata++ = value; +} + +void memcpy(CGU_UINT8 *srcdata, CGU_UINT8 *dstdata, CGU_INT size) +{ + for (CGU_INT i = 0; i < size; i++) + { + *srcdata = *dstdata; + srcdata++; + dstdata++; + } +} + +void swap(CGU_INT A, CGU_INT B) +{ + CGU_INT hold = A; + A = B; + B = hold; +} + +#define abs fabs +#define floorf floor +#define sqrtf sqrt +#define logf log +#define ceilf ceil + +#endif + +__constant CGU_UINT8 BC6_PARTITIONS[MAX_BC6H_PARTITIONS][MAX_SUBSET_SIZE] = { + { // 0 + 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1 + }, + + { // 1 + 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1 + }, + + { // 2 + 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1 + }, + + { // 3 + 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1 + }, + + { // 4 + 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1 + }, + + { // 5 + 0,0,1,1,0,1,1,1, 0,1,1,1,1,1,1,1 + }, + + { // 6 + 0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1 + }, + + { // 7 + 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1 + }, + + { // 8 + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1 + }, + + { // 9 + 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1 + }, + + { // 10 + 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1 + }, + + { // 11 + 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1 + }, + + { // 12 + 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1 + }, + + { // 13 + 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1 + }, + + { // 14 + 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 + }, + + { // 15 + 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1 + }, + + 
{ // 16 + 0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1 + }, + + { // 17 + 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0 + }, + + { // 18 + 0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0 + }, + + { // 19 + 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0 + }, + + { // 20 + 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0 + }, + + { // 21 + 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0 + }, + + { // 22 + 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0 + }, + + { // 23 + 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1 + }, + + { // 24 + 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0 + }, + + { // 25 + 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0 + }, + + { // 26 + 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0 + }, + + { // 27 + 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0 + }, + + { // 28 + 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0 + }, + + { // 29 + 0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0 + }, + + { // 30 + 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0 + }, + + { // 31 + 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0 + }, +}; + +CGU_DWORD get_partition_subset(CGU_INT subset, CGU_INT partI, CGU_INT index) +{ + if (subset) + return BC6_PARTITIONS[partI][index]; + else + return 0; +} + +void Partition(CGU_INT shape, + CGU_FLOAT in[][MAX_DIMENSION_BIG], + CGU_FLOAT subsets[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG], //[3][16][4] + CGU_INT count[MAX_SUBSETS], + CGU_INT8 ShapeTableToUse, + CGU_INT dimension) +{ + int i, j; + int insubset = -1, inpart = 0; + + // Dont use memset: this is better for now + for (i = 0; i < MAX_SUBSETS; i++) count[i] = 0; + + switch (ShapeTableToUse) + { + case 0: + case 1: + insubset = 0; + inpart = 0; + break; + case 2: + insubset = 1; + inpart = shape; + break; + default: + break; + } + + // Nothing to do!!: Must indicate an error to user + if (insubset == -1) return; // Nothing to do!! 
+ + for (i = 0; i < MAX_SUBSET_SIZE; i++) + { + int subset = get_partition_subset(insubset, inpart, i); + for (j = 0; j < dimension; j++) + { + subsets[subset][count[subset]][j] = in[i][j]; + } + if (dimension < MAX_DIMENSION_BIG) + { + subsets[subset][count[subset]][j] = 0.0; + } + count[subset]++; + } + +} + +void GetEndPoints(CGU_FLOAT EndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_FLOAT outB[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG], CGU_INT max_subsets, int entryCount[MAX_SUBSETS]) +{ + // Should have some sort of error notification! + if (max_subsets > MAX_SUBSETS) return; + + // Save Min and Max OutB points as EndPoints + for (int subset = 0; subset < max_subsets; subset++) + { + // We now have points on direction vector(s) + // find the min and max points + CGU_FLOAT min = CMP_HALF_MAX; + CGU_FLOAT max = 0; + CGU_FLOAT val; + int mini = 0; + int maxi = 0; + + + for (int i = 0; i < entryCount[subset]; i++) + { + val = outB[subset][i][0] + outB[subset][i][1] + outB[subset][i][2]; + if (val < min) + { + min = val; + mini = i; + } + if (val > max) + { + max = val; + maxi = i; + } + } + + // Is round best for this ! 
+ for (int c = 0; c < MAX_DIMENSION_BIG; c++) + { + EndPoints[subset][0][c] = outB[subset][mini][c]; + } + + for (int c = 0; c < MAX_DIMENSION_BIG; c++) + { + EndPoints[subset][1][c] = outB[subset][maxi][c]; + } + } +} + +void covariance_d(CGU_FLOAT data[][MAX_DIMENSION_BIG], CGU_INT numEntries, CGU_FLOAT cov[MAX_DIMENSION_BIG][MAX_DIMENSION_BIG], CGU_INT dimension) +{ +#ifdef USE_DBGTRACE + DbgTrace(()); +#endif + int i, j, k; + + for (i = 0; i < dimension; i++) + for (j = 0; j <= i; j++) + { + cov[i][j] = 0; + for (k = 0; k < numEntries; k++) + cov[i][j] += data[k][i] * data[k][j]; + } + + for (i = 0; i < dimension; i++) + for (j = i + 1; j < dimension; j++) + cov[i][j] = cov[j][i]; +} + +void centerInPlace_d(CGU_FLOAT data[][MAX_DIMENSION_BIG], int numEntries, CGU_FLOAT mean[MAX_DIMENSION_BIG], CGU_INT dimension) +{ +#ifdef USE_DBGTRACE + DbgTrace(()); +#endif + int i, k; + + for (i = 0; i < dimension; i++) + { + mean[i] = 0; + for (k = 0; k < numEntries; k++) + mean[i] += data[k][i]; + } + + if (!numEntries) + return; + + for (i = 0; i < dimension; i++) + { + mean[i] /= numEntries; + for (k = 0; k < numEntries; k++) + data[k][i] -= mean[i]; + } +} + +void eigenVector_d(CGU_FLOAT cov[MAX_DIMENSION_BIG][MAX_DIMENSION_BIG], CGU_FLOAT vector[MAX_DIMENSION_BIG], CGU_INT dimension) +{ +#ifdef USE_DBGTRACE + DbgTrace(()); +#endif + // calculate an eigenvecto corresponding to a biggest eigenvalue + // will work for non-zero non-negative matricies only + +#define EV_ITERATION_NUMBER 20 +#define EV_SLACK 2 /* additive for exp base 2)*/ + + + CGU_INT i, j, k, l, m, n, p, q; + CGU_FLOAT c[2][MAX_DIMENSION_BIG][MAX_DIMENSION_BIG]; + CGU_FLOAT maxDiag; + + for (i = 0; i < dimension; i++) + for (j = 0; j < dimension; j++) + c[0][i][j] = cov[i][j]; + + p = (int)floorf(log((FLT_MAX_EXP - EV_SLACK) / ceilf(logf((CGU_FLOAT)dimension) / logf(2.0f))) / logf(2.0f)); + + //assert(p>0); + + p = p > 0 ? 
p : 1; + + q = (EV_ITERATION_NUMBER + p - 1) / p; + + l = 0; + + for (n = 0; n < q; n++) + { + maxDiag = 0; + + for (i = 0; i < dimension; i++) + maxDiag = c[l][i][i] > maxDiag ? c[l][i][i] : maxDiag; + + if (maxDiag <= 0) + { + return; + } + + //assert(maxDiag >0); + + for (i = 0; i < dimension; i++) + for (j = 0; j < dimension; j++) + c[l][i][j] /= maxDiag; + + for (m = 0; m < p; m++) { + for (i = 0; i < dimension; i++) + for (j = 0; j < dimension; j++) { + CGU_FLOAT temp = 0; + for (k = 0; k < dimension; k++) + { + // Notes: + // This is the most consuming portion of the code and needs optimizing for perfromance + temp += c[l][i][k] * c[l][k][j]; + } + c[1 - l][i][j] = temp; + } + l = 1 - l; + } + } + + maxDiag = 0; + k = 0; + + for (i = 0; i < dimension; i++) + { + k = c[l][i][i] > maxDiag ? i : k; + maxDiag = c[l][i][i] > maxDiag ? c[l][i][i] : maxDiag; + } + CGU_FLOAT t; + t = 0; + for (i = 0; i < dimension; i++) + { + t += c[l][k][i] * c[l][k][i]; + vector[i] = c[l][k][i]; + } + // normalization is really optional + t = sqrtf(t); + //assert(t>0); + + if (t <= 0) + { + return; + } + for (i = 0; i < dimension; i++) + vector[i] /= t; +} + +void project_d(CGU_FLOAT data[][MAX_DIMENSION_BIG], CGU_INT numEntries, CGU_FLOAT vector[MAX_DIMENSION_BIG], CGU_FLOAT projection[MAX_ENTRIES], CGU_INT dimension) +{ +#ifdef USE_DBGTRACE + DbgTrace(()); +#endif + // assume that vector is normalized already + int i, k; + + for (k = 0; k < numEntries; k++) + { + projection[k] = 0; + for (i = 0; i < dimension; i++) + { + projection[k] += data[k][i] * vector[i]; + } + } +} + +typedef struct { + CGU_FLOAT d; + int i; +} a; + +inline CGU_INT a_compare(const void *arg1, const void *arg2) +{ + if (((a*)arg1)->d - ((a*)arg2)->d > 0) return 1; + if (((a*)arg1)->d - ((a*)arg2)->d < 0) return -1; + return 0; +}; + +void sortProjection(CGU_FLOAT projection[MAX_ENTRIES], CGU_INT order[MAX_ENTRIES], CGU_INT numEntries) +{ + int i; + a what[MAX_ENTRIES + MAX_PARTITIONS_TABLE]; + + for (i = 
0; i < numEntries; i++) + what[what[i].i = i].d = projection[i]; + +#ifdef USE_QSORT + qsort((void*)&what, numEntries, sizeof(a), a_compare); +#else + { + int j; + int tmp; + CGU_FLOAT tmp_d; + for (i = 1; i < numEntries; i++) + { + for (j = i; j > 0; j--) + { + if (what[j - 1].d > what[j].d) + { + tmp = what[j].i; + tmp_d = what[j].d; + what[j].i = what[j - 1].i; + what[j].d = what[j - 1].d; + what[j - 1].i = tmp; + what[j - 1].d = tmp_d; + } + } + } + } +#endif + + + for (i = 0; i < numEntries; i++) + order[i] = what[i].i; +}; + +CGU_FLOAT totalError_d(CGU_FLOAT data[MAX_ENTRIES][MAX_DIMENSION_BIG], CGU_FLOAT data2[MAX_ENTRIES][MAX_DIMENSION_BIG], CGU_INT numEntries, CGU_INT dimension) +{ + int i, j; + CGU_FLOAT t = 0; + for (i = 0; i < numEntries; i++) + for (j = 0; j < dimension; j++) + t += (data[i][j] - data2[i][j])*(data[i][j] - data2[i][j]); + + return t; +}; + +// input: +// +// v_ points, might be uncentered +// k - number of points in the ramp +// n - number of points in v_ +// +// output: +// index, uncentered, in the range 0..k-1 +// + +void quant_AnD_Shell(CGU_FLOAT* v_, CGU_INT k, CGU_INT n, CGU_INT *idx) +{ +#define MAX_BLOCK MAX_ENTRIES + CGU_INT i, j; + CGU_FLOAT v[MAX_BLOCK]; + CGU_FLOAT z[MAX_BLOCK]; + a d[MAX_BLOCK]; + CGU_FLOAT l; + CGU_FLOAT mm; + CGU_FLOAT r = 0; + CGU_INT mi; + + CGU_FLOAT m, M, s, dm = 0.; + m = M = v_[0]; + + for (i = 1; i < n; i++) { + m = m < v_[i] ? m : v_[i]; + M = M > v_[i] ? M : v_[i]; + } + if (M == m) { + for (i = 0; i < n; i++) + idx[i] = 0; + return; + } + + //assert(M - m >0); + s = (k - 1) / (M - m); + for (i = 0; i < n; i++) { + v[i] = v_[i] * s; + + idx[i] = (int)(z[i] = (v[i] + 0.5f /* stabilizer*/ - m * s)); //floorf(v[i] + 0.5f /* stabilizer*/ - m *s)); + + d[i].d = v[i] - z[i] - m * s; + d[i].i = i; + dm += d[i].d; + r += d[i].d*d[i].d; + } + if (n*r - dm * dm >= (CGU_FLOAT)(n - 1) / 4 /*slack*/ / 2) { + + dm /= (CGU_FLOAT)n; + + for (i = 0; i < n; i++) + d[i].d -= dm; + + + //!!! 
Need an OpenCL version of qsort +#ifdef USE_QSORT + qsort((void*)&d, n, sizeof(a), a_compare); +#else + { + CGU_INT tmp; + CGU_FLOAT tmp_d; + for (i = 1; i < n; i++) { + for (j = i; j > 0; j--) + { + if (d[j - 1].d > d[j].d) + { + tmp = d[j].i; + tmp_d = d[j].d; + d[j].i = d[j - 1].i; + d[j].d = d[j - 1].d; + d[j - 1].i = tmp; + d[j - 1].d = tmp_d; + } + } + } + } +#endif + // got into fundamental simplex + // move coordinate system origin to its center + for (i = 0; i < n; i++) + d[i].d -= (2.0f*(CGU_FLOAT)i + 1.0f - (CGU_FLOAT)n) / 2.0f / (CGU_FLOAT)n; + + mm = l = 0.; + j = -1; + for (i = 0; i < n; i++) { + l += d[i].d; + if (l < mm) { + mm = l; + j = i; + } + } + + // position which should be in 0 + j = j + 1; + j = j % n; + + for (i = j; i < n; i++) + idx[d[i].i]++; + } + // get rid of an offset in idx + mi = idx[0]; + for (i = 1; i < n; i++) + mi = mi < idx[i] ? mi : idx[i]; + + for (i = 0; i < n; i++) + idx[i] -= mi; +} + +CGU_FLOAT optQuantAnD_d(CGU_FLOAT data[MAX_ENTRIES][MAX_DIMENSION_BIG], + CGU_INT numEntries, + CGU_INT numClusters, + CGU_INT index[MAX_ENTRIES], + CGU_FLOAT out[MAX_ENTRIES][MAX_DIMENSION_BIG], + CGU_FLOAT direction[MAX_DIMENSION_BIG], CGU_FLOAT *step, + CGU_INT dimension, + CGU_FLOAT quality) +{ + CGU_INT index_[MAX_ENTRIES]; + + CGU_INT maxTry = (int)(MAX_TRY * quality); + CGU_INT try_two = 50; + + CGU_INT i, j, k; + CGU_FLOAT t, s; + + CGU_FLOAT centered[MAX_ENTRIES][MAX_DIMENSION_BIG]; + + CGU_FLOAT mean[MAX_DIMENSION_BIG]; + + CGU_FLOAT cov[MAX_DIMENSION_BIG][MAX_DIMENSION_BIG]; + + CGU_FLOAT projected[MAX_ENTRIES]; + + CGU_INT order_[MAX_ENTRIES]; + + + for (i = 0; i < numEntries; i++) + for (j = 0; j < dimension; j++) + centered[i][j] = data[i][j]; + + centerInPlace_d(centered, numEntries, mean, dimension); + covariance_d(centered, numEntries, cov, dimension); + + // check if they all are the same + + t = 0; + for (j = 0; j < dimension; j++) + t += cov[j][j]; + + if (numEntries == 0) { + for (i = 0; i < numEntries; i++) { + 
index[i] = 0; + for (j = 0; j < dimension; j++) + out[i][j] = mean[j]; + } + return 0.0f; + } + + eigenVector_d(cov, direction, dimension); + project_d(centered, numEntries, direction, projected, dimension); + + for (i = 0; i < maxTry; i++) + { + CGU_INT done = 0; + + if (i) + { + do + { + CGU_FLOAT q; + q = s = t = 0; + + for (k = 0; k < numEntries; k++) + { + s += index[k]; + t += index[k] * index[k]; + } + + for (j = 0; j < dimension; j++) + { + direction[j] = 0; + for (k = 0; k < numEntries; k++) + direction[j] += centered[k][j] * index[k]; + q += direction[j] * direction[j]; + + } + + s /= (CGU_FLOAT)numEntries; + t = t - s * s * (CGU_FLOAT)numEntries; + //assert(t != 0); + t = (t == 0.0f ? 0.0f : 1.0f / t); + // We need to requantize + + q = sqrtf(q); + t *= q; + + if (q != 0) + for (j = 0; j < dimension; j++) + direction[j] /= q; + + // direction normalized + + project_d(centered, numEntries, direction, projected, dimension); + sortProjection(projected, order_, numEntries); + + CGU_INT index__[MAX_ENTRIES]; + + // it's projected and centered; cluster centers are (index[i]-s)*t (*dir) + k = 0; + for (j = 0; j < numEntries; j++) + { + while (projected[order_[j]] > (k + 0.5 - s)*t && k < numClusters - 1) + k++; + index__[order_[j]] = k; + } + done = 1; + for (j = 0; j < numEntries; j++) + { + done = (done && (index__[j] == index[j])); + index[j] = index__[j]; + } + } while (!done && try_two--); + + if (i == 1) + for (j = 0; j < numEntries; j++) + index_[j] = index[j]; + else + { + done = 1; + for (j = 0; j < numEntries; j++) + { + done = (done && (index_[j] == index[j])); + index_[j] = index_[j]; + } + if (done) + break; + + } + } + + quant_AnD_Shell(projected, numClusters, numEntries, index); + } + s = t = 0; + + CGU_FLOAT q = 0; + + for (k = 0; k < numEntries; k++) + { + s += index[k]; + t += index[k] * index[k]; + } + + for (j = 0; j < dimension; j++) + { + direction[j] = 0; + for (k = 0; k < numEntries; k++) + direction[j] += centered[k][j] * index[k]; + q 
+= direction[j] * direction[j]; + } + + s /= (CGU_FLOAT)numEntries; + + t = t - s * s * (CGU_FLOAT)numEntries; + + //assert(t != 0); + + t = (t == 0.0 ? 0.0f : 1.0f / t); + + for (i = 0; i < numEntries; i++) + for (j = 0; j < dimension; j++) + out[i][j] = mean[j] + direction[j] * t*(index[i] - s); + + // normalize direction for output + + q = sqrtf(q); + *step = t * q; + for (j = 0; j < dimension; j++) + direction[j] /= q; + + return totalError_d(data, out, numEntries, dimension); +} + +void clampF16Max(CGU_FLOAT EndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_BOOL isSigned) +{ + for (CGU_INT region = 0; region < 2; region++) + for (CGU_INT ab = 0; ab < 2; ab++) + for (CGU_INT rgb = 0; rgb < 3; rgb++) + { + if (isSigned) + { + if (EndPoints[region][ab][rgb] < -FLT16_MAX) EndPoints[region][ab][rgb] = -FLT16_MAX; + else if (EndPoints[region][ab][rgb] > FLT16_MAX) EndPoints[region][ab][rgb] = FLT16_MAX; + } + else + { + if (EndPoints[region][ab][rgb] < 0.0) EndPoints[region][ab][rgb] = 0.0; + else if (EndPoints[region][ab][rgb] > FLT16_MAX) EndPoints[region][ab][rgb] = FLT16_MAX; + } + // Zero region + // if ((EndPoints[region][ab][rgb] > -0.01) && ((EndPoints[region][ab][rgb] < 0.01))) EndPoints[region][ab][rgb] = 0.0; + } +} + +//===================================================================================================================== +#define LOG_CL_BASE 2 +#define BIT_BASE 5 +#define LOG_CL_RANGE 5 +#define BIT_RANGE 9 +#define MAX_CLUSTERS_BIG 16 +#define BTT(bits) (bits-BIT_BASE) +#define CLT(cl) (cl-LOG_CL_BASE) + +#ifdef USE_BC6RAMPS + +int spidx(int in_data, int in_clogs, int in_bits, int in_p2, int in_o1, int in_o2, int in_i) +{ + // use BC7 sp_idx + return 0; +} + +float sperr(int in_data, int clogs, int bits, int p2, int o1, int o2) +{ + // use BC7 sp_err + return 0,0f; +} +#endif + +__constant CGU_FLOAT rampLerpWeightsBC6[5][16] = +{ + { 0.0 }, // 0 bit index + { 0.0, 1.0 }, // 1 bit index + { 0.0, 21.0 / 64.0, 43.0 / 64.0, 1.0 
}, // 2 bit index + { 0.0, 9.0 / 64.0, 18.0 / 64.0, 27.0 / 64.0, 37.0 / 64.0, 46.0 / 64.0, 55.0 / 64.0, 1.0 }, // 3 bit index + { 0.0, 4.0 / 64.0, 9.0 / 64.0, 13.0 / 64.0, 17.0 / 64.0, 21.0 / 64.0, 26.0 / 64.0, 30.0 / 64.0, + 34.0 / 64.0, 38.0 / 64.0, 43.0 / 64.0, 47.0 / 64.0, 51.0 / 64.0, 55.0 / 64.0, 60.0 / 64.0, 1.0 } // 4 bit index +}; + + +CGU_FLOAT rampf(CGU_INT clogs, CGU_FLOAT p1, CGU_FLOAT p2, CGU_INT indexPos) +{ + // (clogs+ LOG_CL_BASE) starts from 2 to 4 + return (CGU_FLOAT)p1 + rampLerpWeightsBC6[clogs + LOG_CL_BASE][indexPos] * (p2 - p1); +} + +CGU_INT all_same_d(CGU_FLOAT d[][MAX_DIMENSION_BIG], CGU_INT n, CGU_INT dimension) +{ + CGU_INT i, j; + CGU_INT same = 1; + for (i = 1; i < n; i++) + for (j = 0; j < dimension; j++) + same = same && (d[0][j] == d[i][j]); + + return(same); +} + +// return the max index from a set of indexes +CGU_INT max_index(CGU_INT a[], CGU_INT n) +{ + CGU_INT i, m = a[0]; + for (i = 0; i < n; i++) + m = m > a[i] ? m : a[i]; + return (m); +} + +CGU_INT cluster_mean_d_d(CGU_FLOAT d[MAX_ENTRIES][MAX_DIMENSION_BIG], CGU_FLOAT mean[MAX_ENTRIES][MAX_DIMENSION_BIG], CGU_INT index[], CGU_INT i_comp[], CGU_INT i_cnt[], CGU_INT n, CGU_INT dimension) +{ + // unused index values are underfined + CGU_INT i, j, k; + //assert(n!=0); + + for (i = 0; i < n; i++) + for (j = 0; j < dimension; j++) { + // assert(index[i] index[k] ? Mi : index[k]; + } + D = 1; + for (d = 2; d <= Mi - mi; d++) { + + for (k = 0; k < numEntries; k++) + if ((index[k] - mi) % d != 0) + break; + if (k >= numEntries) + D = d; + } + for (k = 0; k < numEntries; k++) + index[k] = (index[k] - mi) / D; +} + +CGU_INT max_int(CGU_INT a[], CGU_INT n) +{ + CGU_INT i, m = a[0]; + for (i = 0; i < n; i++) + m = m > a[i] ? 
m : a[i]; + return (m); +} + +__constant CGU_INT npv_nd[2][2 * MAX_DIMENSION_BIG] = +{ + { 1,2,4,8,16,32,0,0 }, //dimension = 3 + { 1,2,4,0,0,0,0,0 } //dimension = 4 +}; + +__constant short par_vectors_nd[2][8][128][2][MAX_DIMENSION_BIG] = +{ + { // Dimension = 3 + { + { { 0,0,0,0 },{ 0,0,0,0 } }, + { { 0,0,0,0 },{ 0,0,0,0 } } + }, + + // 3*n+1 BCC 3*n+1 Cartesian 3*n //same parity + { // SAME_PAR + { { 0,0,0 },{ 0,0,0 } }, + { { 1,1,1 },{ 1,1,1 } } + }, + // 3*n+2 BCC 3*n+1 BCC 3*n+1 + { // BCC + { { 0,0,0 },{ 0,0,0 } }, + { { 0,0,0 },{ 1,1,1 } }, + { { 1,1,1 },{ 0,0,0 } }, + { { 1,1,1 },{ 1,1,1 } } + }, + // 3*n+3 FCC ??? // ?????? + // BCC with FCC same or inverted, symmetric + { // BCC_SAME_FCC + { { 0,0,0 },{ 0,0,0 } }, + { { 1,1,0 },{ 1,1,0 } }, + { { 1,0,1 },{ 1,0,1 } }, + { { 0,1,1 },{ 0,1,1 } }, + + { { 0,0,0 },{ 1,1,1 } }, + { { 1,1,1 },{ 0,0,0 } }, + { { 0,1,0 },{ 0,1,0 } }, // ?? + { { 1,1,1 },{ 1,1,1 } }, + + }, + // 3*n+4 FCC 3*n+2 FCC 3*n+2 + { + + { { 0,0,0 },{ 0,0,0 } }, + { { 1,1,0 },{ 0,0,0 } }, + { { 1,0,1 },{ 0,0,0 } }, + { { 0,1,1 },{ 0,0,0 } }, + + { { 0,0,0 },{ 1,1,0 } }, + { { 1,1,0 },{ 1,1,0 } }, + { { 1,0,1 },{ 1,1,0 } }, + { { 0,1,1 },{ 1,1,0 } }, + + { { 0,0,0 },{ 1,0,1 } }, + { { 1,1,0 },{ 1,0,1 } }, + { { 1,0,1 },{ 1,0,1 } }, + { { 0,1,1 },{ 1,0,1 } }, + + { { 0,0,0 },{ 0,1,1 } }, + { { 1,1,0 },{ 0,1,1 } }, + { { 1,0,1 },{ 0,1,1 } }, + { { 0,1,1 },{ 0,1,1 } } + }, + + + // 3*n+5 Cartesian 3*n+3 FCC 3*n+2 //D^*[6] + { + + { { 0,0,0 },{ 0,0,0 } }, + { { 1,1,0 },{ 0,0,0 } }, + { { 1,0,1 },{ 0,0,0 } }, + { { 0,1,1 },{ 0,0,0 } }, + + { { 0,0,0 },{ 1,1,0 } }, + { { 1,1,0 },{ 1,1,0 } }, + { { 1,0,1 },{ 1,1,0 } }, + { { 0,1,1 },{ 1,1,0 } }, + + { { 0,0,0 },{ 1,0,1 } }, + { { 1,1,0 },{ 1,0,1 } }, + { { 1,0,1 },{ 1,0,1 } }, + { { 0,1,1 },{ 1,0,1 } }, + + { { 0,0,0 },{ 0,1,1 } }, + { { 1,1,0 },{ 0,1,1 } }, + { { 1,0,1 },{ 0,1,1 } }, + { { 0,1,1 },{ 0,1,1 } }, + + + { { 1,0,0 },{ 1,1,1 } }, + { { 0,1,0 },{ 1,1,1 } }, + { { 0,0,1 },{ 1,1,1 } }, + 
{ { 1,1,1 },{ 1,1,1 } }, + + { { 1,0,0 },{ 0,0,1 } }, + { { 0,1,0 },{ 0,0,1 } }, + { { 0,0,1 },{ 0,0,1 } }, + { { 1,1,1 },{ 0,0,1 } }, + + { { 1,0,0 },{ 1,0,0 } }, + { { 0,1,0 },{ 1,0,0 } }, + { { 0,0,1 },{ 1,0,0 } }, + { { 1,1,1 },{ 1,0,0 } }, + + { { 1,0,0 },{ 0,1,0 } }, + { { 0,1,0 },{ 0,1,0 } }, + { { 0,0,1 },{ 0,1,0 } }, + { { 1,1,1 },{ 0,1,0 } } + } + },// Dimension = 3 + { // Dimension = 4 + { + { { 0,0,0,0 },{ 0,0,0,0 } }, + { { 0,0,0,0 },{ 0,0,0,0 } } + }, + + // 3*n+1 BCC 3*n+1 Cartesian 3*n //same parity + { // SAME_PAR + { { 0,0,0,0 },{ 0,0,0,0 } }, + { { 1,1,1,1 },{ 1,1,1,1 } } + }, + // 3*n+2 BCC 3*n+1 BCC 3*n+1 + { // BCC + { { 0,0,0,0 },{ 0,0,0,0 } }, + { { 0,0,0,0 },{ 1,1,1,1 } }, + { { 1,1,1,1 },{ 0,0,0,0 } }, + { { 1,1,1,1 },{ 1,1,1,1 } } + }, + // 3 PBIT + { + { { 0,0,0,0 },{ 0,0,0,0 } }, + { { 0,0,0,0 },{ 0,1,1,1 } }, + { { 0,1,1,1 },{ 0,0,0,0 } }, + { { 0,1,1,1 },{ 0,1,1,1 } }, + + { { 1,0,0,0 },{ 1,0,0,0 } }, + { { 1,0,0,0 },{ 1,1,1,1 } }, + { { 1,1,1,1 },{ 1,0,0,0 } }, + { { 1,1,1,1 },{ 1,1,1,1 } } + }, + + // 4 PBIT + { + { { 0,0,0,0 },{ 0,0,0,0 } }, + { { 0,0,0,0 },{ 0,1,1,1 } }, + { { 0,1,1,1 },{ 0,0,0,0 } }, + { { 0,1,1,1 },{ 0,1,1,1 } }, + + { { 1,0,0,0 },{ 1,0,0,0 } }, + { { 1,0,0,0 },{ 1,1,1,1 } }, + { { 1,1,1,1 },{ 1,0,0,0 } }, + { { 1,1,1,1 },{ 1,1,1,1 } }, + + { { 0,0,0,0 },{ 0,0,0,0 } }, + { { 0,0,0,0 },{ 0,0,1,1 } }, + { { 0,0,1,1 },{ 0,0,0,0 } }, + { { 0,1,0,1 },{ 0,1,0,1 } }, + + { { 1,0,0,0 },{ 1,0,0,0 } }, + { { 1,0,0,0 },{ 1,0,1,1 } }, + { { 1,0,1,1 },{ 1,0,0,0 } }, + { { 1,1,0,1 },{ 1,1,0,1 } }, + + }, + + } // Dimension = 4 + +}; + +CGU_INT get_par_vector(CGU_INT dim1, CGU_INT dim2, CGU_INT dim3, CGU_INT dim4, CGU_INT dim5) +{ + return par_vectors_nd[dim1][dim2][dim3][dim4][dim5]; +} + +CGU_FLOAT quant_single_point_d(CGU_FLOAT data[MAX_ENTRIES][MAX_DIMENSION_BIG], + CGU_INT numEntries, CGU_INT index[MAX_ENTRIES], + CGU_FLOAT out[MAX_ENTRIES][MAX_DIMENSION_BIG], + CGU_INT epo_1[2][MAX_DIMENSION_BIG], + CGU_INT Mi_, // last 
cluster + CGU_INT type, + CGU_INT dimension) +{ + if (dimension < 3) return CMP_FLOAT_MAX; + + CGU_INT i, j; + + CGU_FLOAT err_0 = CMP_FLOAT_MAX; + CGU_FLOAT err_1 = CMP_FLOAT_MAX; + + CGU_INT idx = 0; + CGU_INT idx_1 = 0; + + CGU_INT epo_0[2][MAX_DIMENSION_BIG]; + + CGU_INT use_par = (type != 0); + + CGU_INT clogs = 0; + i = Mi_ + 1; + while (i >>= 1) + clogs++; + + // assert((1< sperr(tc, CLT(clogs), BTT(bits[j]), t1, t2, i)) + dr[j] = tc; + else if (sperr(tf, CLT(clogs), BTT(bits[j]), t1, t2, i) < sperr(tc, CLT(clogs), BTT(bits[j]), t1, t2, i)) + dr[j] = tf; + else +#endif + dr[j] = (int)floorf(data[0][j] + 0.5f); + +#ifdef USE_BC6RAMPS + tr = sperr(dr[j], CLT(clogs), BTT(bits[j]), t1, t2, i) + 2.0f * sqrtf(sperr(dr[j], CLT(clogs), BTT(bits[j]), t1, t2, i)) * fabsf((float)dr[j] - data[0][j]) + + (dr[j] - data[0][j])* (dr[j] - data[0][j]); + if (tr < t_) + { + t_ = tr; +#else + t_ = 0; +#endif + + t1o[j] = t1; + t2o[j] = t2; + dr_0[j] = dr[j]; +#ifdef USE_BC6RAMPS + if ((dr_0[j] < 0) || (dr_0[j] > 255)) + { + dr_0[j] = 0; // Error! + } + } +#endif + } // B + } //C + + t += t_; + } // D + + + if (t < err_0) + { + + idx = i; + + for (j = 0; j < dimension; j++) + { +#ifdef USE_BC6RAMPS + CGU_INT p1 = CLT(clogs); // < 3 + CGU_INT p2 = BTT(bits[j]); // < 4 + CGU_INT in_data = dr_0[j]; // < SP_ERRIDX_MAX + CGU_INT p4 = t1o[j]; // < 2 + CGU_INT p5 = t2o[j]; // < 2 + CGU_INT p6 = i; // < 16 + + // New spidx + epo_0[0][j] = spidx(in_data, p1, p2, p4, p5, p6, 0); + epo_0[1][j] = spidx(in_data, p1, p2, p4, p5, p6, 1); + + if (epo_0[1][j] >= SP_ERRIDX_MAX) + { + epo_0[1][j] = 0; // Error!! 
+ } +#else + epo_0[0][j] = 0; + epo_0[1][j] = 0; +#endif + } + err_0 = t; + } + if (err_0 == 0) + break; + } // E + + if (err_0 < err_1) + { + idx_1 = idx; + for (j = 0; j < dimension; j++) + { + epo_1[0][j] = epo_0[0][j]; + epo_1[1][j] = epo_0[1][j]; + } + err_1 = err_0; + } + + if (err_1 == 0) + break; + } //1 + +for (i = 0; i < numEntries; i++) +{ + index[i] = idx_1; + for (j = 0; j < dimension; j++) + { + CGU_INT p1 = CLT(clogs); // < 3 + CGU_INT p3 = epo_1[0][j]; // < SP_ERRIDX_MAX + CGU_INT p4 = epo_1[1][j]; // < SP_ERRIDX_MAX + CGU_INT p5 = idx_1; // < 16 +#pragma warning( push ) +#pragma warning(disable:4244) + out[i][j] = (int)rampf(p1, p3, p4, p5); +#pragma warning( pop ) + } +} +return err_1 * numEntries; +} + +//======================================================================================================================== + +CGU_FLOAT ep_shaker_HD(CGU_FLOAT data[MAX_ENTRIES][MAX_DIMENSION_BIG], + CGU_INT numEntries, + CGU_INT index_[MAX_ENTRIES], + CGU_FLOAT out[MAX_ENTRIES][MAX_DIMENSION_BIG], + CGU_INT epo_code_out[2][MAX_DIMENSION_BIG], + CGU_INT Mi_, // last cluster + CGU_INT bits[3], // including parity + CGU_INT channels3or4 +) +{ + CGU_INT i, j, k; + CGU_INT use_par = 0; + CGU_INT clogs = 0; + + i = Mi_ + 1; + while (i >>= 1) + clogs++; + + CGU_FLOAT mean[MAX_DIMENSION_BIG]; + CGU_INT index[MAX_ENTRIES]; + CGU_INT Mi; + + CGU_INT maxTry = 1; + + for (k = 0; k < numEntries; k++) + { + index[k] = index_[k]; + } + + CGU_INT done; + CGU_INT change; + + CGU_INT better; + + CGU_FLOAT err_o = CMP_FLOAT_MAX; + CGU_FLOAT out_2[MAX_ENTRIES][MAX_DIMENSION_BIG]; + CGU_INT idx_2[MAX_ENTRIES]; + CGU_INT epo_2[2][MAX_DIMENSION_BIG]; + + CGU_INT max_bits[MAX_DIMENSION_BIG]; + CGU_INT type = bits[0] % (2 * channels3or4); + + for (j = 0; j < channels3or4; j++) + max_bits[j] = (bits[0] + 2 * channels3or4 - 1) / (2 * channels3or4); + + + // handled below automatically + CGU_INT alls = all_same_d(data, numEntries, channels3or4); + + mean_d_d(data, mean, 
numEntries, channels3or4); + + do { + index_collapse_kernel(index, numEntries); + + Mi = max_index(index, numEntries); // index can be from requantizer + + CGU_INT p, q; + CGU_INT p0 = -1, q0 = -1; + + CGU_FLOAT err_2 = CMP_FLOAT_MAX; + + if (Mi == 0) { + CGU_FLOAT t; + CGU_INT epo_0[2][MAX_DIMENSION_BIG]; + // either sinle point from the beginning or collapsed index + if (alls) { + t = quant_single_point_d(data, numEntries, index, out_2, epo_0, Mi_, type, channels3or4); + } + else + { + quant_single_point_d(&mean, numEntries, index, out_2, epo_0, Mi_, type, channels3or4); + t = totalError_d(data, out_2, numEntries, channels3or4); + } + + if (t < err_o) { + for (k = 0; k < numEntries; k++) { + index_[k] = index[k]; + for (j = 0; j < channels3or4; j++) { + out[k][j] = out_2[k][j]; + epo_code_out[0][j] = epo_0[0][j]; + epo_code_out[1][j] = epo_0[1][j]; + } + }; + err_o = t; + } + return err_o; + } + + //=============================== + // We have ramp colors to process + //=============================== + + for (q = 1; Mi != 0 && q*Mi <= Mi_; q++) // does not work for single point collapsed index!!! 
+ { + for (p = 0; p <= Mi_ - q * Mi; p++) + { + + //------------------------------------- + // set a new index data to try + //------------------------------------- + CGU_INT cidx[MAX_ENTRIES]; + + for (k = 0; k < numEntries; k++) + { + cidx[k] = index[k] * q + p; + } + + CGU_FLOAT epa[2][MAX_DIMENSION_BIG]; + + // + // solve RMS problem for center + // + + CGU_FLOAT im[2][2] = { { 0,0 },{ 0,0 } }; // matrix /inverse matrix + CGU_FLOAT rp[2][MAX_DIMENSION_BIG]; // right part for RMS fit problem + + // get ideal clustr centers + CGU_FLOAT cc[MAX_CLUSTERS_BIG][MAX_DIMENSION_BIG]; + CGU_INT index_cnt[MAX_CLUSTERS_BIG]; // count of index entries + CGU_INT index_comp[MAX_CLUSTERS_BIG]; // compacted index + CGU_INT index_ncl; // number of unique indexes + + index_ncl = cluster_mean_d_d(data, cc, cidx, index_comp, index_cnt, numEntries, channels3or4); // unrounded + + for (i = 0; i < index_ncl; i++) + for (j = 0; j < channels3or4; j++) + cc[index_comp[i]][j] = (CGU_FLOAT)floorf(cc[index_comp[i]][j] + 0.5f); // more or less ideal location + + for (j = 0; j < channels3or4; j++) + { + rp[0][j] = rp[1][j] = 0; + } + + // weight with cnt if runnning on compacted index + for (k = 0; k < numEntries; k++) + { + im[0][0] += (Mi_ - cidx[k])* (Mi_ - cidx[k]); + im[0][1] += cidx[k] * (Mi_ - cidx[k]); // im is symmetric + im[1][1] += cidx[k] * cidx[k]; + + for (j = 0; j < channels3or4; j++) + { + rp[0][j] += (Mi_ - cidx[k]) * cc[cidx[k]][j]; + rp[1][j] += cidx[k] * cc[cidx[k]][j]; + } + } + + CGU_FLOAT dd = im[0][0] * im[1][1] - im[0][1] * im[0][1]; + + //assert(dd !=0); + + // dd=0 means that cidx[k] and (Mi_-cidx[k]) collinear which implies only one active index; + // taken care of separately + + im[1][0] = im[0][0]; + im[0][0] = im[1][1] / dd; + im[1][1] = im[1][0] / dd; + im[1][0] = im[0][1] = -im[0][1] / dd; + + for (j = 0; j < channels3or4; j++) { + epa[0][j] = (im[0][0] * rp[0][j] + im[0][1] * rp[1][j])*Mi_; + epa[1][j] = (im[1][0] * rp[0][j] + im[1][1] * rp[1][j])*Mi_; + } + + 
CGU_FLOAT err_1 = CMP_FLOAT_MAX; + CGU_FLOAT out_1[MAX_ENTRIES][MAX_DIMENSION_BIG]; + CGU_INT idx_1[MAX_ENTRIES]; + CGU_INT epo_1[2][MAX_DIMENSION_BIG]; + CGU_INT s1 = 0; + CGU_FLOAT epd[2][MAX_DIMENSION_BIG][2]; // first second, coord, begin range end range + + for (j = 0; j < channels3or4; j++) + { + for (i = 0; i < 2; i++) + { // set range + epd[i][j][0] = epd[i][j][1] = epa[i][j]; + epd[i][j][1] += ((1 << bits[j]) - 1 - (int)epd[i][j][1] < (1 << use_par) ? + (1 << bits[j]) - 1 - (int)epd[i][j][1] : (1 << use_par)) & (~use_par); + } + } + + CGU_FLOAT ce[MAX_ENTRIES][MAX_CLUSTERS_BIG][MAX_DIMENSION_BIG]; + CGU_FLOAT err_0 = 0; + CGU_FLOAT out_0[MAX_ENTRIES][MAX_DIMENSION_BIG]; + CGU_INT idx_0[MAX_ENTRIES]; + + for (i = 0; i < numEntries; i++) + { + CGU_FLOAT d[4]; + d[0] = data[i][0]; + d[1] = data[i][1]; + d[2] = data[i][2]; + d[3] = data[i][3]; + for (j = 0; j < (1 << clogs); j++) + for (k = 0; k < channels3or4; k++) + { + ce[i][j][k] = (rampf(CLT(clogs), epd[0][k][0], epd[1][k][0], j) - d[k])* + (rampf(CLT(clogs), epd[0][k][0], epd[1][k][0], j) - d[k]); + } + } + + CGU_INT s = 0, p1, g; + CGU_INT ei0 = 0, ei1 = 0; + + for (p1 = 0; p1 < 64; p1++) + { + CGU_INT j0 = 0; + + // Gray code increment + g = p1 & (-p1); + + err_0 = 0; + + for (j = 0; j < channels3or4; j++) + { + if (((g >> (2 * j)) & 0x3) != 0) + { + j0 = j; + // new cords + ei0 = (((s^g) >> (2 * j)) & 0x1); + ei1 = (((s^g) >> (2 * j + 1)) & 0x1); + } + } + s = s ^ g; + err_0 = 0; + + for (i = 0; i < numEntries; i++) + { + CGU_FLOAT d[4]; + d[0] = data[i][0]; + d[1] = data[i][1]; + d[2] = data[i][2]; + d[3] = data[i][3]; + CGU_INT ci = 0; + CGU_FLOAT cmin = CMP_FLOAT_MAX; + + for (j = 0; j < (1 << clogs); j++) + { + float t_ = 0.; + ce[i][j][j0] = (rampf(CLT(clogs), epd[0][j0][ei0], epd[1][j0][ei1], j) - d[j0])* + (rampf(CLT(clogs), epd[0][j0][ei0], epd[1][j0][ei1], j) - d[j0]); + for (k = 0; k < channels3or4; k++) + { + t_ += ce[i][j][k]; + } + + if (t_ < cmin) + { + cmin = t_; + ci = j; + } + } + + 
idx_0[i] = ci; + for (k = 0; k < channels3or4; k++) + { + out_0[i][k] = rampf(CLT(clogs), epd[0][k][ei0], epd[1][k][ei1], ci); + } + err_0 += cmin; + } + + if (err_0 < err_1) + { + // best in the curent ep cube run + for (i = 0; i < numEntries; i++) + { + idx_1[i] = idx_0[i]; + for (j = 0; j < channels3or4; j++) + out_1[i][j] = out_0[i][j]; + } + err_1 = err_0; + + s1 = s; // epo coding + } + } + + // reconstruct epo + for (j = 0; j < channels3or4; j++) + { + { + // new cords + ei0 = ((s1 >> (2 * j)) & 0x1); + ei1 = ((s1 >> (2 * j + 1)) & 0x1); + epo_1[0][j] = (int)epd[0][j][ei0]; + epo_1[1][j] = (int)epd[1][j][ei1]; + } + } + + if (err_1 < err_2) + { + // best in the curent ep cube run + for (i = 0; i < numEntries; i++) + { + idx_2[i] = idx_1[i]; + for (j = 0; j < channels3or4; j++) + out_2[i][j] = out_1[i][j]; + } + err_2 = err_1; + for (j = 0; j < channels3or4; j++) + { + epo_2[0][j] = epo_1[0][j]; + epo_2[1][j] = epo_1[1][j]; + } + p0 = p; + q0 = q; + } + } + } + + // change/better + change = 0; + for (k = 0; k < numEntries; k++) + change = change || (index[k] * q0 + p0 != idx_2[k]); + + better = err_2 < err_o; + + if (better) + { + for (k = 0; k < numEntries; k++) + { + index_[k] = index[k] = idx_2[k]; + for (j = 0; j < channels3or4; j++) + { + out[k][j] = out_2[k][j]; + epo_code_out[0][j] = epo_2[0][j]; + epo_code_out[1][j] = epo_2[1][j]; + } + } + err_o = err_2; + } + + done = !(change && better); + + if (maxTry > 0) maxTry--; + else maxTry = 0; + + } while (!done && maxTry); + + return err_o; +} + + +#ifndef ASPM_GPU +static CGU_INT g_aWeights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 }; // 3 bit color Indices +static CGU_INT g_aWeights4[] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; // 4 bit color indices + +CGU_FLOAT lerpf(CGU_FLOAT a, CGU_FLOAT b, CGU_INT i, CGU_INT denom) +{ + assert(denom == 3 || denom == 7 || denom == 15); + assert(i >= 0 && i <= denom); + + CGU_INT *weights = NULL; + + switch (denom) + { + case 3: denom *= 5; i *= 5; 
// fall through to case 15 + case 7: weights = g_aWeights3; break; + case 15: weights = g_aWeights4; break; + default: assert(0); + } + return (a*weights[denom - i] + b * weights[i]) / 64.0f; +} +#else + +CGU_FLOAT lerpf(CGU_FLOAT a, CGU_FLOAT b, CGU_INT i, CGU_INT denom) +{ + CGU_INT g_aWeights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 }; // 3 bit color Indices + CGU_INT g_aWeights4[] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; // 4 bit color indices + switch (denom) + { + case 7: return ((a*g_aWeights3[denom - i] + b * g_aWeights3[i]) / 64.0f); break; + case 15: return ((a*g_aWeights4[denom - i] + b * g_aWeights4[i]) / 64.0f); break; + default: + case 3:// fall through to case 15 + denom *= 5; + i *= 5; + return ((a*g_aWeights3[denom - i] + b * g_aWeights3[i]) / 64.0f); break; + } +} +#endif + +void palitizeEndPointsF(BC6H_Encode_local *BC6H_data, CGU_FLOAT fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG]) +{ + // scale endpoints + CGU_FLOAT Ar, Ag, Ab, Br, Bg, Bb; + + + // Compose index colors from end points + if (BC6H_data->region == 1) + { + Ar = fEndPoints[0][0][0]; + Ag = fEndPoints[0][0][1]; + Ab = fEndPoints[0][0][2]; + Br = fEndPoints[0][1][0]; + Bg = fEndPoints[0][1][1]; + Bb = fEndPoints[0][1][2]; + + for (CGU_INT i = 0; i < 16; i++) + { + + // Red + BC6H_data->Paletef[0][i].x = lerpf(Ar, Br, i, 15); + // Green + BC6H_data->Paletef[0][i].y = lerpf(Ag, Bg, i, 15); + // Blue + BC6H_data->Paletef[0][i].z = lerpf(Ab, Bb, i, 15); + } + + } + else //mode.type == BC6_TWO + { + for (CGU_INT region = 0; region < 2; region++) + { + Ar = fEndPoints[region][0][0]; + Ag = fEndPoints[region][0][1]; + Ab = fEndPoints[region][0][2]; + Br = fEndPoints[region][1][0]; + Bg = fEndPoints[region][1][1]; + Bb = fEndPoints[region][1][2]; + for (CGU_INT i = 0; i < 8; i++) + { + // Red + BC6H_data->Paletef[region][i].x = lerpf(Ar, Br, i, 7); + // Greed + BC6H_data->Paletef[region][i].y = lerpf(Ag, Bg, i, 7); + // Blue + BC6H_data->Paletef[region][i].z 
= lerpf(Ab, Bb, i, 7); + } + + } + } +} + +CGU_FLOAT CalcShapeError(BC6H_Encode_local *BC6H_data, CGU_FLOAT fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_BOOL SkipPallet) +{ + CGU_INT maxPallet; + CGU_INT subset = 0; + CGU_FLOAT totalError = 0.0f; + CGU_INT region = (BC6H_data->region - 1); + + if (region == 0) + maxPallet = 16; + else + maxPallet = 8; + + if (!SkipPallet) + palitizeEndPointsF(BC6H_data, fEndPoints); + + for (CGU_INT i = 0; i < MAX_SUBSET_SIZE; i++) + { + CGU_FLOAT error = 0.0f; + CGU_FLOAT bestError = 0.0f; + + if (region == 0) + { + subset = 0; + } + else + { + // get the shape subset 0 or 1 + subset = BC6_PARTITIONS[BC6H_data->d_shape_index][i]; + } + + // initialize bestError to the difference for first data + bestError = abs(BC6H_data->din[i][0] - BC6H_data->Paletef[subset][0].x) + + abs(BC6H_data->din[i][1] - BC6H_data->Paletef[subset][0].y) + + abs(BC6H_data->din[i][2] - BC6H_data->Paletef[subset][0].z); + + // loop through the rest of the data until find the best error + for (CGU_INT j = 1; j < maxPallet && bestError > 0; j++) + { + error = abs(BC6H_data->din[i][0] - BC6H_data->Paletef[subset][j].x) + + abs(BC6H_data->din[i][1] - BC6H_data->Paletef[subset][j].y) + + abs(BC6H_data->din[i][2] - BC6H_data->Paletef[subset][j].z); + + if (error <= bestError) + bestError = error; + else + break; + } + totalError += bestError; + } + + return totalError; +} + +CGU_FLOAT FindBestPattern(BC6H_Encode_local * BC6H_data, CGU_BOOL TwoRegionShapes, CGU_INT8 shape_pattern, CGU_FLOAT quality) +{ + // Index bit size for the patterns been used. + // All two zone shapes have 3 bits per color, max index value < 8 + // All one zone shapes gave 4 bits per color, max index value < 16 + CGU_INT8 Index_BitSize = TwoRegionShapes ? 8 : 16; + CGU_INT8 max_subsets = TwoRegionShapes ? 
2 : 1; + CGU_FLOAT direction[NCHANNELS]; + CGU_FLOAT step; + + BC6H_data->region = max_subsets; + BC6H_data->index = 0; + BC6H_data->d_shape_index = shape_pattern; + memset((CGU_UINT8 *)BC6H_data->partition, 0, sizeof(BC6H_data->partition)); + memset((CGU_UINT8 *)BC6H_data->shape_indices, 0, sizeof(BC6H_data->shape_indices)); + + // Get the pattern to encode with + Partition(shape_pattern, // Shape pattern we want to get + BC6H_data->din, // Input data + BC6H_data->partition, // Returns the patterned shape data + BC6H_data->entryCount, // counts the number of pixel used in each subset region num of 0's amd 1's + max_subsets, // Table Shapes to use eithe one regions 1 or two regions 2 + 3); // rgb no alpha always = 3 + + CGU_FLOAT error[MAX_SUBSETS] = { 0.0, CMP_FLOAT_MAX,CMP_FLOAT_MAX }; + CGU_INT BestOutB = 0; + CGU_FLOAT BestError; //the lowest error from vector direction quantization + CGU_FLOAT BestError_endpts; //the lowest error from endpoints extracted from the vector direction quantization + + CGU_FLOAT outB[2][2][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG]; + CGU_INT shape_indicesB[2][MAX_SUBSETS][MAX_SUBSET_SIZE]; + + for (CGU_INT subset = 0; subset < max_subsets; subset++) + { + error[0] += optQuantAnD_d( + BC6H_data->partition[subset], // input data + BC6H_data->entryCount[subset], // number of input points above (not clear about 1, better to avoid) + Index_BitSize, // number of clusters on the ramp, 8 or 16 + shape_indicesB[0][subset], // output index, if not all points of the ramp used, 0 may not be assigned + outB[0][subset], // resulting quantization + direction, // direction vector of the ramp (check normalization) + &step, // step size (check normalization) + 3, // number of channels (always 3 = RGB for BC6H) + quality // Quality set number of retry to get good end points + // Max retries = MAX_TRY = 4000 when Quality is 1.0 + // Min = 0 and default with quality 0.05 is 200 times + ); + } + + BestError = error[0]; + BestOutB = 0; + + // The following code 
is almost complete - runs very slow and not sure if % of improvement is justified.. +#ifdef USE_SHAKERHD + // Valid only for 2 region shapes + if ((max_subsets > 1) && (quality > 0.80)) + { + CGU_INT tempIndices[MAX_SUBSET_SIZE]; + // CGU_INT temp_epo_code[2][2][MAX_DIMENSION_BIG]; + CGU_INT bits[3] = { 8,8,8 }; // Channel index bit size + + // CGU_FLOAT epo[2][MAX_DIMENSION_BIG]; + CGU_INT epo_code[MAX_SUBSETS][2][MAX_DIMENSION_BIG]; + // CGU_INT shakeSize = 8; + + error[1] = 0.0; + for (CGU_INT subset = 0; subset < max_subsets; subset++) + { + for (CGU_INT k = 0; k < BC6H_data->entryCount[subset]; k++) + { + tempIndices[k] = shape_indicesB[0][subset][k]; + } + + error[1] += ep_shaker_HD( + BC6H_data->partition[subset], + BC6H_data->entryCount[subset], + tempIndices, // output index, if not all points of the ramp used, 0 may not be assigned + outB[1][subset], // resulting quantization + epo_code[subset], + BC6H_data->entryCount[subset] - 1, + bits, + 3 + ); + + // error[1] += ep_shaker_2_d( + // BC6H_data.partition[subset], + // BC6H_data.entryCount[subset], + // tempIndices, // output index, if not all points of the ramp used, 0 may not be assigned + // outB[1][subset], // resulting quantization + // epo_code[subset], + // shakeSize, + // BC6H_data.entryCount[subset] - 1, + // bits[0], + // 3, + // epo + // ); + + + for (CGU_INT k = 0; k < BC6H_data->entryCount[subset]; k++) + { + shape_indicesB[1][subset][k] = tempIndices[k]; + } + + } // subsets + + if (BestError > error[1]) + { + BestError = error[1]; + BestOutB = 1; + for (CGU_INT subset = 0; subset < max_subsets; subset++) + { + for (CGU_INT k = 0; k < MAX_DIMENSION_BIG; k++) + { + BC6H_data->fEndPoints[subset][0][k] = (CGU_FLOAT)epo_code[subset][0][k]; + BC6H_data->fEndPoints[subset][1][k] = (CGU_FLOAT)epo_code[subset][1][k]; + } + } + } + + } +#endif + + // Save the best for BC6H data processing later + if (BestOutB == 0) + GetEndPoints(BC6H_data->fEndPoints, outB[BestOutB], max_subsets, 
BC6H_data->entryCount); + + memcpy((CGU_UINT8 *)BC6H_data->shape_indices, (CGU_UINT8 *)shape_indicesB[BestOutB], sizeof(BC6H_data->shape_indices)); + clampF16Max(BC6H_data->fEndPoints, BC6H_data->issigned); + + BestError_endpts = CalcShapeError(BC6H_data, BC6H_data->fEndPoints, false); + return BestError_endpts; +} + +#ifndef ASPM_GPU +void SaveDataBlock(BC6H_Encode_local *bc6h_format, CMP_GLOBAL CGU_UINT8 cmpout[COMPRESSED_BLOCK_SIZE]) +{ + BitHeader header(NULL, COMPRESSED_BLOCK_SIZE); + + // Save the RGB end point values + switch (bc6h_format->m_mode) + { + case 1: //0x00 + header.setvalue(0, 2, 0x00); + header.setvalue(2, 1, bc6h_format->gy, 4); // gy[4] + header.setvalue(3, 1, bc6h_format->by, 4); // by[4] + header.setvalue(4, 1, bc6h_format->bz, 4); // bz[4] + header.setvalue(5, 10, bc6h_format->rw); // 10: rw[9:0] + header.setvalue(15, 10, bc6h_format->gw); // 10: gw[9:0] + header.setvalue(25, 10, bc6h_format->bw); // 10: bw[9:0] + header.setvalue(35, 5, bc6h_format->rx); // 5: rx[4:0] + header.setvalue(40, 1, bc6h_format->gz, 4); // gz[4] + header.setvalue(41, 4, bc6h_format->gy); // 5: gy[3:0] + header.setvalue(45, 5, bc6h_format->gx); // 5: gx[4:0] + header.setvalue(50, 1, bc6h_format->bz); // 5: bz[0] + header.setvalue(51, 4, bc6h_format->gz); // 5: gz[3:0] + header.setvalue(55, 5, bc6h_format->bx); // 5: bx[4:0] + header.setvalue(60, 1, bc6h_format->bz, 1); // bz[1] + header.setvalue(61, 4, bc6h_format->by); // 5: by[3:0] + header.setvalue(65, 5, bc6h_format->ry); // 5: ry[4:0] + header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2] + header.setvalue(71, 5, bc6h_format->rz); // 5: rz[4:0] + header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3] + break; + case 2: // 0x01 + header.setvalue(0, 2, 0x01); + header.setvalue(2, 1, bc6h_format->gy, 5); // gy[5] + header.setvalue(3, 1, bc6h_format->gz, 4); // gz[4] + header.setvalue(4, 1, bc6h_format->gz, 5); // gz[5] + header.setvalue(5, 7, bc6h_format->rw); // rw[6:0] + header.setvalue(12, 1, bc6h_format->bz); 
// bz[0] + header.setvalue(13, 1, bc6h_format->bz, 1); // bz[1] + header.setvalue(14, 1, bc6h_format->by, 4); // by[4] + header.setvalue(15, 7, bc6h_format->gw); // gw[6:0] + header.setvalue(22, 1, bc6h_format->by, 5); // by[5] + header.setvalue(23, 1, bc6h_format->bz, 2); // bz[2] + header.setvalue(24, 1, bc6h_format->gy, 4); // gy[4] + header.setvalue(25, 7, bc6h_format->bw); // 7: bw[6:0] + header.setvalue(32, 1, bc6h_format->bz, 3); // bz[3] + header.setvalue(33, 1, bc6h_format->bz, 5); // bz[5] + header.setvalue(34, 1, bc6h_format->bz, 4); // bz[4] + header.setvalue(35, 6, bc6h_format->rx); // 6: rx[5:0] + header.setvalue(41, 4, bc6h_format->gy); // 6: gy[3:0] + header.setvalue(45, 6, bc6h_format->gx); // 6: gx[5:0] + header.setvalue(51, 4, bc6h_format->gz); // 6: gz[3:0] + header.setvalue(55, 6, bc6h_format->bx); // 6: bx[5:0] + header.setvalue(61, 4, bc6h_format->by); // 6: by[3:0] + header.setvalue(65, 6, bc6h_format->ry); // 6: ry[5:0] + header.setvalue(71, 6, bc6h_format->rz); // 6: rz[5:0] + break; + case 3: // 0x02 + header.setvalue(0, 5, 0x02); + header.setvalue(5, 10, bc6h_format->rw); // 11: rw[9:0] + header.setvalue(15, 10, bc6h_format->gw); // 11: gw[9:0] + header.setvalue(25, 10, bc6h_format->bw); // 11: bw[9:0] + header.setvalue(35, 5, bc6h_format->rx); // 5: rx[4:0] + header.setvalue(40, 1, bc6h_format->rw, 10); // rw[10] + header.setvalue(41, 4, bc6h_format->gy); // 4: gy[3:0] + header.setvalue(45, 4, bc6h_format->gx); // 4: gx[3:0] + header.setvalue(49, 1, bc6h_format->gw, 10); // gw[10] + header.setvalue(50, 1, bc6h_format->bz); // 4: bz[0] + header.setvalue(51, 4, bc6h_format->gz); // 4: gz[3:0] + header.setvalue(55, 4, bc6h_format->bx); // 4: bx[3:0] + header.setvalue(59, 1, bc6h_format->bw, 10); // bw[10] + header.setvalue(60, 1, bc6h_format->bz, 1); // bz[1] + header.setvalue(61, 4, bc6h_format->by); // 4: by[3:0] + header.setvalue(65, 5, bc6h_format->ry); // 5: ry[4:0] + header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2] + 
header.setvalue(71, 5, bc6h_format->rz); // 5: rz[4:0] + header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3] + break; + case 4: // 0x06 + header.setvalue(0, 5, 0x06); + header.setvalue(5, 10, bc6h_format->rw); // 11: rw[9:0] + header.setvalue(15, 10, bc6h_format->gw); // 11: gw[9:0] + header.setvalue(25, 10, bc6h_format->bw); // 11: bw[9:0] + header.setvalue(35, 4, bc6h_format->rx); // rx[3:0] + header.setvalue(39, 1, bc6h_format->rw, 10); // rw[10] + header.setvalue(40, 1, bc6h_format->gz, 4); // gz[4] + header.setvalue(41, 4, bc6h_format->gy); // 5: gy[3:0] + header.setvalue(45, 5, bc6h_format->gx); // gx[4:0] + header.setvalue(50, 1, bc6h_format->gw, 10); // 5: gw[10] + header.setvalue(51, 4, bc6h_format->gz); // 5: gz[3:0] + header.setvalue(55, 4, bc6h_format->bx); // 4: bx[3:0] + header.setvalue(59, 1, bc6h_format->bw, 10); // bw[10] + header.setvalue(60, 1, bc6h_format->bz, 1); // bz[1] + header.setvalue(61, 4, bc6h_format->by); // 4: by[3:0] + header.setvalue(65, 4, bc6h_format->ry); // 4: ry[3:0] + header.setvalue(69, 1, bc6h_format->bz); // 4: bz[0] + header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2] + header.setvalue(71, 4, bc6h_format->rz); // 4: rz[3:0] + header.setvalue(75, 1, bc6h_format->gy, 4); // gy[4] + header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3] + break; + case 5: // 0x0A + header.setvalue(0, 5, 0x0A); + header.setvalue(5, 10, bc6h_format->rw); // 11: rw[9:0] + header.setvalue(15, 10, bc6h_format->gw); // 11: gw[9:0] + header.setvalue(25, 10, bc6h_format->bw); // 11: bw[9:0] + header.setvalue(35, 4, bc6h_format->rx); // 4: rx[3:0] + header.setvalue(39, 1, bc6h_format->rw, 10); // rw[10] + header.setvalue(40, 1, bc6h_format->by, 4); // by[4] + header.setvalue(41, 4, bc6h_format->gy); // 4: gy[3:0] + header.setvalue(45, 4, bc6h_format->gx); // 4: gx[3:0] + header.setvalue(49, 1, bc6h_format->gw, 10); // gw[10] + header.setvalue(50, 1, bc6h_format->bz); // 5: bz[0] + header.setvalue(51, 4, bc6h_format->gz); // 4: gz[3:0] + 
header.setvalue(55, 5, bc6h_format->bx); // 5: bx[4:0] + header.setvalue(60, 1, bc6h_format->bw, 10); // bw[10] + header.setvalue(61, 4, bc6h_format->by); // 5: by[3:0] + header.setvalue(65, 4, bc6h_format->ry); // 4: ry[3:0] + header.setvalue(69, 1, bc6h_format->bz, 1); // bz[1] + header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2] + header.setvalue(71, 4, bc6h_format->rz); // 4: rz[3:0] + header.setvalue(75, 1, bc6h_format->bz, 4); // bz[4] + header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3] + break; + case 6: // 0x0E + header.setvalue(0, 5, 0x0E); + header.setvalue(5, 9, bc6h_format->rw); // 9: rw[8:0] + header.setvalue(14, 1, bc6h_format->by, 4); // by[4] + header.setvalue(15, 9, bc6h_format->gw); // 9: gw[8:0] + header.setvalue(24, 1, bc6h_format->gy, 4); // gy[4] + header.setvalue(25, 9, bc6h_format->bw); // 9: bw[8:0] + header.setvalue(34, 1, bc6h_format->bz, 4); // bz[4] + header.setvalue(35, 5, bc6h_format->rx); // 5: rx[4:0] + header.setvalue(40, 1, bc6h_format->gz, 4); // gz[4] + header.setvalue(41, 4, bc6h_format->gy); // 5: gy[3:0] + header.setvalue(45, 5, bc6h_format->gx); // 5: gx[4:0] + header.setvalue(50, 1, bc6h_format->bz); // 5: bz[0] + header.setvalue(51, 4, bc6h_format->gz); // 5: gz[3:0] + header.setvalue(55, 5, bc6h_format->bx); // 5: bx[4:0] + header.setvalue(60, 1, bc6h_format->bz, 1); // bz[1] + header.setvalue(61, 4, bc6h_format->by); // 5: by[3:0] + header.setvalue(65, 5, bc6h_format->ry); // 5: ry[4:0] + header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2] + header.setvalue(71, 5, bc6h_format->rz); // 5: rz[4:0] + header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3] + break; + case 7: // 0x12 + header.setvalue(0, 5, 0x12); + header.setvalue(5, 8, bc6h_format->rw); // 8: rw[7:0] + header.setvalue(13, 1, bc6h_format->gz, 4); // gz[4] + header.setvalue(14, 1, bc6h_format->by, 4); // by[4] + header.setvalue(15, 8, bc6h_format->gw); // 8: gw[7:0] + header.setvalue(23, 1, bc6h_format->bz, 2); // bz[2] + header.setvalue(24, 1, 
bc6h_format->gy, 4); // gy[4] + header.setvalue(25, 8, bc6h_format->bw); // 8: bw[7:0] + header.setvalue(33, 1, bc6h_format->bz, 3); // bz[3] + header.setvalue(34, 1, bc6h_format->bz, 4); // bz[4] + header.setvalue(35, 6, bc6h_format->rx); // 6: rx[5:0] + header.setvalue(41, 4, bc6h_format->gy); // 5: gy[3:0] + header.setvalue(45, 5, bc6h_format->gx); // 5: gx[4:0] + header.setvalue(50, 1, bc6h_format->bz); // 5: bz[0] + header.setvalue(51, 4, bc6h_format->gz); // 5: gz[3:0] + header.setvalue(55, 5, bc6h_format->bx); // 5: bx[4:0] + header.setvalue(60, 1, bc6h_format->bz, 1); // bz[1] + header.setvalue(61, 4, bc6h_format->by); // 5: by[3:0] + header.setvalue(65, 6, bc6h_format->ry); // 6: ry[5:0] + header.setvalue(71, 6, bc6h_format->rz); // 6: rz[5:0] + break; + case 8: // 0x16 + header.setvalue(0, 5, 0x16); + header.setvalue(5, 8, bc6h_format->rw); // 8: rw[7:0] + header.setvalue(13, 1, bc6h_format->bz); // 5: bz[0] + header.setvalue(14, 1, bc6h_format->by, 4); // by[4] + header.setvalue(15, 8, bc6h_format->gw); // 8: gw[7:0] + header.setvalue(23, 1, bc6h_format->gy, 5); // gy[5] + header.setvalue(24, 1, bc6h_format->gy, 4); // gy[4] + header.setvalue(25, 8, bc6h_format->bw); // 8: bw[7:0] + header.setvalue(33, 1, bc6h_format->gz, 5); // gz[5] + header.setvalue(34, 1, bc6h_format->bz, 4); // bz[4] + header.setvalue(35, 5, bc6h_format->rx); // 5: rx[4:0] + header.setvalue(40, 1, bc6h_format->gz, 4); // gz[4] + header.setvalue(41, 4, bc6h_format->gy); // 6: gy[3:0] + header.setvalue(45, 6, bc6h_format->gx); // 6: gx[5:0] + header.setvalue(51, 4, bc6h_format->gz); // 6: gz[3:0] + header.setvalue(55, 5, bc6h_format->bx); // 5: bx[4:0] + header.setvalue(60, 1, bc6h_format->bz, 1); // bz[1] + header.setvalue(61, 4, bc6h_format->by); // 5: by[3:0] + header.setvalue(65, 5, bc6h_format->ry); // 5: ry[4:0] + header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2] + header.setvalue(71, 5, bc6h_format->rz); // 5: rz[4:0] + header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3] + 
break; + case 9: // 0x1A + header.setvalue(0, 5, 0x1A); + header.setvalue(5, 8, bc6h_format->rw); // 8: rw[7:0] + header.setvalue(13, 1, bc6h_format->bz, 1); // bz[1] + header.setvalue(14, 1, bc6h_format->by, 4); // by[4] + header.setvalue(15, 8, bc6h_format->gw); // 8: gw[7:0] + header.setvalue(23, 1, bc6h_format->by, 5); // by[5] + header.setvalue(24, 1, bc6h_format->gy, 4); // gy[4] + header.setvalue(25, 8, bc6h_format->bw); // 8: bw[7:0] + header.setvalue(33, 1, bc6h_format->bz, 5); // bz[5] + header.setvalue(34, 1, bc6h_format->bz, 4); // bz[4] + header.setvalue(35, 5, bc6h_format->rx); // 5: rx[4:0] + header.setvalue(40, 1, bc6h_format->gz, 4); // gz[4] + header.setvalue(41, 4, bc6h_format->gy); // 5: gy[3:0] + header.setvalue(45, 5, bc6h_format->gx); // 5: gx[4:0] + header.setvalue(50, 1, bc6h_format->bz); // 6: bz[0] + header.setvalue(51, 4, bc6h_format->gz); // 5: gz[3:0] + header.setvalue(55, 6, bc6h_format->bx); // 6: bx[5:0] + header.setvalue(61, 4, bc6h_format->by); // 6: by[3:0] + header.setvalue(65, 5, bc6h_format->ry); // 5: ry[4:0] + header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2] + header.setvalue(71, 5, bc6h_format->rz); // 5: rz[4:0] + header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3] + break; + case 10: // 0x1E + header.setvalue(0, 5, 0x1E); + header.setvalue(5, 6, bc6h_format->rw); // 6: rw[5:0] + header.setvalue(11, 1, bc6h_format->gz, 4); // gz[4] + header.setvalue(12, 1, bc6h_format->bz); // 6: bz[0] + header.setvalue(13, 1, bc6h_format->bz, 1); // bz[1] + header.setvalue(14, 1, bc6h_format->by, 4); // by[4] + header.setvalue(15, 6, bc6h_format->gw); // 6: gw[5:0] + header.setvalue(21, 1, bc6h_format->gy, 5); // gy[5] + header.setvalue(22, 1, bc6h_format->by, 5); // by[5] + header.setvalue(23, 1, bc6h_format->bz, 2); // bz[2] + header.setvalue(24, 1, bc6h_format->gy, 4); // gy[4] + header.setvalue(25, 6, bc6h_format->bw); // 6: bw[5:0] + header.setvalue(31, 1, bc6h_format->gz, 5); // gz[5] + header.setvalue(32, 1, bc6h_format->bz, 
3); // bz[3] + header.setvalue(33, 1, bc6h_format->bz, 5); // bz[5] + header.setvalue(34, 1, bc6h_format->bz, 4); // bz[4] + header.setvalue(35, 6, bc6h_format->rx); // 6: rx[5:0] + header.setvalue(41, 4, bc6h_format->gy); // 6: gy[3:0] + header.setvalue(45, 6, bc6h_format->gx); // 6: gx[5:0] + header.setvalue(51, 4, bc6h_format->gz); // 6: gz[3:0] + header.setvalue(55, 6, bc6h_format->bx); // 6: bx[5:0] + header.setvalue(61, 4, bc6h_format->by); // 6: by[3:0] + header.setvalue(65, 6, bc6h_format->ry); // 6: ry[5:0] + header.setvalue(71, 6, bc6h_format->rz); // 6: rz[5:0] + break; + + // Single regions Modes + case 11: // 0x03 + header.setvalue(0, 5, 0x03); + header.setvalue(5, 10, bc6h_format->rw); // 10: rw[9:0] + header.setvalue(15, 10, bc6h_format->gw); // 10: gw[9:0] + header.setvalue(25, 10, bc6h_format->bw); // 10: bw[9:0] + header.setvalue(35, 10, bc6h_format->rx); // 10: rx[9:0] + header.setvalue(45, 10, bc6h_format->gx); // 10: gx[9:0] + header.setvalue(55, 10, bc6h_format->bx); // 10: bx[9:0] + break; + case 12: // 0x07 + header.setvalue(0, 5, 0x07); + header.setvalue(5, 10, bc6h_format->rw); // 11: rw[9:0] + header.setvalue(15, 10, bc6h_format->gw); // 11: gw[9:0] + header.setvalue(25, 10, bc6h_format->bw); // 11: bw[9:0] + header.setvalue(35, 9, bc6h_format->rx); // 9: rx[8:0] + header.setvalue(44, 1, bc6h_format->rw, 10); // rw[10] + header.setvalue(45, 9, bc6h_format->gx); // 9: gx[8:0] + header.setvalue(54, 1, bc6h_format->gw, 10); // gw[10] + header.setvalue(55, 9, bc6h_format->bx); // 9: bx[8:0] + header.setvalue(64, 1, bc6h_format->bw, 10); // bw[10] + break; + case 13: // 0x0B + header.setvalue(0, 5, 0x0B); + header.setvalue(5, 10, bc6h_format->rw); // 12: rw[9:0] + header.setvalue(15, 10, bc6h_format->gw); // 12: gw[9:0] + header.setvalue(25, 10, bc6h_format->bw); // 12: bw[9:0] + header.setvalue(35, 8, bc6h_format->rx); // 8: rx[7:0] + header.setvalue(43, 1, bc6h_format->rw, 11); // rw[11] + header.setvalue(44, 1, bc6h_format->rw, 10); // 
rw[10] + header.setvalue(45, 8, bc6h_format->gx); // 8: gx[7:0] + header.setvalue(53, 1, bc6h_format->gw, 11); // gw[11] + header.setvalue(54, 1, bc6h_format->gw, 10); // gw[10] + header.setvalue(55, 8, bc6h_format->bx); // 8: bx[7:0] + header.setvalue(63, 1, bc6h_format->bw, 11); // bw[11] + header.setvalue(64, 1, bc6h_format->bw, 10); // bw[10] + break; + case 14: // 0x0F + header.setvalue(0, 5, 0x0F); + header.setvalue(5, 10, bc6h_format->rw); // 16: rw[9:0] + header.setvalue(15, 10, bc6h_format->gw); // 16: gw[9:0] + header.setvalue(25, 10, bc6h_format->bw); // 16: bw[9:0] + header.setvalue(35, 4, bc6h_format->rx); // 4: rx[3:0] + header.setvalue(39, 6, bc6h_format->rw, 10); // rw[15:10] + header.setvalue(45, 4, bc6h_format->gx); // 4: gx[3:0] + header.setvalue(49, 6, bc6h_format->gw, 10); // gw[15:10] + header.setvalue(55, 4, bc6h_format->bx); // 4: bx[3:0] + header.setvalue(59, 6, bc6h_format->bw, 10); // bw[15:10] + break; + default: // Need to indicate error! + return; + } + + // Each format in the mode table can be uniquely identified by the mode bits. + // The first ten modes are used for two-region tiles, and the mode bit field + // can be either two or five bits long. These blocks also have fields for + // the compressed color endpoints (72 or 75 bits), the partition (5 bits), + // and the partition indices (46 bits). 
+ + if (bc6h_format->m_mode >= MIN_MODE_FOR_ONE_REGION) + { + CGU_INT startbit = ONE_REGION_INDEX_OFFSET; + header.setvalue(startbit, 3, bc6h_format->indices16[0]); + startbit += 3; + for (CGU_INT i = 1; i < 16; i++) + { + header.setvalue(startbit, 4, bc6h_format->indices16[i]); + startbit += 4; + } + } + else + { + header.setvalue(77, 5, bc6h_format->d_shape_index); // Shape Index + CGU_INT startbit = TWO_REGION_INDEX_OFFSET, + nbits = 2; + header.setvalue(startbit, nbits, bc6h_format->indices16[0]); + for (CGU_INT i = 1; i < 16; i++) + { + startbit += nbits; // offset start bit for next index using prior nbits used + nbits = g_indexfixups[bc6h_format->d_shape_index] == i ? 2 : 3; // get new number of bit to save index with + header.setvalue(startbit, nbits, bc6h_format->indices16[i]); + } + } + + // save to output buffer our new bit values + // this can be optimized if header is part of bc6h_format struct + header.transferbits(cmpout, 16); +} +#else +void SaveDataBlock(BC6H_Encode_local *bc6h_format, CMP_GLOBAL CGU_UINT8 out[COMPRESSED_BLOCK_SIZE]) +{ + // ToDo +} +#endif + +void SwapIndices(CGU_INT32 iEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_INT32 iIndices[3][MAX_SUBSET_SIZE], CGU_INT entryCount[MAX_SUBSETS], CGU_INT max_subsets, CGU_INT mode, CGU_INT shape_pattern) +{ + + CGU_UINT32 uNumIndices = 1 << ModePartition[mode].IndexPrec; + CGU_UINT32 uHighIndexBit = uNumIndices >> 1; + + for (CGU_INT subset = 0; subset < max_subsets; ++subset) + { + // region 0 (subset = 0) The fix-up index for this subset is allways index 0 + // region 1 (subset = 1) The fix-up index for this subset varies based on the shape + size_t i = subset ? 
g_Region2FixUp[shape_pattern] : 0; + + if (iIndices[subset][i] & uHighIndexBit) + { + // high bit is set, swap the aEndPts and indices for this region + swap(iEndPoints[subset][0][0], iEndPoints[subset][1][0]); + swap(iEndPoints[subset][0][1], iEndPoints[subset][1][1]); + swap(iEndPoints[subset][0][2], iEndPoints[subset][1][2]); + + for (size_t j = 0; j < (size_t)entryCount[subset]; ++j) + { + iIndices[subset][j] = uNumIndices - 1 - iIndices[subset][j]; + } + } + + } +} + +// helper function to check transform overflow +// todo: check overflow by checking against sign +CGU_BOOL isOverflow(CGU_INT endpoint, CGU_INT nbit) +{ + CGU_INT maxRange = (int)pow(2.0f, (CGU_FLOAT)nbit - 1.0f) - 1; + CGU_INT minRange = (int)-(pow(2.0f, (CGU_FLOAT)nbit - 1.0f)); + + //no overflow + if ((endpoint >= minRange) && (endpoint <= maxRange)) + return false; + else //overflow + return true; +} + +CGU_BOOL TransformEndPoints(BC6H_Encode_local *BC6H_data, CGU_INT iEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_INT oEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_INT max_subsets, CGU_INT mode) +{ + CGU_INT Mask; + if (ModePartition[mode].transformed) + { + BC6H_data->istransformed = true; + for (CGU_INT i = 0; i < 3; ++i) + { + Mask = MASK(ModePartition[mode].nbits); + oEndPoints[0][0][i] = iEndPoints[0][0][i] & Mask; // [0][A] + + Mask = MASK(ModePartition[mode].prec[i]); + oEndPoints[0][1][i] = iEndPoints[0][1][i] - iEndPoints[0][0][i]; // [0][B] - [0][A] + + if (isOverflow(oEndPoints[0][1][i], ModePartition[mode].prec[i])) + return false; + + oEndPoints[0][1][i] = (oEndPoints[0][1][i] & Mask); + + //redo the check for sign overflow for one region case + if (max_subsets <= 1) + { + if (isOverflow(oEndPoints[0][1][i], ModePartition[mode].prec[i])) + return false; + } + + if (max_subsets > 1) + { + oEndPoints[1][0][i] = iEndPoints[1][0][i] - iEndPoints[0][0][i]; // [1][A] - [0][A] + if (isOverflow(oEndPoints[1][0][i], ModePartition[mode].prec[i])) + return 
false; + + oEndPoints[1][0][i] = (oEndPoints[1][0][i] & Mask); + + oEndPoints[1][1][i] = iEndPoints[1][1][i] - iEndPoints[0][0][i]; // [1][B] - [0][A] + if (isOverflow(oEndPoints[1][1][i], ModePartition[mode].prec[i])) + return false; + + oEndPoints[1][1][i] = (oEndPoints[1][1][i] & Mask); + } + } + } + else + { + BC6H_data->istransformed = false; + for (CGU_INT i = 0; i < 3; ++i) + { + Mask = MASK(ModePartition[mode].nbits); + oEndPoints[0][0][i] = iEndPoints[0][0][i] & Mask; + + Mask = MASK(ModePartition[mode].prec[i]); + oEndPoints[0][1][i] = iEndPoints[0][1][i] & Mask; + + if (max_subsets > 1) + { + oEndPoints[1][0][i] = iEndPoints[1][0][i] & Mask; + oEndPoints[1][1][i] = iEndPoints[1][1][i] & Mask; + } + } + } + + return true; +} + +void SaveCompressedBlockData(BC6H_Encode_local *BC6H_data, + CGU_INT oEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], + CGU_INT iIndices[2][MAX_SUBSET_SIZE], + CGU_INT8 max_subsets, + CGU_INT8 mode) +{ + BC6H_data->m_mode = mode; + BC6H_data->index++; + + // Save the data to output + BC6H_data->rw = oEndPoints[0][0][0]; // rw + BC6H_data->gw = oEndPoints[0][0][1]; // gw + BC6H_data->bw = oEndPoints[0][0][2]; // bw + BC6H_data->rx = oEndPoints[0][1][0]; // rx + BC6H_data->gx = oEndPoints[0][1][1]; // gx + BC6H_data->bx = oEndPoints[0][1][2]; // bx + + if (max_subsets > 1) + { + // Save the data to output + BC6H_data->ry = oEndPoints[1][0][0]; // ry + BC6H_data->gy = oEndPoints[1][0][1]; // gy + BC6H_data->by = oEndPoints[1][0][2]; // by + BC6H_data->rz = oEndPoints[1][1][0]; // rz + BC6H_data->gz = oEndPoints[1][1][1]; // gz + BC6H_data->bz = oEndPoints[1][1][2]; // bz + } + + // Map our two subset Indices for the shape to output 4x4 block + CGU_INT pos[2] = { 0,0 }; + CGU_INT asubset; + for (CGU_INT i = 0; i < MAX_SUBSET_SIZE; i++) + { + if (max_subsets > 1) + asubset = BC6_PARTITIONS[BC6H_data->d_shape_index][i]; // Two region shapes + else + asubset = 0; // One region shapes + BC6H_data->indices16[i] = 
// Accumulates an absolute-difference error between the block's source texels
// (din) and values reconstructed from the one-region endpoints, used to rank
// one-region modes against each other in EncodePattern.
CGU_FLOAT CalcOneRegionEndPtsError(BC6H_Encode_local *BC6H_data, CGU_FLOAT fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_INT shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE])
{
    CGU_FLOAT error = 0;

    for (CGU_INT i = 0; i < MAX_SUBSET_SIZE; i++)
    {
        for (CGU_INT m = 0; m < MAX_END_POINTS; m++)
        {
            for (CGU_INT n = 0; n < NCHANNELS; n++)
            {
                // NOTE(review): abs(fEndPoints[0][m][n] - fEndPoints[0][m][n]) is
                // identically zero, so calencpts always equals the endpoint itself
                // and shape_indices never contributes; also shape_indices[0][i]/15
                // is integer division. Looks like one operand was meant to be the
                // opposite endpoint - confirm against upstream before changing,
                // since this value steers one-region mode selection.
                CGU_FLOAT calencpts = fEndPoints[0][m][n] + (abs(fEndPoints[0][m][n] - fEndPoints[0][m][n]) * (shape_indices[0][i] / 15));
                error += abs(BC6H_data->din[i][n] - calencpts);
            }
        }
    }

    return error;
}

// Recomputes the best palette index per texel against the current palettes
// (Paletef), writing compacted per-subset index lists into shape_indices.
// One-region blocks use a 16-entry palette; two-region blocks use 8 entries.
void ReIndexShapef(BC6H_Encode_local *BC6H_data, CGU_INT shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE])
{
    CGU_FLOAT error = 0;
    CGU_FLOAT bestError;
    CGU_INT bestIndex = 0;
    CGU_INT sub0index = 0;   // next write slot in shape_indices[0]
    CGU_INT sub1index = 0;   // next write slot in shape_indices[1]
    CGU_INT MaxPallet;
    CGU_INT region = (BC6H_data->region - 1); // 0 = one region, 1 = two regions

    if (region == 0)
        MaxPallet = 16;
    else
        MaxPallet = 8;

    CGU_UINT8 isSet = 0; // stays 0 for one-region blocks: every texel goes to subset 0
    for (CGU_INT i = 0; i < MAX_SUBSET_SIZE; i++)
    {
        // subset 0 or subset 1 ownership from the shape table (two regions only)
        if (region)
            isSet = BC6_PARTITIONS[BC6H_data->d_shape_index][i];

        if (isSet)
        {
            // NOTE(review): this branch seeds bestError with CMP_HALF_MAX while the
            // other branch uses CMP_FLOAT_MAX - possibly deliberate (half-float
            // palette range) but worth confirming.
            bestError = CMP_HALF_MAX;
            bestIndex = 0;

            // For two shape regions max Pallet is 8
            for (CGU_INT j = 0; j < MaxPallet; j++)
            {
                // Manhattan distance from the source texel to palette entry j of subset 1
                error = abs(BC6H_data->din[i][0] - BC6H_data->Paletef[1][j].x) +
                        abs(BC6H_data->din[i][1] - BC6H_data->Paletef[1][j].y) +
                        abs(BC6H_data->din[i][2] - BC6H_data->Paletef[1][j].z);
                if (error < bestError)
                {
                    bestError = error;
                    bestIndex = j;
                }
            }

            shape_indices[1][sub1index] = bestIndex;
            sub1index++;
        }
        else
        {
            // Shared for one- or two-region shapes; palette size 16 or 8 respectively.
            bestError = CMP_FLOAT_MAX;
            bestIndex = 0;

            for (CGU_INT j = 0; j < MaxPallet; j++)
            {
                // Manhattan distance from the source texel to palette entry j of subset 0
                error = abs(BC6H_data->din[i][0] - BC6H_data->Paletef[0][j].x) +
                        abs(BC6H_data->din[i][1] - BC6H_data->Paletef[0][j].y) +
                        abs(BC6H_data->din[i][2] - BC6H_data->Paletef[0][j].z);
                if (error < bestError)
                {
                    bestError = error;
                    bestIndex = j;
                }
            }

            shape_indices[0][sub0index] = bestIndex;
            sub0index++;
        }
    }
}
BC6H_data->Paletef[0][j].y) + + abs(BC6H_data->din[i][2] - BC6H_data->Paletef[0][j].z); + if (error < bestError) + { + bestError = error; + bestIndex = j; + } + } + + shape_indices[0][sub0index] = bestIndex; + sub0index++; + } + } + +} + +CGU_INT Unquantize(CGU_INT comp, unsigned char uBitsPerComp, CGU_BOOL bSigned) +{ + CGU_INT unq = 0, s = 0; + if (bSigned) + { + if (uBitsPerComp >= 16) + { + unq = comp; + } + else + { + if (comp < 0) + { + s = 1; + comp = -comp; + } + + if (comp == 0) unq = 0; + else if (comp >= ((1 << (uBitsPerComp - 1)) - 1)) unq = 0x7FFF; + else unq = ((comp << 15) + 0x4000) >> (uBitsPerComp - 1); + + if (s) unq = -unq; + } + } + else + { + if (uBitsPerComp >= 15) unq = comp; + else if (comp == 0) unq = 0; + else if (comp == ((1 << uBitsPerComp) - 1)) unq = 0xFFFF; + else unq = ((comp << 16) + 0x8000) >> uBitsPerComp; + } + + return unq; +} + +CGU_INT finish_unquantizeF16(CGU_INT q, CGU_BOOL isSigned) +{ + // Is it F16 Signed else F16 Unsigned + if (isSigned) + return (q < 0) ? 
// Decompresses one-region endpoints: sign-extend / undo the delta transform as
// required by the mode, then unquantize to the 16-bit working range and apply
// the final F16 scaling into outf.
void decompress_endpoints1(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_FLOAT outf[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_INT mode)
{
    CGU_INT i;
    CGU_INT t;
    CGU_FLOAT out[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG]; // intermediate unquantized values

    if (bc6h_format->issigned)
    {
        // NOTE(review): both signed paths below pass 'false' to Unquantize and
        // finish_unquantizeF16, while decompress_endpoints2's signed/transformed
        // path passes 'true'. This inconsistency looks suspicious - confirm
        // against the BC6H spec before changing, as it alters decode results.
        if (bc6h_format->istransformed)
        {
            for (i = 0; i < NCHANNELS; i++)
            {
                out[0][0][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[0][0][i], ModePartition[mode].nbits);

                // Undo the delta transform: B = (deltaB + A) masked to nbits, then sign-extend.
                t = SIGN_EXTEND_TYPELESS(oEndPoints[0][1][i], ModePartition[mode].prec[i]);
                t = (t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits);
                out[0][1][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits);

                // Unquantize all points to nbits
                out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
                out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // F16 format
                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
            }
        }
        else
        {
            for (i = 0; i < NCHANNELS; i++)
            {
                out[0][0][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[0][0][i], ModePartition[mode].nbits);
                out[0][1][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[0][1][i], ModePartition[mode].prec[i]);

                // Unquantize all points to nbits
                out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
                out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // F16 format
                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
            }
        }
    }
    else
    {
        if (bc6h_format->istransformed)
        {
            for (i = 0; i < NCHANNELS; i++)
            {
                out[0][0][i] = (CGU_FLOAT)oEndPoints[0][0][i];
                // Undo the delta transform for endpoint B (unsigned base, signed delta).
                t = SIGN_EXTEND_TYPELESS(oEndPoints[0][1][i], ModePartition[mode].prec[i]);
                out[0][1][i] = (CGU_FLOAT)((t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits));

                // Unquantize all points to nbits
                out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
                out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // F16 format
                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
            }
        }
        else
        {
            for (i = 0; i < NCHANNELS; i++)
            {
                out[0][0][i] = (CGU_FLOAT)oEndPoints[0][0][i];
                out[0][1][i] = (CGU_FLOAT)oEndPoints[0][1][i];

                // Unquantize all points to nbits
                out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
                out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // F16 format
                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
            }
        }
    }
}
// Decompresses two-region endpoints (four endpoints total): sign-extend /
// undo the delta transform per the mode, unquantize to the 16-bit working
// range, then apply the final F16 scaling into outf.
void decompress_endpoints2(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_FLOAT outf[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_INT mode)
{
    CGU_INT i;
    CGU_INT t;
    CGU_FLOAT out[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG]; // intermediate unquantized values

    if (bc6h_format->issigned)
    {
        if (bc6h_format->istransformed)
        {
            for (i = 0; i < NCHANNELS; i++)
            {
                // get the quantized values
                out[0][0][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[0][0][i], ModePartition[mode].nbits);

                // The three remaining endpoints are deltas from [0][A]:
                // sign-extend the delta, add the base, mask to nbits, re-extend.
                t = SIGN_EXTEND_TYPELESS(oEndPoints[0][1][i], ModePartition[mode].prec[i]);
                t = (t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits);
                out[0][1][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits);

                t = SIGN_EXTEND_TYPELESS(oEndPoints[1][0][i], ModePartition[mode].prec[i]);
                t = (t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits);
                out[1][0][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits);

                t = SIGN_EXTEND_TYPELESS(oEndPoints[1][1][i], ModePartition[mode].prec[i]);
                t = (t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits);
                out[1][1][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits);

                // Unquantize all points to nbits
                out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, true);
                out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, true);
                out[1][0][i] = (CGU_FLOAT)Unquantize((int)out[1][0][i], (unsigned char)ModePartition[mode].nbits, true);
                out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, true);

                // F16 format
                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], true);
                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], true);
                outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], true);
                outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], true);

            }
        }
        else
        {
            // NOTE(review): this signed/non-transformed path passes 'false' to
            // Unquantize/finish_unquantizeF16 while the transformed path above
            // passes 'true' - inconsistent; confirm against the BC6H spec.
            for (i = 0; i < NCHANNELS; i++)
            {
                out[0][0][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[0][0][i], ModePartition[mode].nbits);
                out[0][1][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[0][1][i], ModePartition[mode].prec[i]);
                out[1][0][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[1][0][i], ModePartition[mode].prec[i]);
                out[1][1][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[1][1][i], ModePartition[mode].prec[i]);

                // Unquantize all points to nbits
                out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
                out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
                out[1][0][i] = (CGU_FLOAT)Unquantize((int)out[1][0][i], (unsigned char)ModePartition[mode].nbits, false);
                out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // nbits to F16 format
                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
                outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], false);
                outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], false);
            }
        }
    }
    else
    {
        if (bc6h_format->istransformed)
        {
            for (i = 0; i < NCHANNELS; i++)
            {
                out[0][0][i] = (CGU_FLOAT)oEndPoints[0][0][i];
                // Deltas are signed even when the base is unsigned.
                t = SIGN_EXTEND_TYPELESS(oEndPoints[0][1][i], ModePartition[mode].prec[i]);
                out[0][1][i] = (CGU_FLOAT)((t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits));

                t = SIGN_EXTEND_TYPELESS(oEndPoints[1][0][i], ModePartition[mode].prec[i]);
                out[1][0][i] = (CGU_FLOAT)((t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits));

                t = SIGN_EXTEND_TYPELESS(oEndPoints[1][1][i], ModePartition[mode].prec[i]);
                out[1][1][i] = (CGU_FLOAT)((t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits));

                // Unquantize all points to nbits
                out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
                out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
                out[1][0][i] = (CGU_FLOAT)Unquantize((int)out[1][0][i], (unsigned char)ModePartition[mode].nbits, false);
                out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // nbits to F16 format
                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
                outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], false);
                outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], false);

            }
        }
        else
        {
            for (i = 0; i < NCHANNELS; i++)
            {
                out[0][0][i] = (CGU_FLOAT)oEndPoints[0][0][i];
                out[0][1][i] = (CGU_FLOAT)oEndPoints[0][1][i];
                out[1][0][i] = (CGU_FLOAT)oEndPoints[1][0][i];
                out[1][1][i] = (CGU_FLOAT)oEndPoints[1][1][i];

                // Unquantize all points to nbits
                out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
                out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
                out[1][0][i] = (CGU_FLOAT)Unquantize((int)out[1][0][i], (unsigned char)ModePartition[mode].nbits, false);
                out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false);

                // nbits to F16 format
                outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false);
                outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false);
                outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], false);
                outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], false);
            }
        }
    }
}
// Integer-only endpoint decompression used by endpts_fit to verify that the
// quantized/transformed endpoints round-trip losslessly. R_0..R_3 are the
// project's accessor macros for the four endpoint slots at channel i.
static void decompress_endpts(const CGU_INT in[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_INT out[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], const CGU_INT mode, CGU_BOOL issigned)
{

    if (ModePartition[mode].transformed)
    {
        for (CGU_INT i = 0; i < 3; ++i)
        {
            // NOTE(review): the base endpoint is sign-extended here with
            // IndexPrec, while the non-transformed branch below uses nbits;
            // IndexPrec is the palette-index width, so this looks like a typo
            // (nbits expected) - confirm before changing, since endpts_fit's
            // accept/reject decisions depend on it.
            R_0(out) = issigned ? SIGN_EXTEND_TYPELESS(R_0(in), ModePartition[mode].IndexPrec) : R_0(in);
            CGU_INT t;
            // Each remaining endpoint is a signed delta from the base:
            // sign-extend the delta, add the base, mask to nbits.
            t = SIGN_EXTEND_TYPELESS(R_1(in), ModePartition[mode].prec[i]);
            t = (t + R_0(in)) & MASK(ModePartition[mode].nbits);
            R_1(out) = issigned ? SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits) : t;

            t = SIGN_EXTEND_TYPELESS(R_2(in), ModePartition[mode].prec[i]);
            t = (t + R_0(in)) & MASK(ModePartition[mode].nbits);
            R_2(out) = issigned ? SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits) : t;

            t = SIGN_EXTEND_TYPELESS(R_3(in), ModePartition[mode].prec[i]);
            t = (t + R_0(in)) & MASK(ModePartition[mode].nbits);
            R_3(out) = issigned ? SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits) : t;
        }
    }
    else
    {
        for (CGU_INT i = 0; i < 3; ++i)
        {
            // Non-transformed: endpoints are absolute; only sign-extension differs.
            R_0(out) = issigned ? SIGN_EXTEND_TYPELESS(R_0(in), ModePartition[mode].nbits) : R_0(in);
            R_1(out) = issigned ? SIGN_EXTEND_TYPELESS(R_1(in), ModePartition[mode].prec[i]) : R_1(in);
            R_2(out) = issigned ? SIGN_EXTEND_TYPELESS(R_2(in), ModePartition[mode].prec[i]) : R_2(in);
            R_3(out) = issigned ? SIGN_EXTEND_TYPELESS(R_3(in), ModePartition[mode].prec[i]) : R_3(in);
        }
    }
}
// Endpoints "fit" a mode only if compressing then decompressing them is
// lossless: decompress the candidate and compare against the originals.
static CGU_BOOL endpts_fit(const CGU_INT orig[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], const CGU_INT compressed[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], const CGU_INT mode, CGU_INT max_subsets, CGU_BOOL issigned)
{
    CGU_INT uncompressed[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];

    decompress_endpts(compressed, uncompressed, mode, issigned);

    // Compare both endpoints of every active subset, all three channels.
    for (CGU_INT j = 0; j < max_subsets; ++j)
        for (CGU_INT i = 0; i < 3; ++i)
        {
            if (orig[j][0][i] != uncompressed[j][0][i]) return false;
            if (orig[j][1][i] != uncompressed[j][1][i]) return false;
        }

    return true;
}

// Quantizes a half-float bit pattern (passed as short) to 'prec' bits.
// signedfloat16 selects sign-magnitude handling; otherwise negatives clamp to 0.
//todo: check overflow
CGU_INT QuantizeToInt(short value, CGU_INT prec, CGU_BOOL signedfloat16)
{

    if (prec <= 1) return 0; // degenerate precision: nothing representable
    CGU_BOOL negvalue = false;

    // move data to use extra bits for processing
    // NOTE(review): ivalue snapshots 'value' BEFORE the negate/clamp below, so
    // the quantizer divides the original (possibly negative) value and the sign
    // is re-applied at return; the later edits to 'value' only affect 'prec'
    // selection. Looks intentional-but-fragile - confirm before touching.
    CGU_INT ivalue = value;

    if (signedfloat16)
    {
        if (value < 0)
        {
            negvalue = true;
            value = -value;
        }
        prec--; // one bit reserved for the sign
    }
    else
    {
        // clamp -ve
        if (value < 0)
            value = 0;
    }

    CGU_INT iQuantized;
    // Rounding bias: half-float style for prec in (10,16), special-cased at 16.
    CGU_INT bias = (prec > 10 && prec != 16) ? ((1 << (prec - 11)) - 1) : 0;
    bias = (prec == 16) ? 15 : bias;

    // Scale into the prec-bit range relative to the half-float maximum.
    iQuantized = ((ivalue << prec) + bias) / (FLT16_MAX + 1);

    return (negvalue ? -iQuantized : iQuantized);
}
// Quantizes all endpoints of every active subset to 'prec' bits via QuantizeToInt.
//todo: checkoverflow
void QuantizeEndPointToF16Prec(CGU_FLOAT EndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_INT iEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_INT max_subsets, CGU_INT prec, CGU_BOOL isSigned)
{

    for (CGU_INT subset = 0; subset < max_subsets; ++subset)
    {
        iEndPoints[subset][0][0] = QuantizeToInt((short)EndPoints[subset][0][0], prec, isSigned); // A.Red
        iEndPoints[subset][0][1] = QuantizeToInt((short)EndPoints[subset][0][1], prec, isSigned); // A.Green
        iEndPoints[subset][0][2] = QuantizeToInt((short)EndPoints[subset][0][2], prec, isSigned); // A.Blue
        iEndPoints[subset][1][0] = QuantizeToInt((short)EndPoints[subset][1][0], prec, isSigned); // B.Red
        iEndPoints[subset][1][1] = QuantizeToInt((short)EndPoints[subset][1][1], prec, isSigned); // B.Green
        iEndPoints[subset][1][2] = QuantizeToInt((short)EndPoints[subset][1][2], prec, isSigned); // B.Blue
    }
}

// Searches every BC6H mode compatible with the block's region count (modes
// 1..10 for two regions, 11..14 for one region, via ModeFitOrder), keeps the
// candidate whose re-decoded tile error is lowest, and stores the winning
// endpoints/indices into BC6H_data for SaveDataBlock. Returns the (possibly
// improved) error for the block.
CGU_FLOAT EncodePattern(BC6H_Encode_local *BC6H_data, CGU_FLOAT error)
{
    CGU_INT8 max_subsets = BC6H_data->region;

    // now we have input colors (in), output colors (outB) mapped to a line of ends (EndPoints)
    // and a set of colors on the line equally spaced (indexedcolors)
    // Lets assign indices

    // Quantize the EndPoints
    CGU_INT F16EndPoints[MAX_BC6H_MODES + 1][MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];   // temp endpoints used during calculations
    CGU_INT quantEndPoints[MAX_BC6H_MODES + 1][MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG]; // endpoints to save for a given mode

    // ModePartition[] starts from 1 to 14
    // If we have a shape pattern set the loop to check modes from 1 to 10 else from 11 to 14
    // of the ModePartition table
    CGU_INT min_mode = (BC6H_data->region == 2) ? 1 : 11;
    CGU_INT max_mode = (BC6H_data->region == 2) ? MAX_TWOREGION_MODES : MAX_BC6H_MODES;

    CGU_BOOL fits[15];
    memset((CGU_UINT8 *)fits, 0, sizeof(fits));

    CGU_INT bestFit = 0;
    CGU_INT bestEndpointMode = 0;
    CGU_FLOAT bestError = CMP_FLOAT_MAX;
    CGU_FLOAT bestEndpointsErr = CMP_FLOAT_MAX;
    CGU_FLOAT endPointErr = 0;

    // Try Optimization for the Mode
    CGU_FLOAT best_EndPoints[MAX_BC6H_MODES + 1][MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
    CGU_INT best_Indices[MAX_BC6H_MODES + 1][MAX_SUBSETS][MAX_SUBSET_SIZE];
    CGU_FLOAT opt_toterr[MAX_BC6H_MODES + 1] = { 0 };

    memset((CGU_UINT8 *)opt_toterr, 0, sizeof(opt_toterr));

    CGU_INT numfits = 0;
    //
    // Notes; Only the endpoints are varying; the indices stay fixed in values!
    // so to optimize which mode we need only check the endpoints error against our original to pick the mode to save
    //
    for (CGU_INT modes = min_mode; modes <= max_mode; ++modes)
    {
        // Start each mode from the block's current best endpoints/indices.
        memcpy((CGU_UINT8 *)best_EndPoints[modes], (CGU_UINT8 *)BC6H_data->fEndPoints, sizeof(BC6H_data->fEndPoints));
        memcpy((CGU_UINT8 *)best_Indices[modes] , (CGU_UINT8 *)BC6H_data->shape_indices, sizeof(BC6H_data->shape_indices));

        {
            QuantizeEndPointToF16Prec(best_EndPoints[modes], F16EndPoints[modes], max_subsets, ModePartition[ModeFitOrder[modes]].nbits, BC6H_data->issigned);
        }

        // Indices data to save for given mode
        SwapIndices(F16EndPoints[modes], best_Indices[modes], BC6H_data->entryCount, max_subsets, ModeFitOrder[modes], BC6H_data->d_shape_index);
        CGU_BOOL transformfit = TransformEndPoints(BC6H_data, F16EndPoints[modes], quantEndPoints[modes], max_subsets, ModeFitOrder[modes]);
        fits[modes] = endpts_fit(F16EndPoints[modes], quantEndPoints[modes], ModeFitOrder[modes], max_subsets, BC6H_data->issigned);

        if (fits[modes] && transformfit)
        {
            numfits++;

            // The new compressed end points fit the mode
            // recalculate the error for this mode with a new set of indices
            // since we have shifted the end points from what we origially calc
            // from the find_bestpattern
            CGU_FLOAT uncompressed[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
            if (BC6H_data->region == 1)
                decompress_endpoints1(BC6H_data, quantEndPoints[modes], uncompressed, ModeFitOrder[modes]);
            else
                decompress_endpoints2(BC6H_data, quantEndPoints[modes], uncompressed, ModeFitOrder[modes]);
            // Takes the end points and creates a pallet of colors
            // based on preset weights along a vector formed by the two end points
            palitizeEndPointsF(BC6H_data, uncompressed);

            // Once we have the pallet - recalculate the optimal indices using the pallet
            // and the original image data stored in BC6H_data.din[]
            if (!BC6H_data->issigned)
                ReIndexShapef(BC6H_data, best_Indices[modes]);

            // Calculate the error of the new tile vs the old tile data
            opt_toterr[modes] = CalcShapeError(BC6H_data, uncompressed, true);
            if (BC6H_data->region == 1)
            {
                // For one-region blocks also track which mode has the best
                // endpoint reconstruction error (used in bestFit selection below).
                endPointErr = CalcOneRegionEndPtsError(BC6H_data, uncompressed, best_Indices[modes]);
                if (endPointErr < bestEndpointsErr)
                {
                    bestEndpointsErr = endPointErr;
                    bestEndpointMode = modes;
                }
            }

            CGU_BOOL transformFit = true;
            // Save hold this mode fit data if its better than the last one checked.
            if (opt_toterr[modes] < bestError)
            {
                if (!BC6H_data->issigned)
                {
                    // Re-quantize with the re-indexed data before accepting.
                    QuantizeEndPointToF16Prec(uncompressed, F16EndPoints[modes], max_subsets, ModePartition[ModeFitOrder[modes]].nbits, BC6H_data->issigned);
                    SwapIndices(F16EndPoints[modes], best_Indices[modes], BC6H_data->entryCount, max_subsets, ModeFitOrder[modes], BC6H_data->d_shape_index);
                    transformFit = TransformEndPoints(BC6H_data, F16EndPoints[modes], quantEndPoints[modes], max_subsets, ModeFitOrder[modes]);
                }
                if (transformFit)
                {
                    if (BC6H_data->region == 1)
                    {
                        // NOTE(review): this ternary chain reduces to
                        // min(modes, bestEndpointMode) - kept as written.
                        bestFit = (modes == bestEndpointMode) ? modes : ((modes < bestEndpointMode) ? modes : bestEndpointMode);
                    }
                    else
                    {
                        bestFit = modes;
                    }
                    bestError = opt_toterr[bestFit];
                    error = bestError;
                }
            }

        }
    }

    if (numfits > 0)
    {
        // Commit the winning mode's endpoints and indices to the encode state.
        SaveCompressedBlockData(BC6H_data, quantEndPoints[bestFit], best_Indices[bestFit], max_subsets, ModeFitOrder[bestFit]);
        return error;
    }

    // Should not get here! (no mode could losslessly hold the endpoints)
    return error;
}
// Compresses one 4x4 tile (already loaded into BC6HEncode_local->din) to a
// 16-byte BC6H block at outdata[destIdx]. Tries the single-region pattern and
// all 32 two-region partitions via FindBestPattern, keeps the best, then runs
// EncodePattern and packs the result with SaveDataBlock.
void CompressBlockBC6_Internal(CMP_GLOBAL unsigned char*outdata,
                               CGU_UINT32 destIdx,
                               BC6H_Encode_local * BC6HEncode_local,
                               CMP_GLOBAL const BC6H_Encode *BC6HEncode)
{
    //printf("---SRC---\n");
    //CGU_UINT8 blkindex = 0;
    //CGU_UINT8 srcindex = 0;
    //for ( CGU_INT32 j = 0; j < 16; j++) {
    //    printf("%5.0f,",BC6HEncode_local->din[j][0]);// R
    //    printf("%5.0f,",BC6HEncode_local->din[j][1]);// G
    //    printf("%5.0f,",BC6HEncode_local->din[j][2]);// B
    //    printf("%5.0f\n,",BC6HEncode_local->din[j][3]);// No Alpha
    //}

    // Fallback block pattern (decodes to a solid debug color) used when no
    // mode was selected (m_mode == 0) below.
    CGU_UINT8 Cmp_Red_Block[16] = { 0xc2,0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xe0,0x03,0x00,0x00,0x00,0x00,0x00 };

    CGU_FLOAT bestError = CMP_FLOAT_MAX;
    CGU_FLOAT error = CMP_FLOAT_MAX;
    CGU_INT8 bestShape = 0;
    CGU_FLOAT quality = BC6HEncode->m_quality;
    BC6HEncode_local->issigned = BC6HEncode->m_isSigned;

    // run through no partition first (bestShape == -1 marks "one region")
    error = FindBestPattern(BC6HEncode_local, false, 0, quality);
    if (error < bestError)
    {
        bestError = error;
        bestShape = -1;

        // Snapshot the winning state: FindBestPattern overwrites these fields
        // on every call, so the best candidate must be saved aside.
        memcpy((CGU_UINT8 *)BC6HEncode_local->cur_best_shape_indices,(CGU_UINT8 *) BC6HEncode_local->shape_indices, sizeof(BC6HEncode_local->shape_indices));
        memcpy((CGU_UINT8 *)BC6HEncode_local->cur_best_partition    ,(CGU_UINT8 *) BC6HEncode_local->partition, sizeof(BC6HEncode_local->partition));
        memcpy((CGU_UINT8 *)BC6HEncode_local->cur_best_fEndPoints   ,(CGU_UINT8 *) BC6HEncode_local->fEndPoints, sizeof(BC6HEncode_local->fEndPoints));
        memcpy((CGU_UINT8 *)BC6HEncode_local->cur_best_entryCount   ,(CGU_UINT8 *) BC6HEncode_local->entryCount, sizeof(BC6HEncode_local->entryCount));
        BC6HEncode_local->d_shape_index = bestShape;
    }


    // run through 32 possible partition set
    for (CGU_INT8 shape = 0; shape < MAX_BC6H_PARTITIONS; shape++)
    {
        error = FindBestPattern(BC6HEncode_local, true, shape, quality);
        if (error < bestError)
        {
            bestError = error;
            bestShape = shape;

            memcpy((CGU_UINT8 *)BC6HEncode_local->cur_best_shape_indices, (CGU_UINT8 *)BC6HEncode_local->shape_indices, sizeof(BC6HEncode_local->shape_indices));
            memcpy((CGU_UINT8 *)BC6HEncode_local->cur_best_partition    , (CGU_UINT8 *)BC6HEncode_local->partition, sizeof(BC6HEncode_local->partition));
            memcpy((CGU_UINT8 *)BC6HEncode_local->cur_best_fEndPoints   , (CGU_UINT8 *)BC6HEncode_local->fEndPoints, sizeof(BC6HEncode_local->fEndPoints));
            memcpy((CGU_UINT8 *)BC6HEncode_local->cur_best_entryCount   , (CGU_UINT8 *)BC6HEncode_local->entryCount, sizeof(BC6HEncode_local->entryCount));
            BC6HEncode_local->d_shape_index = bestShape;
        }
        else
        {
            // This shape lost: restore the saved best so the working fields
            // always hold the best candidate when the loop exits.
            if (bestShape != -1)
            {
                BC6HEncode_local->d_shape_index = bestShape;
                memcpy((CGU_UINT8 *)BC6HEncode_local->shape_indices, (CGU_UINT8 *)BC6HEncode_local->cur_best_shape_indices, sizeof(BC6HEncode_local->shape_indices));
                memcpy((CGU_UINT8 *)BC6HEncode_local->partition    , (CGU_UINT8 *)BC6HEncode_local->cur_best_partition, sizeof(BC6HEncode_local->partition));
                memcpy((CGU_UINT8 *)BC6HEncode_local->fEndPoints   , (CGU_UINT8 *)BC6HEncode_local->cur_best_fEndPoints, sizeof(BC6HEncode_local->fEndPoints));
                memcpy((CGU_UINT8 *)BC6HEncode_local->entryCount   , (CGU_UINT8 *)BC6HEncode_local->cur_best_entryCount, sizeof(BC6HEncode_local->entryCount));
            }
        }
    }

    bestError = EncodePattern(BC6HEncode_local, bestError);


    // used for debugging modes, set the value you want to debug with
    if (BC6HEncode_local->m_mode != 0)
    {
        // do final encoding and save to output block
        SaveDataBlock(BC6HEncode_local, &outdata[destIdx]);
    }
    else
    {
        // No mode selected: emit the debug fallback block.
        for (CGU_INT i = 0; i < 16; i++)
            outdata[destIdx + i] = Cmp_Red_Block[i];
    }
}
+//============================================== USER INTERFACES ======================================================== + +#ifndef ASPM_GPU +#ifndef ASPM +//======================= DECOMPRESS ========================================= +using namespace std; + +static AMD_BC6H_Format extract_format(const CGU_UINT8 in[COMPRESSED_BLOCK_SIZE]) +{ + AMD_BC6H_Format bc6h_format; + unsigned short decvalue; + CGU_UINT8 iData[COMPRESSED_BLOCK_SIZE]; + memcpy(iData,in,COMPRESSED_BLOCK_SIZE); + + memset(&bc6h_format,0,sizeof(AMD_BC6H_Format)); + + // 2 bit mode has Mode bit:2 = 0 and mode bits:1 = 0 or 1 + // 5 bit mode has Mode bit:2 = 1 + if ((in[0]&0x02) > 0) + { + decvalue = (in[0]&0x1F); // first five bits + } + else + { + decvalue = (in[0]&0x01); // first two bits + } + + BitHeader header(in,16); + + switch (decvalue) + { + case 0x00: + bc6h_format.m_mode = 1; // 10:5:5:5 + bc6h_format.wBits = 10; + bc6h_format.tBits[C_RED] = 5; + bc6h_format.tBits[C_GREEN] = 5; + bc6h_format.tBits[C_BLUE] = 5; + bc6h_format.rw = header.getvalue(5 ,10); // 10: rw[9:0] + bc6h_format.rx = header.getvalue(35,5); // 5: rx[4:0] + bc6h_format.ry = header.getvalue(65,5); // 5: ry[4:0] + bc6h_format.rz = header.getvalue(71,5); // 5: rz[4:0] + bc6h_format.gw = header.getvalue(15,10); // 10: gw[9:0] + bc6h_format.gx = header.getvalue(45,5); // 5: gx[4:0] + bc6h_format.gy = header.getvalue(41,4) | // 5: gy[3:0] + (header.getvalue(2,1) << 4); // gy[4] + bc6h_format.gz = header.getvalue(51,4) | // 5: gz[3:0] + (header.getvalue(40,1) << 4); // gz[4] + bc6h_format.bw = header.getvalue(25,10); // 10: bw[9:0] + bc6h_format.bx = header.getvalue(55,5); // 5: bx[4:0] + bc6h_format.by = header.getvalue(61,4) | // 5: by[3:0] + (header.getvalue(3,1) << 4); // by[4] + bc6h_format.bz = header.getvalue(50,1) | // 5: bz[0] + (header.getvalue(60,1) << 1) | // bz[1] + (header.getvalue(70,1) << 2) | // bz[2] + (header.getvalue(76,1) << 3) | // bz[3] + (header.getvalue(4 ,1) << 4); // bz[4] + break; + case 0x01: + 
bc6h_format.m_mode = 2; // 7:6:6:6 + bc6h_format.wBits = 7; + bc6h_format.tBits[C_RED] = 6; + bc6h_format.tBits[C_GREEN] = 6; + bc6h_format.tBits[C_BLUE] = 6; + bc6h_format.rw = header.getvalue(5,7); // 7: rw[6:0] + bc6h_format.rx = header.getvalue(35,6); // 6: rx[5:0] + bc6h_format.ry = header.getvalue(65,6); // 6: ry[5:0] + bc6h_format.rz = header.getvalue(71,6); // 6: rz[5:0] + bc6h_format.gw = header.getvalue(15,7); // 7: gw[6:0] + bc6h_format.gx = header.getvalue(45,6); // 6: gx[5:0] + bc6h_format.gy = header.getvalue(41,4) | // 6: gy[3:0] + (header.getvalue(24,1) << 4) | // gy[4] + (header.getvalue(2,1) << 5); // gy[5] + bc6h_format.gz = header.getvalue(51,4) | // 6: gz[3:0] + (header.getvalue(3,1) << 4) | // gz[4] + (header.getvalue(4,1) << 5); // gz[5] + bc6h_format.bw = header.getvalue(25,7); // 7: bw[6:0] + bc6h_format.bx = header.getvalue(55,6); // 6: bx[5:0] + bc6h_format.by = header.getvalue(61,4) | // 6: by[3:0] + (header.getvalue(14,1) << 4) | // by[4] + (header.getvalue(22,1) << 5); // by[5] + bc6h_format.bz = header.getvalue(12,1) | // 6: bz[0] + (header.getvalue(13,1) << 1) | // bz[1] + (header.getvalue(23,1) << 2) | // bz[2] + (header.getvalue(32,1) << 3) | // bz[3] + (header.getvalue(34,1) << 4) | // bz[4] + (header.getvalue(33,1) << 5); // bz[5] + break; + case 0x02: + bc6h_format.m_mode = 3; // 11:5:4:4 + bc6h_format.wBits = 11; + bc6h_format.tBits[C_RED] = 5; + bc6h_format.tBits[C_GREEN] = 4; + bc6h_format.tBits[C_BLUE] = 4; + bc6h_format.rw = header.getvalue(5,10) | //11: rw[9:0] + (header.getvalue(40,1) << 10); // rw[10] + bc6h_format.rx = header.getvalue(35,5); // 5: rx[4:0] + bc6h_format.ry = header.getvalue(65,5); // 5: ry[4:0] + bc6h_format.rz = header.getvalue(71,5); // 5: rz[4:0] + bc6h_format.gw = header.getvalue(15,10) | //11: gw[9:0] + (header.getvalue(49,1) << 10); // gw[10] + bc6h_format.gx = header.getvalue(45,4); //4: gx[3:0] + bc6h_format.gy = header.getvalue(41,4); //4: gy[3:0] + bc6h_format.gz = header.getvalue(51,4); //4: 
gz[3:0] + bc6h_format.bw = header.getvalue(25,10) | //11: bw[9:0] + (header.getvalue(59,1) << 10); // bw[10] + bc6h_format.bx = header.getvalue(55,4); //4: bx[3:0] + bc6h_format.by = header.getvalue(61,4); //4: by[3:0] + bc6h_format.bz = header.getvalue(50,1) | //4: bz[0] + (header.getvalue(60,1) << 1) | // bz[1] + (header.getvalue(70,1) << 2) | // bz[2] + (header.getvalue(76,1) << 3); // bz[3] + break; + case 0x06: + bc6h_format.m_mode = 4; // 11:4:5:4 + bc6h_format.wBits = 11; + bc6h_format.tBits[C_RED] = 4; + bc6h_format.tBits[C_GREEN] = 5; + bc6h_format.tBits[C_BLUE] = 4; + bc6h_format.rw = header.getvalue(5,10) | //11: rw[9:0] + (header.getvalue(39,1) << 10); // rw[10] + bc6h_format.rx = header.getvalue(35,4); //4: rx[3:0] + bc6h_format.ry = header.getvalue(65,4); //4: ry[3:0] + bc6h_format.rz = header.getvalue(71,4); //4: rz[3:0] + bc6h_format.gw = header.getvalue(15,10) | //11: gw[9:0] + (header.getvalue(50,1) << 10); // gw[10] + bc6h_format.gx = header.getvalue(45,5); //5: gx[4:0] + bc6h_format.gy = header.getvalue(41,4) | //5: gy[3:0] + (header.getvalue(75,1) << 4); // gy[4] + bc6h_format.gz = header.getvalue(51,4) | //5: gz[3:0] + (header.getvalue(40,1) << 4); // gz[4] + bc6h_format.bw = header.getvalue(25,10) | //11: bw[9:0] + (header.getvalue(59,1) << 10); // bw[10] + bc6h_format.bx = header.getvalue(55,4); //4: bx[3:0] + bc6h_format.by = header.getvalue(61,4); //4: by[3:0] + bc6h_format.bz = header.getvalue(69,1) | //4: bz[0] + (header.getvalue(60,1) << 1) | // bz[1] + (header.getvalue(70,1) << 2) | // bz[2] + (header.getvalue(76,1) << 3); // bz[3] + break; + case 0x0A: + bc6h_format.m_mode = 5; // 11:4:4:5 + bc6h_format.wBits = 11; + bc6h_format.tBits[C_RED] = 4; + bc6h_format.tBits[C_GREEN] = 4; + bc6h_format.tBits[C_BLUE] = 5; + bc6h_format.rw = header.getvalue(5,10) | //11: rw[9:0] + (header.getvalue(39,1) << 10); // rw[10] + bc6h_format.rx = header.getvalue(35,4); //4: rx[3:0] + bc6h_format.ry = header.getvalue(65,4); //4: ry[3:0] + bc6h_format.rz 
= header.getvalue(71,4); //4: rz[3:0] + bc6h_format.gw = header.getvalue(15,10) | //11: gw[9:0] + (header.getvalue(49,1) << 10); // gw[10] + bc6h_format.gx = header.getvalue(45,4); //4: gx[3:0] + bc6h_format.gy = header.getvalue(41,4); //4: gy[3:0] + bc6h_format.gz = header.getvalue(51,4); //4: gz[3:0] + bc6h_format.bw = header.getvalue(25,10) | //11: bw[9:0] + (header.getvalue(60,1) << 10); // bw[10] + bc6h_format.bx = header.getvalue(55,5); //5: bx[4:0] + bc6h_format.by = header.getvalue(61,4); //5: by[3:0] + (header.getvalue(40,1) << 4); // by[4] + bc6h_format.bz = header.getvalue(50,1) | //5: bz[0] + (header.getvalue(69,1) << 1) | // bz[1] + (header.getvalue(70,1) << 2) | // bz[2] + (header.getvalue(76,1) << 3) | // bz[3] + (header.getvalue(75,1) << 4); // bz[4] + break; + case 0x0E: + bc6h_format.m_mode = 6; // 9:5:5:5 + bc6h_format.wBits = 9; + bc6h_format.tBits[C_RED] = 5; + bc6h_format.tBits[C_GREEN] = 5; + bc6h_format.tBits[C_BLUE] = 5; + bc6h_format.rw = header.getvalue(5,9); //9: rw[8:0] + bc6h_format.gw = header.getvalue(15,9); //9: gw[8:0] + bc6h_format.bw = header.getvalue(25,9); //9: bw[8:0] + bc6h_format.rx = header.getvalue(35,5); //5: rx[4:0] + bc6h_format.gx = header.getvalue(45,5); //5: gx[4:0] + bc6h_format.bx = header.getvalue(55,5); //5: bx[4:0] + bc6h_format.ry = header.getvalue(65,5); //5: ry[4:0] + bc6h_format.gy = header.getvalue(41,4) | //5: gy[3:0] + (header.getvalue(24,1) << 4); // gy[4] + bc6h_format.by = header.getvalue(61,4) | //5: by[3:0] + (header.getvalue(14,1) << 4); // by[4] + bc6h_format.rz = header.getvalue(71,5); //5: rz[4:0] + bc6h_format.gz = header.getvalue(51,4) | //5: gz[3:0] + (header.getvalue(40,1) << 4); // gz[4] + bc6h_format.bz = header.getvalue(50,1) | //5: bz[0] + (header.getvalue(60,1) << 1) | // bz[1] + (header.getvalue(70,1) << 2) | // bz[2] + (header.getvalue(76,1) << 3) | // bz[3] + (header.getvalue(34,1) << 4); // bz[4] + break; + case 0x12: + bc6h_format.m_mode = 7; // 8:6:5:5 + bc6h_format.wBits = 8; + 
bc6h_format.tBits[C_RED] = 6; + bc6h_format.tBits[C_GREEN] = 5; + bc6h_format.tBits[C_BLUE] = 5; + bc6h_format.rw = header.getvalue(5,8); //8: rw[7:0] + bc6h_format.gw = header.getvalue(15,8); //8: gw[7:0] + bc6h_format.bw = header.getvalue(25,8); //8: bw[7:0] + bc6h_format.rx = header.getvalue(35,6); //6: rx[5:0] + bc6h_format.gx = header.getvalue(45,5); //5: gx[4:0] + bc6h_format.bx = header.getvalue(55,5); //5: bx[4:0] + bc6h_format.ry = header.getvalue(65,6); //6: ry[5:0] + bc6h_format.gy = header.getvalue(41,4) | //5: gy[3:0] + (header.getvalue(24,1) << 4); // gy[4] + bc6h_format.by = header.getvalue(61,4) | //5: by[3:0] + (header.getvalue(14,1) << 4); // by[4] + bc6h_format.rz = header.getvalue(71,6); //6: rz[5:0] + bc6h_format.gz = header.getvalue(51,4) | //5: gz[3:0] + (header.getvalue(13,1) << 4); // gz[4] + bc6h_format.bz = header.getvalue(50,1) | //5: bz[0] + (header.getvalue(60,1) << 1) | // bz[1] + (header.getvalue(23,1) << 2) | // bz[2] + (header.getvalue(33,1) << 3) | // bz[3] + (header.getvalue(34,1) << 4); // bz[4] + break; + case 0x16: + bc6h_format.m_mode = 8; // 8:5:6:5 + bc6h_format.wBits = 8; + bc6h_format.tBits[C_RED] = 5; + bc6h_format.tBits[C_GREEN] = 6; + bc6h_format.tBits[C_BLUE] = 5; + bc6h_format.rw = header.getvalue(5,8); //8: rw[7:0] + bc6h_format.gw = header.getvalue(15,8); //8: gw[7:0] + bc6h_format.bw = header.getvalue(25,8); //8: bw[7:0] + bc6h_format.rx = header.getvalue(35,5); //5: rx[4:0] + bc6h_format.gx = header.getvalue(45,6); //6: gx[5:0] + bc6h_format.bx = header.getvalue(55,5); //5: bx[4:0] + bc6h_format.ry = header.getvalue(65,5); //5: ry[4:0] + bc6h_format.gy = header.getvalue(41,4) | //6: gy[3:0] + (header.getvalue(24,1) << 4) | // gy[4] + (header.getvalue(23,1) << 5); // gy[5] + bc6h_format.by = header.getvalue(61,4) | //5: by[3:0] + (header.getvalue(14,1) << 4); // by[4] + bc6h_format.rz = header.getvalue(71,5); //5: rz[4:0] + bc6h_format.gz = header.getvalue(51,4) | //6: gz[3:0] + (header.getvalue(40,1) << 4) | // 
gz[4] + (header.getvalue(33,1) << 5); // gz[5] + bc6h_format.bz = header.getvalue(13,1) | //5: bz[0] + (header.getvalue(60,1) << 1) | // bz[1] + (header.getvalue(70,1) << 2) | // bz[2] + (header.getvalue(76,1) << 3) | // bz[3] + (header.getvalue(34,1) << 4); // bz[4] + break; + case 0x1A: + bc6h_format.m_mode = 9; // 8:5:5:6 + bc6h_format.wBits = 8; + bc6h_format.tBits[C_RED] = 5; + bc6h_format.tBits[C_GREEN] = 5; + bc6h_format.tBits[C_BLUE] = 6; + bc6h_format.rw = header.getvalue(5,8); //8: rw[7:0] + bc6h_format.gw = header.getvalue(15,8); //8: gw[7:0] + bc6h_format.bw = header.getvalue(25,8); //8: bw[7:0] + bc6h_format.rx = header.getvalue(35,5); //5: rx[4:0] + bc6h_format.gx = header.getvalue(45,5); //5: gx[4:0] + bc6h_format.bx = header.getvalue(55,6); //6: bx[5:0] + bc6h_format.ry = header.getvalue(65,5); //5: ry[4:0] + bc6h_format.gy = header.getvalue(41,4) | //5: gy[3:0] + (header.getvalue(24,1) << 4); // gy[4] + bc6h_format.by = header.getvalue(61,4) | //6: by[3:0] + (header.getvalue(14,1) << 4) | // by[4] + (header.getvalue(23,1) << 5); // by[5] + bc6h_format.rz = header.getvalue(71,5); //5: rz[4:0] + bc6h_format.gz = header.getvalue(51,4) | //5: gz[3:0] + (header.getvalue(40,1) << 4); // gz[4] + bc6h_format.bz = header.getvalue(50,1) | //6: bz[0] + (header.getvalue(13,1) << 1) | // bz[1] + (header.getvalue(70,1) << 2) | // bz[2] + (header.getvalue(76,1) << 3) | // bz[3] + (header.getvalue(34,1) << 4) | // bz[4] + (header.getvalue(33,1) << 5); // bz[5] + break; + case 0x1E: + bc6h_format.m_mode = 10; // 6:6:6:6 + bc6h_format.istransformed = FALSE; + bc6h_format.wBits = 6; + bc6h_format.tBits[C_RED] = 6; + bc6h_format.tBits[C_GREEN] = 6; + bc6h_format.tBits[C_BLUE] = 6; + bc6h_format.rw = header.getvalue(5,6); //6: rw[5:0] + bc6h_format.gw = header.getvalue(15,6); //6: gw[5:0] + bc6h_format.bw = header.getvalue(25,6); //6: bw[5:0] + bc6h_format.rx = header.getvalue(35,6); //6: rx[5:0] + bc6h_format.gx = header.getvalue(45,6); //6: gx[5:0] + bc6h_format.bx = 
header.getvalue(55,6); //6: bx[5:0] + bc6h_format.ry = header.getvalue(65,6); //6: ry[5:0] + bc6h_format.gy = header.getvalue(41,4) | //6: gy[3:0] + (header.getvalue(24,1) << 4) | // gy[4] + (header.getvalue(21,1) << 5); // gy[5] + bc6h_format.by = header.getvalue(61,4) | //6: by[3:0] + (header.getvalue(14,1) << 4) | // by[4] + (header.getvalue(22,1) << 5); // by[5] + bc6h_format.rz = header.getvalue(71,6); //6: rz[5:0] + bc6h_format.gz = header.getvalue(51,4) | //6: gz[3:0] + (header.getvalue(11,1) << 4) | // gz[4] + (header.getvalue(31,1) << 5); // gz[5] + bc6h_format.bz = header.getvalue(12,1) | //6: bz[0] + (header.getvalue(13,1) << 1) | // bz[1] + (header.getvalue(23,1) << 2) | // bz[2] + (header.getvalue(32,1) << 3) | // bz[3] + (header.getvalue(34,1) << 4) | // bz[4] + (header.getvalue(33,1) << 5); // bz[5] + break; + + // Single region modes + case 0x03: + bc6h_format.m_mode = 11; // 10:10 + bc6h_format.wBits = 10; + bc6h_format.tBits[C_RED] = 10; + bc6h_format.tBits[C_GREEN] = 10; + bc6h_format.tBits[C_BLUE] = 10; + bc6h_format.rw = header.getvalue(5,10); // 10: rw[9:0] + bc6h_format.gw = header.getvalue(15,10); // 10: gw[9:0] + bc6h_format.bw = header.getvalue(25,10); // 10: bw[9:0] + bc6h_format.rx = header.getvalue(35,10); // 10: rx[9:0] + bc6h_format.gx = header.getvalue(45,10); // 10: gx[9:0] + bc6h_format.bx = header.getvalue(55,10); // 10: bx[9:0] + break; + case 0x07: + bc6h_format.m_mode = 12; // 11:9 + bc6h_format.wBits = 11; + bc6h_format.tBits[C_RED] = 9; + bc6h_format.tBits[C_GREEN] = 9; + bc6h_format.tBits[C_BLUE] = 9; + bc6h_format.rw = header.getvalue(5,10) | // 10: rw[9:0] + (header.getvalue(44,1) << 10); // rw[10] + bc6h_format.gw = header.getvalue(15,10) | // 10: gw[9:0] + (header.getvalue(54,1) << 10); // gw[10] + bc6h_format.bw = header.getvalue(25,10) | // 10: bw[9:0] + (header.getvalue(64,1) << 10); // bw[10] + bc6h_format.rx = header.getvalue(35,9); // 9: rx[8:0] + bc6h_format.gx = header.getvalue(45,9); // 9: gx[8:0] + 
bc6h_format.bx = header.getvalue(55,9); // 9: bx[8:0] + break; + case 0x0B: + bc6h_format.m_mode = 13; // 12:8 + bc6h_format.wBits = 12; + bc6h_format.tBits[C_RED] = 8; + bc6h_format.tBits[C_GREEN] = 8; + bc6h_format.tBits[C_BLUE] = 8; + bc6h_format.rw = header.getvalue(5, 10) | // 12: rw[9:0] + (header.getvalue(43, 1) << 11) | // rw[11] + (header.getvalue(44, 1) << 10); // rw[10] + bc6h_format.gw = header.getvalue(15, 10) | // 12: gw[9:0] + (header.getvalue(53, 1) << 11) | // gw[11] + (header.getvalue(54, 1) << 10); // gw[10] + bc6h_format.bw = header.getvalue(25,10) | // 12: bw[9:0] + (header.getvalue(63, 1) << 11) | // bw[11] + (header.getvalue(64,1) << 10); // bw[10] + bc6h_format.rx = header.getvalue(35,8); // 8: rx[7:0] + bc6h_format.gx = header.getvalue(45,8); // 8: gx[7:0] + bc6h_format.bx = header.getvalue(55,8); // 8: bx[7:0] + break; + case 0x0F: + bc6h_format.m_mode = 14; // 16:4 + bc6h_format.wBits = 16; + bc6h_format.tBits[C_RED] = 4; + bc6h_format.tBits[C_GREEN] = 4; + bc6h_format.tBits[C_BLUE] = 4; + bc6h_format.rw = header.getvalue(5,10) | // 16: rw[9:0] + (header.getvalue(39, 1) << 15) | // rw[15] + (header.getvalue(40, 1) << 14) | // rw[14] + (header.getvalue(41, 1) << 13) | // rw[13] + (header.getvalue(42, 1) << 12) | // rw[12] + (header.getvalue(43, 1) << 11) | // rw[11] + (header.getvalue(44, 1) << 10); // rw[10] + bc6h_format.gw = header.getvalue(15,10) | // 16: gw[9:0] + (header.getvalue(49, 1) << 15) | // gw[15] + (header.getvalue(50, 1) << 14) | // gw[14] + (header.getvalue(51, 1) << 13) | // gw[13] + (header.getvalue(52, 1) << 12) | // gw[12] + (header.getvalue(53, 1) << 11) | // gw[11] + (header.getvalue(54, 1) << 10); // gw[10] + bc6h_format.bw = header.getvalue(25,10) | // 16: bw[9:0] + (header.getvalue(59, 1) << 15) | // bw[15] + (header.getvalue(60, 1) << 14) | // bw[14] + (header.getvalue(61, 1) << 13) | // bw[13] + (header.getvalue(62, 1) << 12) | // bw[12] + (header.getvalue(63, 1) << 11) | // bw[11] + (header.getvalue(64, 1) << 
10); // bw[10] + bc6h_format.rx = header.getvalue(35,4); // 4: rx[3:0] + bc6h_format.gx = header.getvalue(45,4); // 4: gx[3:0] + bc6h_format.bx = header.getvalue(55,4); // 4: bx[3:0] + break; + default: + bc6h_format.m_mode = 0; + return bc6h_format; + } + + // Each format in the mode table can be uniquely identified by the mode bits. + // The first ten modes are used for two-region tiles, and the mode bit field + // can be either two or five bits long. These blocks also have fields for + // the compressed color endpoints (72 or 75 bits), the partition (5 bits), + // and the partition indices (46 bits). + + if (bc6h_format.m_mode <= 10) + { + bc6h_format.region = BC6_TWO; + // Get the shape index bits 77 to 81 + bc6h_format.d_shape_index = (unsigned short) header.getvalue(77,5); + bc6h_format.istransformed = (bc6h_format.m_mode < 10) ? TRUE : FALSE; + } + else + { + bc6h_format.region = BC6_ONE; + bc6h_format.d_shape_index = 0; + bc6h_format.istransformed = (bc6h_format.m_mode > 11) ? TRUE : FALSE; + } + + // Save the points in a form easy to compute with + bc6h_format.EC[0].A[0] = (CGU_FLOAT)bc6h_format.rw; + bc6h_format.EC[0].B[0] = (CGU_FLOAT)bc6h_format.rx; + bc6h_format.EC[1].A[0] = (CGU_FLOAT)bc6h_format.ry; + bc6h_format.EC[1].B[0] = (CGU_FLOAT)bc6h_format.rz; + bc6h_format.EC[0].A[1] = (CGU_FLOAT)bc6h_format.gw; + bc6h_format.EC[0].B[1] = (CGU_FLOAT)bc6h_format.gx; + bc6h_format.EC[1].A[1] = (CGU_FLOAT)bc6h_format.gy; + bc6h_format.EC[1].B[1] = (CGU_FLOAT)bc6h_format.gz; + bc6h_format.EC[0].A[2] = (CGU_FLOAT)bc6h_format.bw; + bc6h_format.EC[0].B[2] = (CGU_FLOAT)bc6h_format.bx; + bc6h_format.EC[1].A[2] = (CGU_FLOAT)bc6h_format.by; + bc6h_format.EC[1].B[2] = (CGU_FLOAT)bc6h_format.bz; + + if (bc6h_format.region == BC6_ONE) + { + int startbits = ONE_REGION_INDEX_OFFSET; + bc6h_format.indices16[0] = (CGU_UINT8) header.getvalue(startbits,3); + startbits+=3; + for (int i=1; i<16; i++) + { + bc6h_format.indices16[i] = (CGU_UINT8)header.getvalue(startbits,4); + 
startbits+=4; + } + } + else + { + int startbit = TWO_REGION_INDEX_OFFSET, + nbits = 2; + bc6h_format.indices16[0 ] = (CGU_UINT8)header.getvalue(startbit,2); + for (int i= 1; i<16; i++) + { + startbit += nbits; // offset start bit for next index using prior nbits used + nbits = g_indexfixups[bc6h_format.d_shape_index] == i?2:3; // get new number of bit to save index with + bc6h_format.indices16[i] = (CGU_UINT8)header.getvalue(startbit,nbits); + } + + } + + return bc6h_format; +} + +static void extract_compressed_endpoints(AMD_BC6H_Format& bc6h_format) +{ + int i; + int t; + + if (bc6h_format.issigned) + { + if (bc6h_format.istransformed) + { + for (i=0; i= 15) + unq = q; + else if (q == 0) + unq = 0; + else if (q == ((1<> prec; + break; + + // here, let's stick with S16 (no apparent quality benefit from going to S17) + // range is (-7c00..7c00)/(-8000..8000) = 31/32 + case SIGNED_F16: + // don't remove this test even though it appears equivalent to the code below + // as it isn't -- the code below can overflow for prec = 16 + if (prec >= 16) + unq = q; + else + { + if (q < 0) { s = 1; q = -q; } else s = 0; + + if (q == 0) + unq = 0; + else if (q >= ((1<<(prec-1))-1)) + unq = s ? 
-S16MAX : S16MAX; + else + { + unq = (q * (S16MAX+1) + (S16MAX+1)/2) >> (prec-1); + if (s) + unq = -unq; + } + } + break; + } + return unq; +} + +static int lerp(int a, int b, int i, int denom) +{ + assert (denom == 3 || denom == 7 || denom == 15); + assert (i >= 0 && i <= denom); + + int shift = 6, *weights = NULL; + + switch(denom) + { + case 3: denom *= 5; i *= 5; // fall through to case 15 + case 15: weights = g_aWeights4; break; + case 7: weights = g_aWeights3; break; + default: assert(0); + } + + #pragma warning(disable:4244) + // no need to round these as this is an exact division + return (int)(a*weights[denom-i] +b*weights[i]) / float(1 << shift); +} + +static int finish_unquantize(AMD_BC6H_Format bc6h_format, int q) +{ + if (bc6h_format.format == UNSIGNED_F16) + return (q * 31) >> 6; // scale the magnitude by 31/64 + else if (bc6h_format.format == SIGNED_F16) + return (q < 0) ? -(((-q) * 31) >> 5) : (q * 31) >> 5; // scale the magnitude by 31/32 + else + return q; +} + +static void generate_palette_quantized(int max, AMD_BC6H_Format& bc6h_format, int region) +{ + // scale endpoints + int a, b, c; // really need a IntVec3... 
+ + a = unquantize(bc6h_format, bc6h_format.E[region].A[0], bc6h_format.wBits); + b = unquantize(bc6h_format, bc6h_format.E[region].B[0], bc6h_format.wBits); + + // interpolate : This part of code is used for debuging data + for (int i = 0; i < max; i++) + { + c = finish_unquantize(bc6h_format, lerp(a, b, i, max-1)); + bc6h_format.Palete[region][i].x = c; + } + + a = unquantize(bc6h_format, bc6h_format.E[region].A[1], bc6h_format.wBits); + b = unquantize(bc6h_format, bc6h_format.E[region].B[1], bc6h_format.wBits); + + // interpolate + for (int i = 0; i < max; i++) + bc6h_format.Palete[region][i].y = finish_unquantize(bc6h_format, lerp(a, b, i, max-1)); + + a = unquantize(bc6h_format,bc6h_format.E[region].A[2], bc6h_format.wBits); + b = unquantize(bc6h_format,bc6h_format.E[region].B[2], bc6h_format.wBits); + + // interpolate + for (int i = 0; i < max; i++) + bc6h_format.Palete[region][i].z = finish_unquantize(bc6h_format, lerp(a, b, i, max-1)); +} + +// NV code : used with modifications +static void extract_compressed_endpoints2(AMD_BC6H_Format& bc6h_format) +{ + int i; + int t; + + if (bc6h_format.issigned) + { + if (bc6h_format.istransformed) + { + for (i=0; i(options); + delete BCOptions; + return CGU_CORE_OK; +} + +int CMP_CDECL SetQualityBC6(void *options, CGU_FLOAT fquality) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + BC6H_Encode *BC6optionsDefault = (BC6H_Encode *)options; + if (fquality < 0.0f) fquality = 0.0f; + else + if (fquality > 1.0f) fquality = 1.0f; + BC6optionsDefault->m_quality = fquality; + BC6optionsDefault->m_partitionSearchSize = (BC6optionsDefault->m_quality*2.0F) / qFAST_THRESHOLD; + if (BC6optionsDefault->m_partitionSearchSize < (1.0F / 16.0F)) + BC6optionsDefault->m_partitionSearchSize = (1.0F / 16.0F); + return CGU_CORE_OK; +} + +int CMP_CDECL SetMaskBC6(void *options, CGU_UINT32 mask) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + BC6H_Encode *BC6options = (BC6H_Encode *)options; + BC6options->m_validModeMask = mask; + 
return CGU_CORE_OK; +} + +int CMP_CDECL CompressBlockBC6(const CGU_UINT16 *srcBlock, + unsigned int srcStrideInShorts, + CMP_GLOBAL CGU_UINT8 cmpBlock[16], + const CMP_GLOBAL void *options = NULL) +{ + + CGU_UINT16 inBlock[48]; + + //---------------------------------- + // Fill the inBlock with source data + //---------------------------------- + CGU_INT srcpos = 0; + CGU_INT dstptr = 0; + for (CGU_UINT8 row = 0; row < 4; row++) + { + srcpos = row * srcStrideInShorts; + for (CGU_UINT8 col = 0; col < 4; col++) + { + inBlock[dstptr++] = CGU_UINT16(srcBlock[srcpos++]); + inBlock[dstptr++] = CGU_UINT16(srcBlock[srcpos++]); + inBlock[dstptr++] = CGU_UINT16(srcBlock[srcpos++]); + } + } + + + BC6H_Encode *BC6HEncode = (BC6H_Encode *)options; + BC6H_Encode BC6HEncodeDefault; + + if (BC6HEncode == NULL) + { + BC6HEncode = &BC6HEncodeDefault; + SetDefaultBC6Options(BC6HEncode); + } + + BC6H_Encode_local BC6HEncode_local; + memset((CGU_UINT8 *)&BC6HEncode_local, 0, sizeof(BC6H_Encode_local)); + CGU_UINT8 blkindex = 0; + for ( CGU_INT32 j = 0; j < 16; j++) { + BC6HEncode_local.din[j][0] = inBlock[blkindex++]; // R + BC6HEncode_local.din[j][1] = inBlock[blkindex++]; // G + BC6HEncode_local.din[j][2] = inBlock[blkindex++]; // B + BC6HEncode_local.din[j][3] = 0; // A + } + + CompressBlockBC6_Internal(cmpBlock, 0, &BC6HEncode_local,BC6HEncode); + + return CGU_CORE_OK; +} + +int CMP_CDECL DecompressBlockBC6(const unsigned char cmpBlock[16], + CGU_UINT16 srcBlock[48], + const void *options = NULL) { + BC6H_Encode *BC6HEncode = (BC6H_Encode *)options; + BC6H_Encode BC6HEncodeDefault; + + if (BC6HEncode == NULL) + { + BC6HEncode = &BC6HEncodeDefault; + SetDefaultBC6Options(BC6HEncode); + } + DecompressBC6_Internal(srcBlock, cmpBlock,BC6HEncode); + + return CGU_CORE_OK; +} + +#endif // !ASPM +#endif // !ASPM_GPU + +//============================================== OpenCL USER INTERFACE ==================================================== +#ifdef ASPM_GPU +CMP_STATIC CMP_KERNEL void 
CMP_GPUEncoder( + CMP_GLOBAL CGU_UINT8* p_source_pixels, + CMP_GLOBAL CGU_UINT8* p_encoded_blocks, + CMP_GLOBAL Source_Info* SourceInfo, + CMP_GLOBAL BC6H_Encode * BC6HEncode +) +{ + CGU_UINT32 x = get_global_id(0); + CGU_UINT32 y = get_global_id(1); + + if (x >= (SourceInfo->m_src_width / BYTEPP)) return; + if (y >= (SourceInfo->m_src_height / BYTEPP)) return; + + BC6H_Encode_local BC6HEncode_local; + memset((CGU_UINT8 *)&BC6HEncode_local, 0, sizeof(BC6H_Encode_local)); + + + CGU_UINT32 stride = SourceInfo->m_src_width * BYTEPP; + CGU_UINT32 srcOffset = (x*BlockX*BYTEPP) + (y*stride*BYTEPP); + CGU_UINT32 destI = (x*COMPRESSED_BLOCK_SIZE) + (y*(SourceInfo->m_src_width / BlockX)*COMPRESSED_BLOCK_SIZE); + CGU_UINT32 srcidx; + + //CGU_FLOAT block4x4[16][4]; + + for (CGU_INT i = 0; i < BlockX; i++) + { + srcidx = i * stride; + for (CGU_INT j = 0; j < BlockY; j++) + { + BC6HEncode_local.din[i*BlockX + j][0] = (CGU_UINT16)(p_source_pixels[srcOffset + srcidx++]); + if (BC6HEncode_local.din[i*BlockX + j][0] < 0.00001 || isnan(BC6HEncode_local.din[i*BlockX + j][0])) + { + if (BC6HEncode->m_isSigned) + { + BC6HEncode_local.din[i*BlockX + j][0] = (isnan(BC6HEncode_local.din[i*BlockX + j][0])) ? F16NEGPREC_LIMIT_VAL : -BC6HEncode_local.din[i*BlockX + j][0]; + if (BC6HEncode_local.din[i*BlockX + j][0] < F16NEGPREC_LIMIT_VAL) { + BC6HEncode_local.din[i*BlockX + j][0] = F16NEGPREC_LIMIT_VAL; + } + } + else + BC6HEncode_local.din[i*BlockX + j][0] = 0.0; + } + + BC6HEncode_local.din[i*BlockX + j][1] = (CGU_UINT16)(p_source_pixels[srcOffset + srcidx++]); + + if (BC6HEncode_local.din[i*BlockX + j][1] < 0.00001 || isnan(BC6HEncode_local.din[i*BlockX + j][1])) + { + if (BC6HEncode->m_isSigned) + { + BC6HEncode_local.din[i*BlockX + j][1] = (isnan(BC6HEncode_local.din[i*BlockX + j][1])) ? 
F16NEGPREC_LIMIT_VAL : -BC6HEncode_local.din[i*BlockX + j][1]; + if (BC6HEncode_local.din[i*BlockX + j][1] < F16NEGPREC_LIMIT_VAL) { + BC6HEncode_local.din[i*BlockX + j][1] = F16NEGPREC_LIMIT_VAL; + } + } + else + BC6HEncode_local.din[i*BlockX + j][1] = 0.0; + } + + BC6HEncode_local.din[i*BlockX + j][2] = (CGU_UINT16)(p_source_pixels[srcOffset + srcidx++]); + if (BC6HEncode_local.din[i*BlockX + j][2] < 0.00001 || isnan(BC6HEncode_local.din[i*BlockX + j][2])) + { + if (BC6HEncode->m_isSigned) + { + BC6HEncode_local.din[i*BlockX + j][2] = (isnan(BC6HEncode_local.din[i*BlockX + j][2])) ? F16NEGPREC_LIMIT_VAL : -BC6HEncode_local.din[i*BlockX + j][2]; + if (BC6HEncode_local.din[i*BlockX + j][2] < F16NEGPREC_LIMIT_VAL) { + BC6HEncode_local.din[i*BlockX + j][2] = F16NEGPREC_LIMIT_VAL; + } + } + else + BC6HEncode_local.din[i*BlockX + j][2] = 0.0; + } + + BC6HEncode_local.din[i*BlockX + j][3] = 0.0f; + //printf("Ori---src image %d, --%02x", x, (p_source_pixels[srcOffset + srcidx++]) & 0x0000ff); //for debug + } + } + + // printf(" X %3d Y %3d Quality %2.2f", x, y, BC6HEncode->m_quality); + CompressBlockBC6_Internal(p_encoded_blocks, destI, &BC6HEncode_local, BC6HEncode); +} +#endif diff --git a/extern/CMP_Core/shaders/BC6_Encode_kernel.h b/extern/CMP_Core/shaders/BC6_Encode_kernel.h new file mode 100644 index 0000000..1a6c206 --- /dev/null +++ b/extern/CMP_Core/shaders/BC6_Encode_kernel.h @@ -0,0 +1,480 @@ +//===================================================================== +// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +//===================================================================== +#ifndef BC6_ENCODE_KERNEL_H +#define BC6_ENCODE_KERNEL_H + +#include "Common_Def.h" + +#define MAX_TRACE 10 +#define MAX_ENTRIES_QUANT_TRACE 16 +#define BlockX 4 +#define BlockY 4 +#define BYTEPP 4 +#define COMPRESSED_BLOCK_SIZE 16 // Size of a compressed block in bytes +#define MAX_DIMENSION_BIG 4 +#define MAX_SUBSET_SIZE 16 // Largest possible size for an individual subset +#define NUM_BLOCK_TYPES 8 // Number of block types in the format +#define MAX_SUBSETS 3 // Maximum number of possible subsets +#define MAX_PARTITIONS 64 // Maximum number of partition types +#define MAX_ENTRIES 64 +#define MAX_TRY 20 + +#define MAX_PARTITIONS_TABLE (1+64+64) +#define DIMENSION 4 +#define MAX_CLUSTERS_BIG 16 +#define EPSILON 0.000001 +#define MAX_CLUSTERS_QUANT_TRACE 8 + +//# Image Quality will increase as this number gets larger and end-to-end performance time will reduce +#define MAX_INDEX_BITS 4 +#define HIGHQULITY_THRESHOLD 0.7F +#define qFAST_THRESHOLD 0.5F + +#define F16NEGPREC_LIMIT_VAL -2048.0f //f16 negative precision limit value + +#define LOG_CL_RANGE 5 +#define LOG_CL_BASE 2 +#define BIT_BASE 5 +#define BIT_RANGE 9 +#define MAX_CLUSTERS 8 +#define BTT(bits) (bits-BIT_BASE) +#define CLT(cl) (cl-LOG_CL_BASE) +#define MASK(n) ((1<<(n))-1) +#define SIGN_EXTEND_TYPELESS(x,nb) ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)) +#define CMP_HALF_MAX 65504.0f // positive half max + +#ifndef ASPM_GPU +#include +#include +//typedef uint8_t byte; +#else +//typedef bitset uint8_t; +//typedef uint8 byte; +#endif + +#define BC6CompBlockSize 16 +#define BC6BlockX 4 +#define BC6BlockY 4 + +typedef struct +{ + CGU_INT k; + CGU_FLOAT d; +} BC6H_TRACE; + +#define NCHANNELS 3 +#define MAX_END_POINTS 2 +#define MAX_BC6H_MODES 14 +#define MAX_BC6H_PARTITIONS 32 +#define MAX_TWOREGION_MODES 10 +#define COMPRESSED_BLOCK_SIZE 16 // Size of a compressed block in bytes +#define ONE_REGION_INDEX_OFFSET 65 // bit location to 
start saving color index values for single region shape +#define TWO_REGION_INDEX_OFFSET 82 // bit location to start saving color index values for two region shapes +#define MIN_MODE_FOR_ONE_REGION 11 // Two regions shapes use modes 1..9 and single use 11..14 +#define R_0(ep) (ep)[0][0][i] +#define R_1(ep) (ep)[0][1][i] +#define R_2(ep) (ep)[1][0][i] +#define R_3(ep) (ep)[1][1][i] +#define FLT16_MAX 0x7bff + +#ifndef ASPM_GPU +#define USE_SHAKERHD +#endif + +#define USE_NEWRAMP + +typedef struct +{ + CGU_FLOAT A[NCHANNELS]; + CGU_FLOAT B[NCHANNELS]; +} END_Points; + +typedef struct +{ + CGU_FLOAT x, y, z; +} BC6H_Vec3f; + +typedef struct +{ + CGU_INT nbits; // Number of bits + CGU_INT prec[3]; // precission of the Qunatized RGB endpoints + CGU_INT transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + CGU_INT modebits; // number of mode bits + CGU_INT IndexPrec; // Index Precision + CGU_INT mode; // Mode value to save + CGU_INT lowestPrec; // Step size of each precesion incriment +} ModePartitions; + +__constant ModePartitions ModePartition[MAX_BC6H_MODES + 1] = +{ + 0, 0,0,0, 0, 0, 0, 0, 0, // Mode = Invaild + + // Two region Partition + 10, 5,5,5, 1, 2, 3, 0x00, 31, // Mode = 1 + 7, 6,6,6, 1, 2, 3, 0x01, 248, // Mode = 2 + 11, 5,4,4, 1, 5, 3, 0x02, 15, // Mode = 3 + 11, 4,5,4, 1, 5, 3, 0x06, 15, // Mode = 4 + 11, 4,4,5, 1, 5, 3, 0x0a, 15, // Mode = 5 + 9, 5,5,5, 1, 5, 3, 0x0e, 62, // Mode = 6 + 8, 6,5,5, 1, 5, 3, 0x12, 124, // Mode = 7 + 8, 5,6,5, 1, 5, 3, 0x16, 124, // Mode = 8 + 8, 5,5,6, 1, 5, 3, 0x1a, 124, // Mode = 9 + 6, 6,6,6, 0, 5, 3, 0x1e, 496, // Mode = 10 + + // One region Partition + 10, 10,10,10, 0, 5, 4, 0x03, 31, // Mode = 11 + 11, 9,9,9, 1, 5, 4, 0x07, 15, // Mode = 12 + 12, 8,8,8, 1, 5, 4, 0x0b, 7, // Mode = 13 + 16, 4,4,4, 1, 5, 4, 0x0f, 1, // Mode = 14 +}; + +//================================================ +// Mode Pathern order to try on endpoints +// The order can be rearranged to set which modes 
gets processed first +// for now it is set in order. +//================================================ +__constant CGU_INT8 ModeFitOrder[MAX_BC6H_MODES + 1] = +{ + 0, //0: N/A + // ---- 2 region lower bits --- + 1, // 10 5 5 5 + 2, // 7 6 6 6 + 3, // 11 5 4 5 + 4, // 11 4 5 4 + 5, // 11 4 4 5 + 6, // 9 5 5 5 + 7, // 8 6 5 5 + 8, // 8 5 6 5 + 9, // 8 5 5 6 + 10, // 6 6 6 6 + //------ 1 region high bits --- + 11, // 10 10 10 10 + 12, // 11 9 9 9 + 13, // 12 8 8 8 + 14 // 16 4 4 4 +}; + +// The Region2FixUps are for our index[subset = 2][16][3] locations +// indexed by shape region 2 +__constant CGU_INT g_Region2FixUp[32] = +{ + 7 , 3 , 11, 7, + 3 , 11, 9 , 5, + 2 , 12, 7 , 3, + 11, 7 , 11, 3, + 7 , 1 , 0 , 1, + 0 , 1 , 0 , 7, + 0 , 1 , 1 , 0, + 4 , 4 , 1 , 0, +}; + +// Indexed by all shape regions +// Partition Set Fixups for region 1 note region 0 is always at 0 +// that means normally we use 3 bits to define an index value +// if its at the fix up location then its one bit less +__constant CGU_INT g_indexfixups[32] = +{ + 15,15,15,15, + 15,15,15,15, + 15,15,15,15, + 15,15,15,15, + 15, 2, 8, 2, + 2, 8, 8,15, + 2, 8, 2, 2, + 8, 8, 2, 2, +}; + +typedef struct +{ + CGU_INT8 region; // one or two + CGU_INT8 m_mode; // m + CGU_INT8 d_shape_index; // d + CGU_INT rw; // endpt[0].A[0] + CGU_INT rx; // endpt[0].B[0] + CGU_INT ry; // endpt[1].A[0] + CGU_INT rz; // endpt[1].B[0] + CGU_INT gw; // endpt[0].A[1] + CGU_INT gx; // endpt[0].B[1] + CGU_INT gy; // endpt[1].A[1] + CGU_INT gz; // endpt[1].B[1] + CGU_INT bw; // endpt[0].A[2] + CGU_INT bx; // endpt[0].B[2] + CGU_INT by; // endpt[1].A[2] + CGU_INT bz; // endpt[1].B[2] + + union + { + CGU_UINT8 indices[4][4]; // Indices data after header block + CGU_UINT8 indices16[16]; + }; + + union + { + CGU_FLOAT din[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG]; // Original data input as floats + unsigned char cdin[256]; // as uchar to match float + }; + + END_Points EC[MAX_END_POINTS]; // compressed endpoints expressed as endpt[0].A[] and 
endpt[1].B[] + END_Points E[MAX_END_POINTS]; // decompressed endpoints + CGU_BOOL issigned; // Format is 16 bit signed floating point + CGU_BOOL istransformed; // region two: all modes = true except mode=10 + short wBits; // number of bits for the root endpoint + short tBits[NCHANNELS]; // number of bits used for the transformed endpoints + CGU_INT format; // floating point format are we using for decompression + BC6H_Vec3f Paletef[2][16]; + + CGU_INT index; // for debugging + CGU_FLOAT fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG]; + CGU_FLOAT cur_best_fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG]; + CGU_INT shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE]; + CGU_INT cur_best_shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE]; + CGU_INT entryCount[MAX_SUBSETS]; + CGU_INT cur_best_entryCount[MAX_SUBSETS]; + CGU_FLOAT partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG]; + CGU_FLOAT cur_best_partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG]; + CGU_BOOL optimized; // were end points optimized during final encoding + +} BC6H_Encode_local; + +#ifndef ASPM_GPU +using namespace std; +class BitHeader +{ +public: + BitHeader(const CGU_UINT8 in[], CGU_INT sizeinbytes) + { + m_bits.reset(); + m_sizeinbytes = sizeinbytes; + + if ((in != NULL) && (sizeinbytes <= 16)) + { + // Init bits set with given data + CGU_INT bitpos = 0; + for (CGU_INT i = 0; i < sizeinbytes; i++) + { + CGU_INT bit = 1; + for (CGU_INT j = 0; j < 8; j++) + { + m_bits[bitpos] = in[i] & bit ? 
1 : 0; + bit = bit << 1; + bitpos++; + } + } + } + } + + ~BitHeader() + { + } + + void transferbits(CGU_UINT8 in[], CGU_INT sizeinbytes) + { + if ((sizeinbytes <= m_sizeinbytes) && (in != NULL)) + { + // Init bits set with given data + memset(in, 0, sizeinbytes); + CGU_INT bitpos = 0; + for (CGU_INT i = 0; i < sizeinbytes; i++) + { + CGU_INT bit = 1; + for (CGU_INT j = 0; j < 8; j++) + { + if (m_bits[bitpos]) in[i] |= bit; + bit = bit << 1; + bitpos++; + } + } + } + } + + CGU_INT getvalue(CGU_INT start, CGU_INT bitsize) + { + CGU_INT value = 0; + CGU_INT end = start + bitsize - 1; + for (; end >= start; end--) + { + value |= m_bits[end] ? 1 : 0; + if (end > start) value <<= 1; + } + + return value; + } + + void setvalue(CGU_INT start, CGU_INT bitsize, CGU_INT value, CGU_INT maskshift = 0) + { + CGU_INT end = start + bitsize - 1; + CGU_INT mask = 0x1 << maskshift; + for (; start <= end; start++) + { + m_bits[start] = (value&mask) ? 1 : 0; + mask <<= 1; + } + } + + bitset<128> m_bits; // 16 bytes max + CGU_INT m_sizeinbytes; +}; + +//==================== DECODER CODE ====================== +#define MAXENDPOINTS 2 +#define U16MAX 0xffff +#define S16MAX 0x7fff +#define SIGN_EXTEND(w,tbits) ((((signed(w))&(1<<((tbits)-1)))?((~0)<<(tbits)):0)|(signed(w))) + +enum +{ + UNSIGNED_F16 = 1, + SIGNED_F16 = 2 +}; + +enum +{ + BC6_ONE = 0, + BC6_TWO +}; + +enum +{ + C_RED = 0, + C_GREEN, + C_BLUE +}; + +struct BC6H_Vec3 +{ + int x,y,z; +}; + +struct AMD_BC6H_Format +{ + unsigned short region; // one or two + unsigned short m_mode; // m + int d_shape_index; // d + int rw; // endpt[0].A[0] + int rx; // endpt[0].B[0] + int ry; // endpt[1].A[0] + int rz; // endpt[1].B[0] + int gw; // endpt[0].A[1] + int gx; // endpt[0].B[1] + int gy; // endpt[1].A[1] + int gz; // endpt[1].B[1] + int bw; // endpt[0].A[2] + int bx; // endpt[0].B[2] + int by; // endpt[1].A[2] + int bz; // endpt[1].B[2] + + union + { + CGU_UINT8 indices[4][4]; // Indices data after header block + CGU_UINT8 
indices16[16]; + }; + + float din[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG]; // Original data input + END_Points EC[MAXENDPOINTS]; // compressed endpoints expressed as endpt[0].A[] and endpt[1].B[] + END_Points E[MAXENDPOINTS]; // decompressed endpoints + bool issigned; // Format is 16 bit signed floating point + bool istransformed; // region two: all modes = true except mode=10 + short wBits; // number of bits for the root endpoint + short tBits[NCHANNELS]; // number of bits used for the transformed endpoints + int format; // floating point format are we using for decompression + BC6H_Vec3 Palete[2][16]; + BC6H_Vec3f Paletef[2][16]; + + int index; // for debugging + float fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG]; + float cur_best_fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG]; + int shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE]; + int cur_best_shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE]; + int entryCount[MAX_SUBSETS]; + int cur_best_entryCount[MAX_SUBSETS]; + float partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG]; + float cur_best_partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG]; + bool optimized; // were end points optimized during final encoding +}; + +// =================================== END OF DECODER CODE ======================================================== +#endif + +//------------------------------------------------- +// Set by Host : Read only in kernel +//------------------------------------------------- +typedef struct +{ + // Setup at initialization time + CGU_FLOAT m_quality; + CGU_FLOAT m_performance; + CGU_FLOAT m_errorThreshold; + CGU_DWORD m_validModeMask; + CGU_BOOL m_imageNeedsAlpha; + CGU_BOOL m_colourRestrict; + CGU_BOOL m_alphaRestrict; + CGU_BOOL m_isSigned; +} CMP_BC6HOptions; + +typedef struct +{ + // These are quality parameters used to select when to use the high precision quantizer + // and shaker paths + CGU_FLOAT m_quantizerRangeThreshold; + CGU_FLOAT m_shakerRangeThreshold; + CGU_FLOAT 
m_partitionSearchSize; + + // Setup at initialization time + CGU_FLOAT m_quality; + CGU_FLOAT m_performance; + CGU_FLOAT m_errorThreshold; + CGU_DWORD m_validModeMask; + CGU_BOOL m_imageNeedsAlpha; + CGU_BOOL m_colourRestrict; + CGU_BOOL m_alphaRestrict; + CGU_BOOL m_isSigned; + + // Source image info : must be set prior to use in kernel + CGU_UINT32 m_src_width; + CGU_UINT32 m_src_height; + CGU_UINT32 m_src_stride; + +} BC6H_Encode; + +CMP_STATIC void SetDefaultBC6Options(BC6H_Encode *BC6Encode) +{ + if (BC6Encode) + { + BC6Encode->m_quality = 1.0f; + BC6Encode->m_quantizerRangeThreshold = 0.0f; + BC6Encode->m_shakerRangeThreshold = 0.0f; + BC6Encode->m_partitionSearchSize = 0.20f; + BC6Encode->m_performance = 0.0f; + BC6Encode->m_errorThreshold = 0.0f; + BC6Encode->m_validModeMask = 0; + BC6Encode->m_imageNeedsAlpha = 0; + BC6Encode->m_colourRestrict = 0; + BC6Encode->m_alphaRestrict = 0; + BC6Encode->m_isSigned = 0; + BC6Encode->m_src_width = 4; + BC6Encode->m_src_height = 4; + BC6Encode->m_src_stride = 0; + } +} + +#endif \ No newline at end of file diff --git a/extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp b/extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp new file mode 100644 index 0000000..ef6b1cb --- /dev/null +++ b/extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp @@ -0,0 +1,5489 @@ +//===================================================================== +// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +//===================================================================== +// Ref: GPUOpen-Tools/Compressonator + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//-------------------------------------- +// Common BC7 Header +//-------------------------------------- +#include "BC7_Encode_Kernel.h" + +#ifndef ASPM +//--------------------------------------------- +// Predefinitions for GPU and CPU compiled code +//--------------------------------------------- +#define ENABLE_CODE + +#ifndef ASPM_GPU + // using code for CPU or hybrid (CPU/GPU) + //#include "BC7.h" +#endif + + +INLINE CGU_INT a_compare( const void *arg1, const void *arg2 ) +{ + if (((CMP_di* )arg1)->image-((CMP_di* )arg2)->image > 0 ) return 1; + if (((CMP_di* )arg1)->image-((CMP_di* )arg2)->image < 0 ) return -1; + return 0; +}; + +#endif + +#ifndef ASPM_GPU +CMP_GLOBAL BC7_EncodeRamps BC7EncodeRamps +#ifndef ASPM + = {0} +#endif +; + +//--------------------------------------------- +// CPU: Computes max of two float values +//--------------------------------------------- +float bc7_maxf(float l1, float r1) +{ + return (l1 > r1 ? l1 : r1); +} + +//--------------------------------------------- +// CPU: Computes max of two float values +//--------------------------------------------- +float bc7_minf(float l1, float r1) +{ + return (l1 < r1 ? 
l1 : r1); +} + +#endif + +INLINE CGV_EPOCODE shift_right_epocode(CGV_EPOCODE v, CGU_INT bits) +{ + return v>>bits; // (perf warning expected) +} + +INLINE CGV_EPOCODE expand_epocode(CGV_EPOCODE v, CGU_INT bits) +{ + CGV_EPOCODE vv = v<<(8-bits); + return vv + shift_right_epocode(vv, bits); +} + +// valid bit range is 0..8 +CGU_INT expandbits(CGU_INT bits, CGU_INT v) +{ + return ( v << (8-bits) | v >> (2* bits - 8)); +} + +CMP_EXPORT CGU_INT bc7_isa() { +#if defined(ISPC_TARGET_SSE2) + ASPM_PRINT(("SSE2")); + return 0; +#elif defined(ISPC_TARGET_SSE4) + ASPM_PRINT(("SSE4")); + return 1; +#elif defined(ISPC_TARGET_AVX) + ASPM_PRINT(("AVX")); + return 2; +#elif defined(ISPC_TARGET_AVX2) + ASPM_PRINT(("AVX2")); + return 3; +#else + ASPM_PRINT(("CPU")); + return -1; +#endif +} + +CMP_EXPORT void init_BC7ramps() +{ +#ifdef ASPM_GPU +#else + CMP_STATIC CGU_BOOL g_rampsInitialized = FALSE; + if (g_rampsInitialized == TRUE) return; + g_rampsInitialized = TRUE; + BC7EncodeRamps.ramp_init = TRUE; + + //bc7_isa(); ASPM_PRINT((" INIT Ramps\n")); + + CGU_INT bits; + CGU_INT p1; + CGU_INT p2; + CGU_INT clogBC7; + CGU_INT index; + CGU_INT j; + CGU_INT o1; + CGU_INT o2; + CGU_INT maxi = 0; + + + for (bits = BIT_BASE; bits maxi) maxi = index; + BC7EncodeRamps.ramp[(CLT(clogBC7)*4*256*256*16)+(BTT(bits)*256*256*16)+(p1*256*16)+(p2*16)+index] = + //floor((CGV_IMAGE)BC7EncodeRamps.ep_d[BTT(bits)][p1] + rampWeights[clogBC7][index] * (CGV_IMAGE)((BC7EncodeRamps.ep_d[BTT(bits)][p2] - BC7EncodeRamps.ep_d[BTT(bits)][p1]))+ 0.5F); + floor(BC7EncodeRamps.ep_d[BTT(bits)][p1] + rampWeights[clogBC7][index] *((BC7EncodeRamps.ep_d[BTT(bits)][p2] - BC7EncodeRamps.ep_d[BTT(bits)][p1]))+ 0.5F); + }//index<(1 << clogBC7) + }//p2<(1 << bits) + }//p1<(1 << bits) +#endif + +#ifdef USE_BC7_SP_ERR_IDX + for (j = 0; j<256; j++) + { + for (o1 = 0; o1<2; o1++) + { + for (o2 = 0; o2<2; o2++) + { + for (index = 0; index<16; index++) { + 
BC7EncodeRamps.sp_idx[(CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits)*256*2*2*16*2)+(j*2*2*16*2)+(o1*2*16*2)+(o2*16*2)+(index*2)+0] = 0; + BC7EncodeRamps.sp_idx[(CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits)*256*2*2*16*2)+(j*2*2*16*2)+(o1*2*16*2)+(o2*16*2)+(index*2)+1] = 255; + BC7EncodeRamps.sp_err[(CLT(clogBC7)*4*256*2*2*16)+(BTT(bits)*256*2*2*16)+(j*2*2*16)+(o1*2*16)+(o2*16)+index] = 255; + } // i<16 + }//o2<2; + }//o1<2 + } //j<256 + + for (p1 = 0; p1<(1 << bits); p1++) + { + for (p2 = 0; p2<(1 << bits); p2++) + { + for (index = 0; index<(1 << clogBC7); index++) + { +#ifdef USE_BC7_RAMP + CGV_EPOCODE floatf = (CGV_EPOCODE)BC7EncodeRamps.ramp[(CLT(clogBC7)*4*256*256*16)+(BTT(bits)*256*256*16)+(p1*256*16)+(p2*16)+index]; +#else + CGV_EPOCODE floatf = floor((CGV_IMAGE)BC7EncodeRamps.ep_d[BTT(bits)][p1] + rampWeights[clogBC7][index] * (CGV_IMAGE)((BC7EncodeRamps.ep_d[BTT(bits)][p2] - BC7EncodeRamps.ep_d[BTT(bits)][p1]))+ 0.5F); +#endif + BC7EncodeRamps.sp_idx[(CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits)*256*2*2*16*2)+(floatf*2*2*16*2)+((p1 & 0x1)*2*16*2)+((p2 & 0x1)*16*2)+(index*2)+0] = p1; + BC7EncodeRamps.sp_idx[(CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits)*256*2*2*16*2)+(floatf*2*2*16*2)+((p1 & 0x1)*2*16*2)+((p2 & 0x1)*16*2)+(index*2)+1] = p2; + BC7EncodeRamps.sp_err[(CLT(clogBC7)*4*256*2*2*16)+(BTT(bits)*256*2*2*16)+(floatf*2*2*16)+((p1 & 0x1)*2*16)+(p2 & 0x1*16)+index] = 0; + } //i<(1 << clogBC7) + } //p2 + }//p1<(1 << bits) + + for (j = 0; j<256; j++) + { + for (o1 = 0; o1<2; o1++) + { + for (o2 = 0; o2<2; o2++) + { + for (index = 0; index<(1 << clogBC7); index++) + { + if ( // check for unitialized sp_idx + (BC7EncodeRamps.sp_idx[(CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits)*256*2*2*16*2)+(j*2*2*16*2)+(o1*2*16*2)+(o2*16*2)+(index*2)+0] == 0) && + (BC7EncodeRamps.sp_idx[(CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits)*256*2*2*16*2)+(j*2*2*16*2)+(o1*2*16*2)+(o2*16*2)+(index*2)+1] == 255) + ) + + { + CGU_INT k; + CGU_INT tf; + CGU_INT tc; + + for (k = 1; k<256; k++) + { + tf = j - k; + tc = j + 
k; + if ((tf >= 0 && BC7EncodeRamps.sp_err[(CLT(clogBC7)*4*256*2*2*16)+(BTT(bits)*256*2*2*16)+(tf*2*2*16)+(o1*2*16)+(o2*16)+index] == 0)) + { + BC7EncodeRamps.sp_idx[(CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits)*256*2*2*16*2)+(j*2*2*16*2)+(o1*2*16*2)+(o2*16*2)+(index*2)+0] = BC7EncodeRamps.sp_idx[(CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits)*256*2*2*16*2)+(tf*2*2*16*2)+(o1*2*16*2)+(o2*16*2)+(index*2)+0]; + BC7EncodeRamps.sp_idx[(CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits)*256*2*2*16*2)+(j*2*2*16*2)+(o1*2*16*2)+(o2*16*2)+(index*2)+1] = BC7EncodeRamps.sp_idx[(CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits)*256*2*2*16*2)+(tf*2*2*16*2)+(o1*2*16*2)+(o2*16*2)+(index*2)+1]; + break; + } + else if ((tc < 256 && BC7EncodeRamps.sp_err[(CLT(clogBC7)*4*256*2*2*16)+(BTT(bits)*256*2*2*16)+(tc*2*2*16)+(o1*2*16)+(o2*16)+index] == 0)) + { + BC7EncodeRamps.sp_idx[(CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits)*256*2*2*16*2)+(j*2*2*16*2)+(o1*2*16*2)+(o2*16*2)+(index*2)+0] = BC7EncodeRamps.sp_idx[(CLT(clogBC7)*4*256*2*2*16*2)+(BTT(bits)*256*2*2*16*2)+(tc*2*2*16*2)+(o1*2*16*2)+(o2*16*2)+(index*2)+0]; + break; + } + } + + //BC7EncodeRamps.sp_err[(CLT(clogBC7)*4*256*2*2*16)+(BTT(bits)*256*2*2*16)+(j*2*2*16)+(o1*2*16)+(o2*16)+index] = (CGV_ERROR) k; + BC7EncodeRamps.sp_err[(CLT(clogBC7)*4*256*2*2*16)+(BTT(bits)*256*2*2*16)+(j*2*2*16)+(o1*2*16)+(o2*16)+index] = (CGU_UINT8)k; + + } //sp_idx < 0 + }//i<(1 << clogBC7) + }//o2 + }//o1 + }//j +#endif + + } //bits b) + return b; + return v; +} + +INLINE CGV_INDEX clampIndex(CGV_INDEX v, CGV_INDEX a, CGV_INDEX b) +{ + if (v < a) + return a; + else + if (v > b) + return b; + return v; +} + +INLINE CGV_SHIFT32 shift_right_uint32(CGV_SHIFT32 v, CGU_INT bits) +{ + return v>>bits; // (perf warning expected) +} + +INLINE CGV_BYTE shift_right_uint8(CGV_BYTE v, CGU_UINT8 bits) +{ + return v>>bits; // (perf warning expected) +} + +INLINE CGV_BYTE shift_right_uint8V(CGV_BYTE v, CGV_UINT8 bits) +{ + return v>>bits; // (perf warning expected) +} + +// valid bit range is 0..8 +INLINE 
CGV_EPOCODE expandEPObits(CGV_EPOCODE v, uniform CGV_EPOCODE bits) +{ + CGV_EPOCODE vv = v<<(8-bits); + return vv + shift_right_uint32(vv, bits); +} + +CGV_ERROR err_absf(CGV_ERROR a) { return a>0.0F?a:-a;} +CGV_IMAGE img_absf(CGV_IMAGE a) { return a>0.0F?a:-a;} + +CGU_UINT8 min8(CGU_UINT8 a, CGU_UINT8 b) { return ab?a:b;} + +void pack_index(CGV_INDEXPACKED packed_index[2], CGV_INDEX src_index[MAX_SUBSET_SIZE]) +{ + // Converts from unpacked index to packed index + packed_index[0] = 0x0000; + packed_index[1] = 0x0000; + CGV_BYTE shift = 0; // was CGV_UINT8 + for (CGU_INT k=0; k<16; k++) + { + packed_index[k/8] |= (CGV_UINT32)(src_index[k]&0x0F) << shift; + shift +=4; + } +} + +void unpack_index(CGV_INDEX unpacked_index[MAX_SUBSET_SIZE],CGV_INDEXPACKED src_packed[2]) +{ + // Converts from packed index to unpacked index + CGV_BYTE shift = 0; // was CGV_UINT8 + for (CGV_BYTE k=0; k<16; k++) + { + unpacked_index[k] = (CGV_BYTE)(src_packed[k/8] >> shift)&0xF; + if (k == 7) + shift = 0; + else + shift +=4; + } +} + +//====================================== CMP MATH UTILS ============================================ +CGV_ERROR err_Total( + CGV_IMAGE image_src1[SOURCE_BLOCK_SIZE*MAX_CHANNELS], + CGV_IMAGE image_src2[SOURCE_BLOCK_SIZE*MAX_CHANNELS], + CGV_ENTRIES numEntries, // < 16 + CGU_CHANNEL channels3or4) // IN: 3 = RGB or 4 = RGBA (4 = MAX_CHANNELS) +{ + CGV_ERROR err_t=0.0F; + for (CGU_CHANNEL ch=0;ch 0) + { + for (CGV_ENTRIES k=0;k> 16; + CGV_UINT32 mask = 0x01 << index; + + return ((mask1 & mask)?2:0 + (mask0 & mask)?1:0); // This can be moved to caller, just return mask!! 
+} + +void GetPartitionSubSet_mode01237( + CGV_IMAGE subsets_out[MAX_SUBSETS][SOURCE_BLOCK_SIZE][MAX_CHANNELS], // OUT: Subset pattern mapped with image src colors + CGV_ENTRIES entryCount_out[MAX_SUBSETS], // OUT: Number of entries per subset + CGV_UINT8 partition, // Partition Shape 0..63 + CGV_IMAGE image_src[SOURCE_BLOCK_SIZE*MAX_CHANNELS], // Image colors + CGU_INT blockMode, // [0,1,2,3 or 7] + CGU_CHANNEL channels3or4) // 3 = RGB or 4 = RGBA (4 = MAX_CHANNELS) +{ + CGU_UINT8 maxSubsets = 2; if (blockMode == 0 || blockMode == 2) maxSubsets = 3; + + entryCount_out[0] = 0; + entryCount_out[1] = 0; + entryCount_out[2] = 0; + + for (CGV_INT i = 0; i < MAX_SUBSET_SIZE; i++) + { + CGV_UINT8 subset = get_partition_subset(partition,maxSubsets,i); + + for (CGU_INT ch = 0; ch<3; ch++) + subsets_out[subset][entryCount_out[subset]][ch] = image_src[i+(ch*SOURCE_BLOCK_SIZE)]; + //subsets_out[subset*64+(entryCount_out[subset]*MAX_CHANNELS+ch)] = image_src[i+(ch*SOURCE_BLOCK_SIZE)]; + + // if we have only 3 channels then set the alpha subset to 0 + if (channels3or4 == 3) + subsets_out[subset][entryCount_out[subset]][3] = 0.0F; + else + subsets_out[subset][entryCount_out[subset]][3] = image_src[i+(COMP_ALPHA*SOURCE_BLOCK_SIZE)]; + entryCount_out[subset]++; + } +} + +INLINE void GetClusterMean( + CGV_IMAGE cluster_mean_out[SOURCE_BLOCK_SIZE][MAX_CHANNELS], + CGV_IMAGE image_src[SOURCE_BLOCK_SIZE*MAX_CHANNELS], + CGV_INDEX index_in[MAX_SUBSET_SIZE], + CGV_ENTRIES numEntries, // < 16 + CGU_CHANNEL channels3or4) // IN: 3 = RGB or 4 = RGBA (4 = MAX_CHANNELS) +{ + // unused index values are underfined + CGV_INDEX i_cnt[MAX_SUBSET_SIZE]; + CGV_INDEX i_comp[MAX_SUBSET_SIZE]; + + + for (CGV_ENTRIES i=0;i< numEntries;i++) + for (CGU_CHANNEL ch=0; ch< channels3or4; ch++) + { + CGV_INDEX idx = index_in[i]&0x0F; + cluster_mean_out[idx][ch] = 0; + i_cnt[idx]=0; + } + + CGV_INDEX ic = 0; // was CGV_INT + for (CGV_ENTRIES i=0;i< numEntries;i++) + { + CGV_INDEX idx = index_in[i]&0x0F; + if 
(i_cnt[idx]==0) + i_comp[ic++]=idx; + i_cnt[idx]++; + + for (CGU_CHANNEL ch=0; ch< channels3or4; ch++) + { + cluster_mean_out[idx][ch] += image_src[i+(ch*SOURCE_BLOCK_SIZE)]; + } + } + + for (CGU_CHANNEL ch=0; ch< channels3or4; ch++) + for (CGU_INT i=0;i < ic;i++) + { + if (i_cnt[i_comp[i]] != 0) + { + CGV_INDEX icmp = i_comp[i]; + cluster_mean_out[icmp][ch] = (CGV_IMAGE) floor( (cluster_mean_out[icmp][ch] / (CGV_IMAGE) i_cnt[icmp]) +0.5F); + } + } + +} + +INLINE void GetImageMean( + CGV_IMAGE image_mean_out[SOURCE_BLOCK_SIZE*MAX_CHANNELS], + CGV_IMAGE image_src[SOURCE_BLOCK_SIZE*MAX_CHANNELS], + CGV_ENTRIES numEntries, + CGU_CHANNEL channels) +{ + for (CGU_CHANNEL ch=0; ch< channels; ch++) + image_mean_out[ch] =0; + + for (CGV_ENTRIES i=0;i< numEntries;i++) + for (CGU_CHANNEL ch=0; ch< channels; ch++) + image_mean_out[ch] += image_src[i+ch*SOURCE_BLOCK_SIZE]; + + for (CGU_CHANNEL ch=0; ch< channels; ch++) + image_mean_out[ch] /=(CGV_IMAGE) numEntries; // Performance Warning: Conversion from unsigned int to float is slow. 
Use "int" if possible +} + +// calculate an eigen vector corresponding to a biggest eigen value +// will work for non-zero non-negative matricies only +void GetEigenVector( + CGV_IMAGE EigenVector_out[MAX_CHANNELS], // Normalized Eigen Vector output + CGV_IMAGE CovarianceVector[MAX_CHANNELS*MAX_CHANNELS], // Covariance Vector + CGU_CHANNEL channels3or4) // IN: 3 = RGB or 4 = RGBA +{ + CGV_IMAGE vector_covIn[MAX_CHANNELS*MAX_CHANNELS]; + CGV_IMAGE vector_covOut[MAX_CHANNELS*MAX_CHANNELS]; + CGV_IMAGE vector_maxCovariance; + + for (CGU_CHANNEL ch1=0; ch1 vector_maxCovariance) + vector_maxCovariance = vector_covIn[ch+ch*4]; + } + + // Normalize Input Covariance Vector + for (CGU_CHANNEL ch1=0; ch1 0) + vector_covIn[ch1+ch2*4] = vector_covIn[ch1+ch2*4] / vector_maxCovariance; + } + + for (CGU_CHANNEL ch1=0; ch1 vector_maxCovariance) + { + maxCovariance_channel = ch; + vector_maxCovariance = vector_covOut[ch+ch*4]; + } + } + + CGV_IMAGE vector_t = 0; + + for (CGU_CHANNEL ch=0; ch 0) + EigenVector_out[ch] = EigenVector_out[ch] / vector_t; + } + +} + +CGV_INDEX index_collapse( + CGV_INDEX index[MAX_SUBSET_SIZE], + CGV_ENTRIES numEntries) +{ + CGV_INDEX minIndex=index[0]; + CGV_INDEX MaxIndex=index[0]; + + for (CGV_ENTRIES k=1;k MaxIndex) + MaxIndex = index[k]; + } + + CGV_INDEX D=1; + + for (CGV_INDEX d=2; d<= MaxIndex-minIndex; d++) + { + for (CGV_ENTRIES ent=0;ent=numEntries) + D =d; + break; + } + } + } + + for (CGV_ENTRIES k=0;k MaxIndex) + MaxIndex = index[k]; + } + + return (MaxIndex); + +} + +void sortProjected_indexs( + CGV_INDEX index_ordered[MAX_SUBSET_SIZE], + CGV_IMAGE projection[SOURCE_BLOCK_SIZE], + CGV_ENTRIES numEntries // max 16 + ) +{ + CMP_di what[SOURCE_BLOCK_SIZE]; + + for (CGV_INDEX i=0; i < numEntries;i++) + { + what[i].index = i; + what[i].image = projection[i]; + } + + CGV_INDEX tmp_index; + CGV_IMAGE tmp_image; + + for (CGV_ENTRIES i = 1; i < numEntries; i++) + { + for (CGV_ENTRIES j=i; j>0; j--) + { + if (what[j - 1].image > what[j].image) + { + 
tmp_index = what[j].index; + tmp_image = what[j].image; + what[j].index = what[j - 1].index; + what[j].image = what[j - 1].image; + what[j - 1].index = tmp_index; + what[j - 1].image = tmp_image; + } + } + } + + for (CGV_ENTRIES i=0; i < numEntries;i++) + index_ordered[i]=what[i].index; + +}; + +void sortPartitionProjection( + CGV_IMAGE projection[MAX_PARTITION_ENTRIES], + CGV_UINT8 order[MAX_PARTITION_ENTRIES], + CGU_UINT8 numPartitions // max 64 + ) +{ + CMP_du what[MAX_PARTITION_ENTRIES]; + + for (CGU_UINT8 Parti=0; Parti < numPartitions;Parti++) + { + what[Parti].index = Parti; + what[Parti].image = projection[Parti]; + } + + CGV_UINT8 index; + CGV_IMAGE data; + + for (CGU_UINT8 Parti = 1; Parti < numPartitions; Parti++) + { + for (CGU_UINT8 Partj=Parti; Partj>0; Partj--) + { + if (what[Partj - 1].image > what[Partj].image) + { + index = what[Partj].index; + data = what[Partj].image; + what[Partj].index = what[Partj - 1].index; + what[Partj].image = what[Partj - 1].image; + what[Partj - 1].index = index; + what[Partj - 1].image = data; + } + } + } + + for (CGU_UINT8 Parti=0; Parti < numPartitions;Parti++) + order[Parti]=what[Parti].index; + +}; + + +void cmp_Write8Bit( + CGV_CMPOUT base[], + CGU_INT* uniform offset, + CGU_INT bits, + CGV_BYTE bitVal) +{ + base[*offset/8] |= bitVal << (*offset%8); + if (*offset%8+bits>8) + { + base[*offset/8+1] |= shift_right_uint8(bitVal, 8-*offset%8); + } + *offset += bits; +} + +void cmp_Write8BitV( + CGV_CMPOUT base[], + CGV_INT offset, + CGU_INT bits, + CGV_BYTE bitVal) +{ + base[offset/8] |= bitVal << (offset%8); + if (offset%8+bits>8) + { + base[offset/8+1] |= shift_right_uint8V(bitVal, 8-offset%8); + } +} + +INLINE CGV_EPOCODE ep_find_floor( + CGV_IMAGE v, + CGU_UINT8 bits, + CGV_BYTE use_par, + CGV_BYTE odd) + { + CGV_EPOCODE i1=0; + CGV_EPOCODE i2=1<<(bits-use_par); + odd = use_par ? 
odd : 0; + while (i2-i1>1) + { + CGV_EPOCODE j = (i1+i2)/2; // Warning in ASMP code + CGV_EPOCODE ep_d = expandEPObits((j<= ep_d ) + i1=j; + else + i2=j; + } + + return (i1<>4; + fixup[2] = skip_packed&15; +} + +//===================================== COMPRESS CODE ============================================= +INLINE void SetDefaultIndex(CGV_INDEX index_io[MAX_SUBSET_SIZE]) +{ + // Use this a final call + for (CGU_INT i=0; i image_projected[i]) + image_max = image_projected[i]; + } + + CGV_IMAGE img_diff = image_max-image_min; + + if (img_diff == 0.0f) return; + if (isnan(img_diff)) return; + + image_s = (clusters-1)/img_diff; + + for (CGV_INDEX i=0; i < numEntries;i++) + { + + image_v[i] = image_projected[i]*image_s; + image_z[i] = floor(image_v[i] + 0.5F - image_min *image_s); + projected_index_out[i] = (CGV_INDEX)image_z[i]; + + what[i].image = image_v[i]-image_z[i]- image_min *image_s; + what[i].index = i; + image_dm+= what[i].image; + image_r += what[i].image*what[i].image; + } + + if (numEntries*image_r- image_dm*image_dm >= (CGV_IMAGE)(numEntries-1)/8) + { + + image_dm /= numEntries; + + for (CGV_INT i=0; i < numEntries;i++) + what[i].image -= image_dm; + + CGV_INDEX tmp_index; + CGV_IMAGE tmp_image; + for (CGV_ENTRIES i = 1; i < numEntries; i++) + { + for (CGV_ENTRIES j=i; j>0; j--) + { + if (what[j - 1].image > what[j].image) + { + tmp_index = what[j].index; + tmp_image = what[j].image; + what[j].index = what[j - 1].index; + what[j].image = what[j - 1].image; + what[j - 1].index = tmp_index; + what[j - 1].image = tmp_image; + } + } + } + + // got into fundamental simplex + // move coordinate system origin to its center + + // i=0 < numEntries avoids varying int division by 0 + for (CGV_ENTRIES i=0; i < numEntries;i++) + { + what[i].image = what[i].image - (CGV_IMAGE) (((2.0f*i+1)-numEntries)/(2.0f*numEntries)); + } + + image_mm=0.0F; + image_l=0.0F; + + CGV_INT j = -1; + for (CGV_ENTRIES i=0; i < numEntries;i++) + { + image_l += what[i].image; + if 
(image_l < image_mm) + { + image_mm = image_l; + j=i; + } + } + + + j = j + 1; + // avoid j = j%numEntries us this + while (j > numEntries) j = j - numEntries; + + for (CGV_ENTRIES i=j; i < numEntries;i++) + { + CGV_INDEX idx = what[i].index; + CGV_INDEX pidx = projected_index_out[idx] + 1; //gather_index(projected_index_out,idx)+1; + projected_index_out[idx] = pidx; // scatter_index(projected_index_out,idx,pidx); + } + } + + // get minimum index + CGV_INDEX index_min=projected_index_out[0]; + for (CGV_ENTRIES i=1; i < numEntries;i++) + { + if (projected_index_out[i] < index_min) + index_min = projected_index_out[i]; + } + + // reposition all index by min index (using min index as 0) + for (CGV_ENTRIES i=0; i < numEntries;i++) + { + projected_index_out[i] = clampIndex(projected_index_out[i] - index_min,0,15); + } + +} + +CGV_ERROR GetQuantizeIndex( + CGV_INDEXPACKED index_packed_out[2], + CGV_INDEX index_out[MAX_SUBSET_SIZE], // OUT: + CGV_IMAGE image_src[SOURCE_BLOCK_SIZE*MAX_CHANNELS], + CGV_ENTRIES numEntries, //IN: range 0..15 (MAX_SUBSET_SIZE) + CGU_INT numClusters, + CGU_CHANNEL channels3or4) // IN: 3 = RGB or 4 = RGBA (4 = MAX_CHANNELS) +{ + CGV_IMAGE image_centered[SOURCE_BLOCK_SIZE*MAX_CHANNELS]; + CGV_IMAGE image_mean[MAX_CHANNELS]; + CGV_IMAGE eigen_vector[MAX_CHANNELS]; + CGV_IMAGE covariance_vector[MAX_CHANNELS*MAX_CHANNELS]; + + GetImageCentered(image_centered,image_mean, image_src, numEntries, channels3or4); + GetCovarianceVector(covariance_vector, image_centered, numEntries, channels3or4); + + //----------------------------------------------------- + // check if all covariances are the same + // if so then set all index to same value 0 and return + // use EPSILON to set the limit for all same limit + //----------------------------------------------------- + + CGV_IMAGE image_covt=0.0F; + for (CGU_CHANNEL ch=0; ch>= 1) + clogBC7++; + + // init epo_0 + CGV_EPOCODE epo_0[2*MAX_CHANNELS]; + SetDefaultEPOCode(epo_0,0xFF,0,0,0); + + CGV_INDEX image_log = 
0; + CGV_INDEX image_idx = 0; + CGU_BOOL use_par = FALSE; + if (type != 0) + use_par = TRUE; + CGV_ERROR error_1 = CMP_FLOAT_MAX; + + for (CGU_INT pn = 0; pn err_tc) + image_tcr[ch1] = image_tc; + else if (err_tf < err_tc) + image_tcr[ch1] = image_tf; + else + image_tcr[ch1] = (CGV_EPOCODE)floor(image_src[COMP_RED+(ch1*SOURCE_BLOCK_SIZE)] + 0.5F); + + //image_tcr[ch1] = image_tf + (image_tc - image_tf)/2; + + //=============================== + // Refine this for better quality! + //=============================== + error_tr = get_sperr(clogBC7,bits[ch1],image_tcr[ch1],t1,t2,iclogBC7); + error_tr = (error_tr*error_tr) + + 2 * error_tr + * img_absf(image_tcr[ch1]- image_src[COMP_RED+(ch1*SOURCE_BLOCK_SIZE)]) + + (image_tcr[ch1] - image_src[COMP_RED+(ch1*SOURCE_BLOCK_SIZE)]) + * (image_tcr[ch1] - image_src[COMP_RED+(ch1*SOURCE_BLOCK_SIZE)]); + + if (error_tr < error_ta) + { + error_ta = error_tr; + t1o[ch1] = t1; + t2o[ch1] = t2; + epo_dr_0[ch1] = clampEPO(image_tcr[ch1],0,255); + } +#else + image_tcr[ch1] = floor(image_src[COMP_RED+(ch1*SOURCE_BLOCK_SIZE)] + 0.5F); + error_ta = 0; + t1o[ch1] = t1; + t2o[ch1] = t2; + epo_dr_0[ch1] = clampi(image_tcr[ch1],0,255); +#endif + } // B + } //C + + error_t += error_ta; + } // D + + if (error_t < error_0) + { + image_log = iclogBC7; + image_idx = image_log; + CGU_BOOL srcIsWhite = FALSE; + if ((image_src[0] == 255.0f)&&(image_src[1] == 255.0f)&&(image_src[2] == 255.0f)) srcIsWhite = TRUE; + + for (CGU_CHANNEL ch = 0; chsp_idx,index+0)&0xFF; + epo_0[4+ch] = BC7EncodeRamps.sp_idx[index+1]&0xFF;// gather_epocode(u_BC7Encode->sp_idx,index+1)&0xFF; + } + else { + epo_0[ch] = 0; + epo_0[4 + ch] = 0; + } +#else + epo_0[ ch] = 0; + epo_0[4+ch] = 0; +#endif +#endif + } + error_0 = error_t; + } + //if (error_0 == 0) + // break; + } // E + + if (error_0 < error_1) + { + + image_idx = image_log; + for (CGU_CHANNEL chE = 0; chE 0) + { + image_ramp = GetRamp(clogBC7,max_bits[ch],epo_p1,epo_p2,index_cidx[_mc-1]); + + image_square_diff += 
sq_image(image_ramp-image_src[(_mc-1)+(ch*SOURCE_BLOCK_SIZE)]); + _mc--; + } + if (image_square_diff < err_ed[(ppA*8)+(ppB*4)+ch]) + { + err_ed[(ppA*8)+(ppB*4)+ch] = image_square_diff; + epo_code_par[ppA][ppB][0][ch] = epo_p1; + epo_code_par[ppA][ppB][1][ch] = epo_p2; + } + } + } // pp1 + } // pp0 + } // j + + //--------------------------------------------------------- + for (CGU_INT pn=0; pn < npv_nd[channels3or4-3][type]; pn++) + { + CGV_ERROR err_2=0.0F; + CGU_INT d1; + CGU_INT d2; + + for (CGU_CHANNEL ch=0; ch>=1) + clogBC7++; + + CGU_INT clt_clogBC7 = CLT(clogBC7); + + if (clt_clogBC7 > 3) + { + ASPM_PRINT(("Err: optimize_IndexAndEndPoints, clt_clogBC7\n")); + return CMP_FLOAT_MAX; + } + + Mi_ = Mi_ - 1; + + CGV_INDEX MaxIndex; + CGV_INDEX index_tmp[MAX_SUBSET_SIZE]; + CGU_INT maxTry = MAX_TRY_SHAKER; + + CGV_INDEX index_best[MAX_SUBSET_SIZE]; + + for (CGV_ENTRIES k=0;kerrorThreshold) + { + break; + } + + CGV_TYPEINT done; + done = !(change && better); + if ((maxTry > 0)&&(!done)) + { + maxTry--; + MaxIndex = index_collapse(index_tmp, numEntries); + } + else + { + maxTry = 0; + } + + } while (maxTry); + + if (err_best == CMP_FLOAT_MAX) + { + ASPM_PRINT(("Err: requantized_image_err\n")); + } + + return err_best; +} + +CGU_UINT8 get_partitionsToTry(uniform CMP_GLOBAL BC7_Encode u_BC7Encode[],CGU_UINT8 maxPartitions) +{ + CGU_FLOAT u_minPartitionSearchSize = 0.30f; + if(u_BC7Encode->quality <= BC7_qFAST_THRESHOLD) // Using this to match performance and quality of CPU code + { + u_minPartitionSearchSize = u_minPartitionSearchSize + ( u_BC7Encode->quality*BC7_qFAST_THRESHOLD); + } + else + { + u_minPartitionSearchSize = u_BC7Encode->quality; + } + return (CGU_UINT8)(maxPartitions * u_minPartitionSearchSize); +} + +INLINE void cmp_encode_swap(CGV_EPOCODE endpoint[], CGU_INT channels, CGV_INDEX block_index[MAX_SUBSET_SIZE], CGU_INT bits) +{ + CGU_INT levels = 1 << bits; + if (block_index[0]>=levels/2) + { + cmp_swap_epo(&endpoint[0], &endpoint[channels], channels); + 
for (CGU_INT k=0; k0) q = (levels-1)-q; + + if (k1==0 && k2==0) cmp_Write8Bit(data, pPos, bits - 1, static_cast (q)); + else cmp_Write8Bit(data, pPos, bits, static_cast(q)); + qbits_shifted >>= 4; + flips_shifted >>= 1; + } + } +} + + +INLINE CGV_SHIFT32 pow32(CGV_SHIFT32 x) +{ + return 1<>= 1; + packedColours[1] >>= 1; + } + else + if(blockMode == 1) // ONE_PBIT + { + parityBits[subset][0] = packedColours[1] & 1; + parityBits[subset][1] = packedColours[1] & 1; + packedColours[0] >>= 1; + packedColours[1] >>= 1; + } + else + if(blockMode == 2) + { + parityBits[subset][0] = 0; + parityBits[subset][1] = 0; + } + + for (CGU_INT ch=0; ch>= componentBits; + packedColours[1] >>= componentBits; + } + } + + // Loop over component + for (CGU_INT ch=0; ch < channels; ch++) + { + // loop over subsets + for (CGU_INT subset=0; subset (params->rotated_channel)); + + // idxMode 1 bit + cmp_Write8Bit(cmp_out, &bitPosition, 1, static_cast (params->idxMode)); + + CGU_INT idxBits[2] = {2,3}; + + if(params->idxMode) + { + idxBits[0] = 3; + idxBits[1] = 2; + // Indicate if we need to fixup the index + cmp_swap_index(params->color_index,params->alpha_index,16); + cmp_encode_swap(params->alpha_qendpoint, 4, params->color_index,2); + cmp_encode_swap(params->color_qendpoint, 4, params->alpha_index,3); + } + else + { + cmp_encode_swap(params->color_qendpoint, 4, params->color_index,2); + cmp_encode_swap(params->alpha_qendpoint, 4, params->alpha_index,3); + } + + // color endpoints 5 bits each + // R0 : R1 + // G0 : G1 + // B0 : B1 + for (CGU_INT component=0; component < 3; component++) + { + cmp_Write8Bit(cmp_out, &bitPosition, 5, static_cast (params->color_qendpoint[component])); + cmp_Write8Bit(cmp_out, &bitPosition, 5, static_cast (params->color_qendpoint[4 + component])); + } + + // alpha endpoints (6 bits each) + // A0 : A1 + cmp_Write8Bit(cmp_out, &bitPosition, 6, static_cast (params->alpha_qendpoint[0])); + cmp_Write8Bit(cmp_out, &bitPosition, 6, static_cast 
(params->alpha_qendpoint[4])); + + // index 2 bits each (31 bits total) + cmp_encode_index(cmp_out, &bitPosition, params->color_index, 2); + // index 3 bits each (47 bits total) + cmp_encode_index(cmp_out, &bitPosition, params->alpha_index, 3); +} + +void Encode_mode5( CGV_CMPOUT cmp_out[COMPRESSED_BLOCK_SIZE], + varying cmp_mode_parameters* uniform params) +{ + for (CGU_INT k=0; k (params->rotated_channel)); + + cmp_encode_swap(params->color_qendpoint, 4, params->color_index,2); + cmp_encode_swap(params->alpha_qendpoint, 4, params->alpha_index,2); + + // color endpoints (7 bits each) + // R0 : R1 + // G0 : G1 + // B0 : B1 + for (CGU_INT component=0; component < 3; component++) + { + cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast (params->color_qendpoint[component])); + cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast (params->color_qendpoint[4 + component])); + } + + // alpha endpoints (8 bits each) + // A0 : A1 + cmp_Write8Bit(cmp_out, &bitPosition, 8, static_cast (params->alpha_qendpoint[0])); + cmp_Write8Bit(cmp_out, &bitPosition, 8, static_cast (params->alpha_qendpoint[4])); + + + // color index 2 bits each (31 bits total) + // alpha index 2 bits each (31 bits total) + cmp_encode_index(cmp_out, &bitPosition, params->color_index, 2); + cmp_encode_index(cmp_out, &bitPosition, params->alpha_index, 2); +} + +void Encode_mode6( + CGV_INDEX index[MAX_SUBSET_SIZE], + CGV_EPOCODE epo_code[8], + CGV_CMPOUT cmp_out[COMPRESSED_BLOCK_SIZE]) +{ + for (CGU_INT k=0; k (epo_code[0 + p] >> 1)); + cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast (epo_code[4 + p] >> 1)); + } + + // p bits + cmp_Write8Bit(cmp_out, &bitPosition, 1, epo_code[0]&1); + cmp_Write8Bit(cmp_out, &bitPosition, 1, epo_code[4]&1); + + // quantized values + cmp_encode_index(cmp_out, &bitPosition, index, 4); +} + + +void Compress_mode01237( + CGU_INT blockMode, + BC7_EncodeState EncodeState[], +uniform CMP_GLOBAL BC7_Encode u_BC7Encode[]) +{ + CGV_INDEX 
storedBestindex[MAX_PARTITIONS][MAX_SUBSETS][MAX_SUBSET_SIZE]; + CGV_ERROR storedError[MAX_PARTITIONS]; + CGV_UINT8 sortedPartition[MAX_PARTITIONS]; + + EncodeState->numPartitionModes = 64; + EncodeState->maxSubSets = 2; + + if (blockMode == 0) + { + EncodeState->numPartitionModes = 16; + EncodeState->channels3or4 = 3; + EncodeState->bits = 26; + EncodeState->clusters = 8; + EncodeState->componentBits = 4; + EncodeState->maxSubSets = 3; + } + else + if (blockMode == 2) + { + EncodeState->channels3or4 = 3; + EncodeState->bits = 30; + EncodeState->clusters = 4; + EncodeState->componentBits = 5; + EncodeState->maxSubSets = 3; + } + else + if (blockMode == 1) + { + + EncodeState->channels3or4 = 3; + EncodeState->bits = 37; + EncodeState->clusters = 8; + EncodeState->componentBits = 6; + } + else + if (blockMode == 3) + { + EncodeState->channels3or4 = 3; + EncodeState->bits = 44; + EncodeState->clusters = 4; + EncodeState->componentBits = 7; + } + else + if (blockMode == 7) + { + EncodeState->channels3or4 = 4; + EncodeState->bits = 42; // (2* (R 5 + G 5 + B 5 + A 5)) + 2 parity bits + EncodeState->clusters = 4; + EncodeState->componentBits = 5; // 5 bit components + } + + CGV_IMAGE image_subsets[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_CHANNELS]; + CGV_ENTRIES subset_entryCount[MAX_SUBSETS] = {0,0,0}; + + // Loop over the available partitions for the block mode and quantize them + // to figure out the best candidates for further refinement + CGU_UINT8 mode_partitionsToTry; + mode_partitionsToTry = get_partitionsToTry(u_BC7Encode,EncodeState->numPartitionModes); + + CGV_UINT8 bestPartition = 0; + + for (CGU_INT mode_blockPartition = 0; mode_blockPartition < mode_partitionsToTry; mode_blockPartition++) + { + + GetPartitionSubSet_mode01237( + image_subsets, + subset_entryCount, + static_cast(mode_blockPartition), + EncodeState->image_src, + blockMode, + EncodeState->channels3or4); + + CGV_IMAGE subset_image_src[SOURCE_BLOCK_SIZE*MAX_CHANNELS]; + CGV_INDEX 
index_out1[SOURCE_BLOCK_SIZE]; + CGV_ERROR err_quant = 0.0F; + + // Store the quntize error for this partition to be sorted and processed later + for (CGU_INT subset=0; subset < EncodeState->maxSubSets; subset++) + { + CGV_ENTRIES numEntries = subset_entryCount[subset]; + + for (CGU_INT ii=0; iiclusters, + EncodeState->channels3or4); + + for (CGV_INT idx=0; idx < numEntries; idx++) + { + storedBestindex[mode_blockPartition][subset][idx] = index_out1[idx]; + } + } + + storedError[mode_blockPartition] = err_quant; + } + + // Sort the results + sortPartitionProjection( storedError, + sortedPartition, + mode_partitionsToTry); + + CGV_EPOCODE epo_code[MAX_SUBSETS*2*MAX_CHANNELS]; + CGV_EPOCODE bestEndpoints[MAX_SUBSETS*2*MAX_CHANNELS]; + CGV_BYTE bestindex[MAX_SUBSETS*MAX_SUBSET_SIZE]; + CGV_ENTRIES bestEntryCount[MAX_SUBSETS]; + CGV_BYTE bestindex16[MAX_SUBSET_SIZE]; + + // Extensive shaking is most important when the ramp is short, and + // when we have less index. On a long ramp the quality of the + // initial quantizing is relatively more important + // We modulate the shake size according to the number of ramp index + // - the more index we have the less shaking should be required to find a near + // optimal match + + CGU_UINT8 numShakeAttempts = max8(1, min8((CGU_UINT8)floor(8 * u_BC7Encode->quality + 0.5), mode_partitionsToTry)); + CGV_ERROR err_best = CMP_FLOAT_MAX; + + // Now do the endpoint shaking + for (CGU_INT nSA =0; nSA < numShakeAttempts; nSA++) + { + + CGV_ERROR err_optimized = 0.0F; + CGV_UINT8 sortedBlockPartition; + sortedBlockPartition = sortedPartition[nSA]; + + //******************************************** + // Get the partition shape for the given mode + //******************************************** + GetPartitionSubSet_mode01237( + image_subsets, + subset_entryCount, + sortedBlockPartition, + EncodeState->image_src, + blockMode, + EncodeState->channels3or4); + + //***************************** + // Process the partition shape + 
//***************************** + for (CGU_INT subset=0; subset < EncodeState->maxSubSets; subset++) + { + CGV_ENTRIES numEntries = subset_entryCount[subset]; + CGV_IMAGE src_image_block[SOURCE_BLOCK_SIZE*MAX_CHANNELS]; + CGV_INDEX index_io[MAX_SUBSET_SIZE]; + CGV_EPOCODE tmp_epo_code[8]; + + for (CGU_INT k=0; k(EncodeState->clusters), // Mi_ + EncodeState->bits, + EncodeState->channels3or4, + u_BC7Encode); + + for (CGU_INT k=0; k < MAX_SUBSET_SIZE; k++) + { + storedBestindex[sortedBlockPartition][subset][k] = index_io[k]; + } + + for (CGU_INT ch=0; chmaxSubSets; subset++) + { + CGV_ENTRIES numEntries = subset_entryCount[subset]; + bestEntryCount[subset] = numEntries; + + if(numEntries) + { + for (CGU_INT ch=0; ch < EncodeState->channels3or4; ch++) + { + bestEndpoints[(subset*2+0)*4+ch] = epo_code[(subset*2+0)*4+ch]; + bestEndpoints[(subset*2+1)*4+ch] = epo_code[(subset*2+1)*4+ch]; + } + + for (CGV_ENTRIES k=0; k< numEntries; k++) + { + bestindex[subset*MAX_SUBSET_SIZE+k] = storedBestindex[sortedBlockPartition][subset][k]; + bestindex16[bestIndexCount++] = storedBestindex[sortedBlockPartition][subset][k]; + } + } + } + + err_best = err_optimized; + // Early out if we found we can compress with error below the quality threshold + if(err_best <= u_BC7Encode->errorThreshold) + { + break; + } + } + } + + + if (blockMode != 7) + err_best += EncodeState->opaque_err; + + if(err_best > EncodeState->best_err) + return; + + //************************** + // Save the encoded block + //************************** + EncodeState->best_err = err_best; + + + // Now we have all the data needed to encode the block + // We need to pack the endpoints prior to encoding + CGV_TYPEUINT32 packedEndpoints[MAX_SUBSETS*2] = {0,0,0,0,0,0}; + for (CGU_INT subset=0; subsetmaxSubSets; subset++) + { + packedEndpoints[(subset*2)+0] = 0; + packedEndpoints[(subset*2)+1] = 0; + + if(bestEntryCount[subset]) + { + CGU_UINT32 rightAlignment = 0; + + // Sort out parity bits + if(blockMode != 2) + { + // 
Sort out BCC parity bits + packedEndpoints[(subset*2)+0] = bestEndpoints[(subset*2+0)*4+0] & 1; + packedEndpoints[(subset*2)+1] = bestEndpoints[(subset*2+1)*4+0] & 1; + for (CGU_INT ch=0; chchannels3or4; ch++) + { + bestEndpoints[(subset*2+0)*4+ch] >>= 1; + bestEndpoints[(subset*2+1)*4+ch] >>= 1; + } + rightAlignment++; + } + + // Fixup endpoints + for (CGU_INT ch=0; chchannels3or4; ch++) + { + packedEndpoints[(subset*2)+0] |= bestEndpoints[((subset*2)+0)*4+ch] << rightAlignment; + packedEndpoints[(subset*2)+1] |= bestEndpoints[((subset*2)+1)*4+ch] << rightAlignment; + rightAlignment += EncodeState->componentBits; + } + } + } + + CGV_UINT8 idxCount[3] = {0, 0, 0}; + for (CGV_INT k=0; kmaxSubSets,k); + CGV_UINT8 idxC = idxCount[partsub]; + bestindex16[k] = bestindex[partsub*MAX_SUBSET_SIZE+idxC]; + idxCount[partsub] = idxC + 1; + } + + Encode_mode02137( + blockMode, + bestPartition, + packedEndpoints, + bestindex16, + EncodeState->cmp_out); +} + +void Compress_mode45( + CGU_INT blockMode, + BC7_EncodeState EncodeState[], +uniform CMP_GLOBAL BC7_Encode u_BC7Encode[]) +{ + + cmp_mode_parameters best_candidate; + EncodeState->channels3or4 = 4; + cmp_memsetBC7((CGV_BYTE *)&best_candidate, 0, sizeof(cmp_mode_parameters)); + + if (blockMode == 4) + { + EncodeState->max_idxMode = 2; + EncodeState->modeBits[0] = 30; // bits = 2 * (Red 5+ Grn 5+ blu 5) + EncodeState->modeBits[1] = 36; // bits = 2 * (Alpha 6+6+6) + EncodeState->numClusters0[0] = 4; + EncodeState->numClusters0[1] = 8; + EncodeState->numClusters1[0] = 8; + EncodeState->numClusters1[1] = 4; + } + else + { + EncodeState->max_idxMode = 1; + EncodeState->modeBits[0] = 42; // bits = 2 * (Red 7+ Grn 7+ blu 7) + EncodeState->modeBits[1] = 48; // bits = 2 * (Alpha 8+8+8) = 48 + EncodeState->numClusters0[0] = 4; + EncodeState->numClusters0[1] = 4; + EncodeState->numClusters1[0] = 4; + EncodeState->numClusters1[1] = 4; + } + + + CGV_IMAGE src_color_Block[SOURCE_BLOCK_SIZE*MAX_CHANNELS]; + CGV_IMAGE 
src_alpha_Block[SOURCE_BLOCK_SIZE*MAX_CHANNELS]; + + // Go through each possible rotation and selection of index rotationBits) + for (CGU_CHANNEL rotated_channel = 0; rotated_channel < EncodeState->channels3or4; rotated_channel++) + { // A + + for (CGU_INT k=0; kimage_src[k+componentRotations[rotated_channel][p+1]*SOURCE_BLOCK_SIZE]; + src_alpha_Block[k+p*SOURCE_BLOCK_SIZE] = EncodeState->image_src[k+componentRotations[rotated_channel][0]*SOURCE_BLOCK_SIZE]; + } + } + + CGV_ERROR err_quantizer; + CGV_ERROR err_bestQuantizer = CMP_FLOAT_MAX; + + for (CGU_INT idxMode = 0; idxMode < EncodeState->max_idxMode; idxMode++) + { // B + CGV_INDEXPACKED color_index2[2]; // reserved .. Not used! + + err_quantizer = GetQuantizeIndex( + color_index2, + best_candidate.color_index, + src_color_Block, + SOURCE_BLOCK_SIZE, + EncodeState->numClusters0[idxMode], + 3); + + err_quantizer += GetQuantizeIndex( + color_index2, + best_candidate.alpha_index, + src_alpha_Block, + SOURCE_BLOCK_SIZE, + EncodeState->numClusters1[idxMode], + 3) / 3.0F; + + // If quality is high then run the full shaking for this config and + // store the result if it beats the best overall error + // Otherwise only run the shaking if the error is better than the best + // quantizer error + if(err_quantizer <= err_bestQuantizer) + { + err_bestQuantizer = err_quantizer; + + // Shake size gives the size of the shake cube + CGV_ERROR err_overallError; + + err_overallError = optimize_IndexAndEndPoints( + best_candidate.color_index, + best_candidate.color_qendpoint, + src_color_Block, + SOURCE_BLOCK_SIZE, + EncodeState->numClusters0[idxMode], + static_cast(EncodeState->modeBits[0]), + 3, + u_BC7Encode); + + // Alpha scalar block + err_overallError += optimize_IndexAndEndPoints( + best_candidate.alpha_index, + best_candidate.alpha_qendpoint, + src_alpha_Block, + SOURCE_BLOCK_SIZE, + EncodeState->numClusters1[idxMode], + static_cast(EncodeState->modeBits[1]), + 3, + u_BC7Encode) / 3.0f; + + // If we beat the previous 
best then encode the block + if(err_overallError < EncodeState->best_err) + { + best_candidate.idxMode = idxMode; + best_candidate.rotated_channel = rotated_channel; + if (blockMode == 4) + Encode_mode4( EncodeState->cmp_out, &best_candidate); + else + Encode_mode5( EncodeState->cmp_out, &best_candidate); + EncodeState->best_err = err_overallError; + } + } + } // B + } // A +} + + +void Compress_mode6( BC7_EncodeState EncodeState[], +uniform CMP_GLOBAL BC7_Encode u_BC7Encode[]) +{ + CGV_ERROR err; + + CGV_EPOCODE epo_code_out[8] = {0}; + CGV_INDEX best_index_out[MAX_SUBSET_SIZE]; + CGV_INDEXPACKED best_packedindex_out[2]; + + + // CGV_IMAGE block_endpoints[8]; + // icmp_get_block_endpoints(block_endpoints, EncodeState->image_src, -1, 4); + // icmp_GetQuantizedEpoCode(epo_code_out, block_endpoints, 6,4); + // err = icmp_GetQuantizeIndex(best_packedindex_out, best_index_out, EncodeState->image_src, 4, block_endpoints, 0,4); + + err = GetQuantizeIndex( + best_packedindex_out, + best_index_out, + EncodeState->image_src, + 16, // numEntries + 16, // clusters + 4); // channels3or4 + + //***************************** + // Process the partition shape + //***************************** + err = optimize_IndexAndEndPoints( + best_index_out, + epo_code_out, + EncodeState->image_src, + 16, //numEntries + 16, // Mi_ = clusters + 58, // bits + 4, // channels3or4 + u_BC7Encode); + + //************************** + // Save the encoded block + //************************** + + if (err < EncodeState->best_err) + { + EncodeState->best_err = err; + Encode_mode6( + best_index_out, + epo_code_out, + EncodeState->cmp_out); + } +} + +void copy_BC7_Encode_settings(BC7_EncodeState EncodeState[], uniform CMP_GLOBAL BC7_Encode settings []) +{ + EncodeState->best_err = CMP_FLOAT_MAX; + EncodeState->validModeMask = settings->validModeMask; + #ifdef USE_ICMP + EncodeState->part_count = settings->part_count; + EncodeState->channels = settings->channels; +#endif +} + 
+//===================================== ICMP CODE ========================================================= +#ifdef USE_ICMP +//======================================== +// Modified Intel Texture Compression Code +//======================================== + +void icmp_Write32Bit(CGV_CMPOUTPACKED base[], CGU_INT* uniform offset, CGU_INT bits, CGV_CMPOUTPACKED bitVal) +{ + base[*offset / 32] |= ((CGV_CMPOUTPACKED)bitVal) << (*offset % 32); + if (*offset % 32 + bits > 32) + { + base[*offset / 32 + 1] |= shift_right_uint32(bitVal, 32 - *offset % 32); + } + *offset += bits; +} + +//================ 32 bit cmp_out mode encoders =============== + +INLINE void icmp_swap_epocode(CGV_EPOCODE u[], CGV_EPOCODE v[], CGU_INT n) +{ + for (CGU_INT i = 0; i < n; i++) + { + CGV_EPOCODE t = u[i]; + u[i] = v[i]; + v[i] = t; + } +} + +void icmp_encode_apply_swap(CGV_EPOCODE endpoint[], CGU_INT channel, CGV_INDEXPACKED block_index[2], CGU_INT bits) +{ + CGU_INT levels = 1 << bits; + if ((block_index[0] & 15) >= levels / 2) + { + icmp_swap_epocode(&endpoint[0], &endpoint[channel], channel); + + for (CGU_INT k = 0; k < 2; k++) + block_index[k] = (CGV_INDEXPACKED)(0x11111111 * (levels - 1)) - block_index[k]; + } +} + +void icmp_encode_index(CGV_CMPOUTPACKED data[5], CGU_INT* uniform pPos, CGV_INDEXPACKED block_index[2], CGU_INT bits, CGV_MASK flips) +{ + CGU_INT levels = 1 << bits; + CGV_MASK flips_shifted = flips; + for (CGU_INT k1 = 0; k1 < 2; k1++) + { + CGV_CMPOUTPACKED qbits_shifted = block_index[k1]; + for (CGU_INT k2 = 0; k2 < 8; k2++) + { + CGV_CMPOUTPACKED q = qbits_shifted & 15; + if ((flips_shifted & 1) > 0) q = (levels - 1) - q; + + if (k1 == 0 && k2 == 0) icmp_Write32Bit(data, pPos, bits - 1, q); + else icmp_Write32Bit(data, pPos, bits, q); + qbits_shifted >>= 4; + flips_shifted >>= 1; + } + } +} + +void icmp_bc7_encode_endpoint2(CGV_CMPOUTPACKED data[5], CGU_INT* uniform pPos, CGV_INDEXPACKED color_index[2], CGU_INT bits, CGV_MASK flips) +{ + CGU_INT levels = 1 << bits; + 
CGV_MASK flips_shifted = flips; + for (CGU_INT k1 = 0; k1 < 2; k1++) + { + CGV_INDEXPACKED qbits_shifted = color_index[k1]; + for (CGU_INT k2 = 0; k2 < 8; k2++) + { + CGV_INDEXPACKED q = qbits_shifted & 15; + if ((flips_shifted & 1) > 0) q = (levels - 1) - q; + + if (k1 == 0 && k2 == 0) icmp_Write32Bit(data, pPos, bits - 1, q); + else icmp_Write32Bit(data, pPos, bits, q); + qbits_shifted >>= 4; + flips_shifted >>= 1; + } + } +} + +INLINE CGV_CMPOUTPACKED icmp_pow2Packed(CGV_FIXUPINDEX x) +{ + return 1 << x; +} + +INLINE void icmp_encode_data_shl_1bit_from(CGV_CMPOUTPACKED data[5], CGV_FIXUPINDEX from) +{ + if (from < 96) + { + //assert(from > 64+10); + + CGV_CMPOUTPACKED shifted = (data[2] >> 1) | (data[3] << 31); + CGV_CMPOUTPACKED mask = (icmp_pow2Packed(from - 64) - 1) >> 1; + data[2] = (mask&data[2]) | (~mask&shifted); + data[3] = (data[3] >> 1) | (data[4] << 31); + data[4] = data[4] >> 1; + } + else if (from < 128) + { + CGV_CMPOUTPACKED shifted = (data[3] >> 1) | (data[4] << 31); + CGV_CMPOUTPACKED mask = (icmp_pow2Packed(from - 96) - 1) >> 1; + data[3] = (mask&data[3]) | (~mask&shifted); + data[4] = data[4] >> 1; + } +} + +INLINE void icmp_get_fixuptable(CGV_FIXUPINDEX fixup[3], CGV_PARTID part_id) +{ + // same as CMP SDK v3.1 BC7_FIXUPINDEX1 & BC7_FIXUPINDEX2 for each partition range 0..63 + // The data is saved as a packed INT = (BC7_FIXUPINDEX1 << 4 + BC7_FIXUPINDEX2) + CMP_STATIC uniform __constant CGV_FIXUPINDEX FIXUPINDEX[] = { + // 2 subset partitions 0..63 + 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, + 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0x20u, 0x20u, + 0xf0u, 0xf0u, 0x60u, 0x80u, 0x20u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0x60u, + 0x60u, 0x20u, 0x60u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u, + // 3 subset partitions 64..128 + 0x3fu, 
0x38u, 0xf8u, 0xf3u, 0x8fu, 0x3fu, 0xf3u, 0xf8u, 0x8fu, 0x8fu, 0x6fu, 0x6fu, 0x6fu, 0x5fu, 0x3fu, 0x38u, + 0x3fu, 0x38u, 0x8fu, 0xf3u, 0x3fu, 0x38u, 0x6fu, 0xa8u, 0x53u, 0x8fu, 0x86u, 0x6au, 0x8fu, 0x5fu, 0xfau, 0xf8u, + 0x8fu, 0xf3u, 0x3fu, 0x5au, 0x6au, 0xa8u, 0x89u, 0xfau, 0xf6u, 0x3fu, 0xf8u, 0x5fu, 0xf3u, 0xf6u, 0xf6u, 0xf8u, + 0x3fu, 0xf3u, 0x5fu, 0x5fu, 0x5fu, 0x8fu, 0x5fu, 0xafu, 0x5fu, 0xafu, 0x8fu, 0xdfu, 0xf3u, 0xcfu, 0x3fu, 0x38u + }; + + CGV_FIXUPINDEX skip_packed = FIXUPINDEX[part_id];// gather_int2(FIXUPINDEX, part_id); + fixup[0] = 0; + fixup[1] = skip_packed >> 4; + fixup[2] = skip_packed & 15; +} + +void icmp_bc7_encode_adjust_skip_mode01237_2(CGV_CMPOUTPACKED data[5], CGU_INT mode, CGV_PARTID part_id) +{ + CGU_INT bits = 2; if (mode == 0 || mode == 1) bits = 3; + CGU_INT maxSubSets = 2; if (mode == 0 || mode == 2) maxSubSets = 3; + + CGV_FIXUPINDEX fixup[3]; + icmp_get_fixuptable(fixup, part_id); + + if (maxSubSets > 2 && fixup[1] < fixup[2]) + { + CGV_FIXUPINDEX t = fixup[1]; fixup[1] = fixup[2]; fixup[2] = t; + } + + for (CGU_INT j = 1; j < maxSubSets; j++) + { + CGV_FIXUPINDEX k = fixup[j]; + icmp_encode_data_shl_1bit_from(data, 128 + (maxSubSets - 1) - (15 - k)*bits); + } +} + +INLINE CGV_UINT32 gather_uint32(__constant CGU_UINT32 * const uniform ptr, CGV_INT idx) +{ + return ptr[idx]; // (perf warning expected) +} + +INLINE CGV_MASK icmp_get_partition_mask(CGV_PARTID part_id, CGU_INT subset) +{ + CMP_STATIC uniform __constant CGV_SHIFT32 pattern_mask_table[] = { + // 2 subset partitions + 0xCCCC3333u, 0x88887777u, 0xEEEE1111u, 0xECC81337u, 0xC880377Fu, 0xFEEC0113u, 0xFEC80137u, 0xEC80137Fu, + 0xC80037FFu, 0xFFEC0013u, 0xFE80017Fu, 0xE80017FFu, 0xFFE80017u, 0xFF0000FFu, 0xFFF0000Fu, 0xF0000FFFu, + 0xF71008EFu, 0x008EFF71u, 0x71008EFFu, 0x08CEF731u, 0x008CFF73u, 0x73108CEFu, 0x3100CEFFu, 0x8CCE7331u, + 0x088CF773u, 0x3110CEEFu, 0x66669999u, 0x366CC993u, 0x17E8E817u, 0x0FF0F00Fu, 0x718E8E71u, 0x399CC663u, + 0xAAAA5555u, 0xF0F00F0Fu, 
0x5A5AA5A5u, 0x33CCCC33u, 0x3C3CC3C3u, 0x55AAAA55u, 0x96966969u, 0xA55A5AA5u, + 0x73CE8C31u, 0x13C8EC37u, 0x324CCDB3u, 0x3BDCC423u, 0x69969669u, 0xC33C3CC3u, 0x99666699u, 0x0660F99Fu, + 0x0272FD8Du, 0x04E4FB1Bu, 0x4E40B1BFu, 0x2720D8DFu, 0xC93636C9u, 0x936C6C93u, 0x39C6C639u, 0x639C9C63u, + 0x93366CC9u, 0x9CC66339u, 0x817E7E81u, 0xE71818E7u, 0xCCF0330Fu, 0x0FCCF033u, 0x774488BBu, 0xEE2211DDu, + + // 3 subset partitions + 0x08CC0133u, 0x8CC80037u, 0xCC80006Fu, 0xEC001331u, 0x330000FFu, 0x00CC3333u, 0xFF000033u, 0xCCCC0033u, + 0x0F0000FFu, 0x0FF0000Fu, 0x00F0000Fu, 0x44443333u, 0x66661111u, 0x22221111u, 0x136C0013u, 0x008C8C63u, + 0x36C80137u, 0x08CEC631u, 0x3330000Fu, 0xF0000333u, 0x00EE1111u, 0x88880077u, 0x22C0113Fu, 0x443088CFu, + 0x0C22F311u, 0x03440033u, 0x69969009u, 0x9960009Fu, 0x03303443u, 0x00660699u, 0xC22C3113u, 0x8C0000EFu, + 0x1300007Fu, 0xC4003331u, 0x004C1333u, 0x22229999u, 0x00F0F00Fu, 0x24929249u, 0x29429429u, 0xC30C30C3u, + 0xC03C3C03u, 0x00AA0055u, 0xAA0000FFu, 0x30300303u, 0xC0C03333u, 0x90900909u, 0xA00A5005u, 0xAAA0000Fu, + 0x0AAA0555u, 0xE0E01111u, 0x70700707u, 0x6660000Fu, 0x0EE01111u, 0x07707007u, 0x06660999u, 0x660000FFu, + 0x00660099u, 0x0CC03333u, 0x03303003u, 0x60000FFFu, 0x80807777u, 0x10100101u, 0x000A0005u, 0x08CE8421u + }; + + CGV_MASK mask_packed = gather_uint32(pattern_mask_table, part_id); + CGV_MASK mask0 = mask_packed & 0xFFFF; + CGV_MASK mask1 = mask_packed >> 16; + + CGV_MASK mask = (subset == 2) ? (~mask0)&(~mask1) : ((subset == 0) ? 
mask0 : mask1); + return mask; +} + +#ifdef USE_VARYING +#ifdef ASPM_GPU +INLINE CGV_INDEXPACKED gather_packedindex(CGV_INDEXPACKED* ptr, CGV_FIXUPINDEX idx) +{ + return ptr[idx]; +} +#else +INLINE CGV_INDEXPACKED gather_packedindex(CMP_CONSTANT varying CGV_INDEXPACKED* CMP_CONSTANT uniform ptr, CGV_FIXUPINDEX idx) +{ + return ptr[idx]; // (perf warning expected) +} +#endif +#endif + +CGV_MASK icmp_encode_apply_swap_mode01237(CGV_EPOCODE qep[], CGV_INDEXPACKED color_index[2], CGU_INT blockMode, CGV_PARTID part_id) +{ + CGU_INT bits = 2; if (blockMode == 0 || blockMode == 1) bits = 3; + CGU_INT maxSubSets = 2; if (blockMode == 0 || blockMode == 2) maxSubSets = 3; + + CGV_MASK flips = 0; + CGU_INT levels = 1 << bits; + CGV_FIXUPINDEX fixup[3]; + icmp_get_fixuptable(fixup, part_id); + + for (CGU_INT j = 0; j < maxSubSets; j++) + { + CGV_FIXUPINDEX k0 = fixup[j]; + +#ifdef USE_VARYING + CGV_INDEXPACKED q = ((gather_packedindex(color_index, k0 >> 3) << (28 - (k0 & 7) * 4)) >> 28); +#else + CGV_INDEXPACKED q = ((color_index[k0 >> 3] << (28 - (k0 & 7) * 4)) >> 28); +#endif + + if (q >= levels / 2) + { + icmp_swap_epocode(&qep[8 * j], &qep[8 * j + 4], 4); + CGV_MASK partition_mask = icmp_get_partition_mask(part_id, j); + flips |= partition_mask; + } + } + + return flips; +} + +void icmp_encode_mode01237(CGV_CMPOUTPACKED cmp_out[5], CGV_EPOCODE color_qendpoint[], CGV_INDEXPACKED color_index[2], CGV_PARTID part_id, CGU_INT blockMode) +{ + CGU_INT bits = 2; if (blockMode == 0 || blockMode == 1) bits = 3; + CGU_INT maxSubSets = 2; if (blockMode == 0 || blockMode == 2) maxSubSets = 3; + CGU_INT channels = 3; if (blockMode == 7) channels = 4; + + CGV_MASK flips = icmp_encode_apply_swap_mode01237(color_qendpoint, color_index, blockMode, part_id); + + for (CGU_INT k = 0; k < 5; k++) cmp_out[k] = 0; + CGU_INT pos = 0; + + // mode 0-3, 7 + icmp_Write32Bit(cmp_out, &pos, blockMode + 1, 1 << blockMode); + + // partition + if (blockMode == 0) + { + icmp_Write32Bit(cmp_out, &pos, 4, 
part_id & 15); + } + else + { + icmp_Write32Bit(cmp_out, &pos, 6, part_id & 63); + } + + // endpoints + for (CGU_INT ch = 0; ch < channels; ch++) + for (CGU_INT j = 0; j < maxSubSets * 2; j++) + { + if (blockMode == 0) + { + icmp_Write32Bit(cmp_out, &pos, 4, color_qendpoint[j * 4 + 0 + ch] >> 1); + } + else if (blockMode == 1) + { + icmp_Write32Bit(cmp_out, &pos, 6, color_qendpoint[j * 4 + 0 + ch] >> 1); + } + else if (blockMode == 2) + { + icmp_Write32Bit(cmp_out, &pos, 5, color_qendpoint[j * 4 + 0 + ch]); + } + else if (blockMode == 3) + { + icmp_Write32Bit(cmp_out, &pos, 7, color_qendpoint[j * 4 + 0 + ch] >> 1); + } + else if (blockMode == 7) + { + icmp_Write32Bit(cmp_out, &pos, 5, color_qendpoint[j * 4 + 0 + ch] >> 1); + } + //else + //{ + // assert(false); + //} + } + + // p bits + if (blockMode == 1) + for (CGU_INT j = 0; j < 2; j++) + { + icmp_Write32Bit(cmp_out, &pos, 1, color_qendpoint[j * 8] & 1); + } + + if (blockMode == 0 || blockMode == 3 || blockMode == 7) + for (CGU_INT j = 0; j < maxSubSets * 2; j++) + { + icmp_Write32Bit(cmp_out, &pos, 1, color_qendpoint[j * 4] & 1); + } + + // quantized values + icmp_bc7_encode_endpoint2(cmp_out, &pos, color_index, bits, flips); + icmp_bc7_encode_adjust_skip_mode01237_2(cmp_out, blockMode, part_id); +} + +INLINE void icmp_swap_indexpacked(CGV_INDEXPACKED u[], CGV_INDEXPACKED v[], CGU_INT n) +{ + for (CGU_INT i = 0; i < n; i++) + { + CGV_INDEXPACKED t = u[i]; + u[i] = v[i]; + v[i] = t; + } +} + + +void icmp_encode_mode4(CGV_CMPOUTPACKED cmp_out[5], varying cmp_mode_parameters* uniform params) +{ + CGV_EPOCODE color_qendpoint[8]; + CGV_INDEXPACKED color_index[2]; + CGV_EPOCODE alpha_qendpoint[2]; + CGV_INDEXPACKED alpha_index[2]; + + CGV_CMPOUTPACKED rotated_channel = params->rotated_channel; + CGV_SHIFT32 idxMode = params->idxMode; + + icmp_swap_epocode(params->color_qendpoint, color_qendpoint, 8); + icmp_swap_indexpacked(params->best_color_index, color_index, 2); + icmp_swap_epocode(params->alpha_qendpoint, 
alpha_qendpoint, 2); + icmp_swap_indexpacked(params->best_alpha_index, alpha_index, 2); + + for (CGU_INT k = 0; k < 5; k++) cmp_out[k] = 0; + CGU_INT pos = 0; + + // mode 4 (5 bits) 00001 + icmp_Write32Bit(cmp_out, &pos, 5, 16); + + // rotation channel 2 bits + icmp_Write32Bit(cmp_out, &pos, 2, (rotated_channel + 1) & 3); + + // idxMode 1 bit + icmp_Write32Bit(cmp_out, &pos, 1, idxMode); + + if (!idxMode) + { + icmp_encode_apply_swap(color_qendpoint, 4, color_index, 2); + icmp_encode_apply_swap(alpha_qendpoint, 1, alpha_index, 3); + } + else + { + icmp_swap_indexpacked(color_index, alpha_index, 2); + icmp_encode_apply_swap(alpha_qendpoint, 1, color_index, 2); + icmp_encode_apply_swap(color_qendpoint, 4, alpha_index, 3); + } + + // color endpoints 5 bits each + // R0 : R1 + // G0 : G1 + // B0 : B1 + for (CGU_INT p = 0; p < 3; p++) + { + CGV_EPOCODE c0 = color_qendpoint[0 + p]; + CGV_EPOCODE c1 = color_qendpoint[4 + p]; + icmp_Write32Bit(cmp_out, &pos, 5, c0); // 0 + icmp_Write32Bit(cmp_out, &pos, 5, c1); // 1 + } + + // alpha endpoints (6 bits each) + // A0 : A1 + icmp_Write32Bit(cmp_out, &pos, 6, alpha_qendpoint[0]); + icmp_Write32Bit(cmp_out, &pos, 6, alpha_qendpoint[1]); + + // index data (color index 2 bits each) 31 bits total + icmp_encode_index(cmp_out, &pos, color_index, 2, 0); + + // index data (alpha index 3 bits each) 47 bits total + icmp_encode_index(cmp_out, &pos, alpha_index, 3, 0); +} + +void icmp_Encode_mode5(CGV_CMPOUTPACKED cmp_out[5], varying cmp_mode_parameters* uniform params) +{ + + CGV_EPOCODE qep[8]; + CGV_INDEXPACKED color_index[2]; + CGV_EPOCODE alpha_qendpoint[2]; + CGV_INDEXPACKED alpha_index[2]; + + icmp_swap_epocode(params->color_qendpoint, qep, 8); + icmp_swap_indexpacked(params->best_color_index, color_index, 2); + icmp_swap_epocode(params->alpha_qendpoint, alpha_qendpoint, 2); + icmp_swap_indexpacked(params->best_alpha_index, alpha_index, 2); + + CGV_CMPOUTPACKED rotated_channel = params->rotated_channel; + + 
icmp_encode_apply_swap(qep, 4, color_index, 2); + icmp_encode_apply_swap(alpha_qendpoint, 1, alpha_index, 2); + + for (CGU_INT k = 0; k < 5; k++) cmp_out[k] = 0; + CGU_INT pos = 0; + + // mode 5 + icmp_Write32Bit(cmp_out, &pos, 6, 1 << 5); + + // rotated channel + icmp_Write32Bit(cmp_out, &pos, 2, (rotated_channel + 1) & 3); + + // endpoints + for (CGU_INT p = 0; p < 3; p++) + { + icmp_Write32Bit(cmp_out, &pos, 7, qep[0 + p]); + icmp_Write32Bit(cmp_out, &pos, 7, qep[4 + p]); + } + + // alpha endpoints + icmp_Write32Bit(cmp_out, &pos, 8, alpha_qendpoint[0]); + icmp_Write32Bit(cmp_out, &pos, 8, alpha_qendpoint[1]); + + // quantized values + icmp_encode_index(cmp_out, &pos, color_index, 2, 0); + icmp_encode_index(cmp_out, &pos, alpha_index, 2, 0); + +} + +void icmp_encode_mode6(CGV_CMPOUTPACKED cmp_out[5], CGV_EPOCODE qep[8], CGV_INDEXPACKED color_index[2]) +{ + icmp_encode_apply_swap(qep, 4, color_index, 4); + + for (CGU_INT k = 0; k < 5; k++) cmp_out[k] = 0; + CGU_INT pos = 0; + + // mode 6 + icmp_Write32Bit(cmp_out, &pos, 7, 64); + + // endpoints + for (CGU_INT p = 0; p < 4; p++) + { + icmp_Write32Bit(cmp_out, &pos, 7, qep[0 + p] >> 1); + icmp_Write32Bit(cmp_out, &pos, 7, qep[4 + p] >> 1); + } + + // p bits + icmp_Write32Bit(cmp_out, &pos, 1, qep[0] & 1); + icmp_Write32Bit(cmp_out, &pos, 1, qep[4] & 1); + + // quantized values + icmp_encode_index(cmp_out, &pos, color_index, 4, 0); +} + +/////////////////////////// +// PCA helpers + +INLINE void icmp_compute_stats_masked(CGV_IMAGE stats[15], CGV_IMAGE image_src[64], CGV_MASK mask, CGU_CHANNEL channels) +{ + for (CGU_INT i = 0; i < 15; i++) stats[i] = 0; + + CGV_MASK mask_shifted = mask << 1; + for (CGU_INT k = 0; k < 16; k++) + { + mask_shifted >>= 1; + //if ((mask_shifted&1) == 0) continue; + CGV_MASK flag = (mask_shifted & 1); + + CGV_IMAGE rgba[4]; + for (CGU_CHANNEL ch = 0; ch < channels; ch++) rgba[ch] = image_src[k + ch * 16]; + + for (CGU_CHANNEL ch = 0; ch < channels; ch++) rgba[ch] *= flag; + stats[14] += 
flag; + + stats[10] += rgba[0]; + stats[11] += rgba[1]; + stats[12] += rgba[2]; + + stats[0] += rgba[0] * rgba[0]; + stats[1] += rgba[0] * rgba[1]; + stats[2] += rgba[0] * rgba[2]; + + stats[4] += rgba[1] * rgba[1]; + stats[5] += rgba[1] * rgba[2]; + + stats[7] += rgba[2] * rgba[2]; + + if (channels == 4) + { + stats[13] += rgba[3]; + + stats[3] += rgba[0] * rgba[3]; + stats[6] += rgba[1] * rgba[3]; + stats[8] += rgba[2] * rgba[3]; + stats[9] += rgba[3] * rgba[3]; + } + } +} + +INLINE void icmp_covar_from_stats(CGV_IMAGE covar[10], CGV_IMAGE stats[15], CGU_CHANNEL channels3or4) +{ + covar[0] = stats[0] - stats[10 + 0] * stats[10 + 0] / stats[14]; + covar[1] = stats[1] - stats[10 + 0] * stats[10 + 1] / stats[14]; + covar[2] = stats[2] - stats[10 + 0] * stats[10 + 2] / stats[14]; + + covar[4] = stats[4] - stats[10 + 1] * stats[10 + 1] / stats[14]; + covar[5] = stats[5] - stats[10 + 1] * stats[10 + 2] / stats[14]; + + covar[7] = stats[7] - stats[10 + 2] * stats[10 + 2] / stats[14]; + + if (channels3or4 == 4) + { + covar[3] = stats[3] - stats[10 + 0] * stats[10 + 3] / stats[14]; + covar[6] = stats[6] - stats[10 + 1] * stats[10 + 3] / stats[14]; + covar[8] = stats[8] - stats[10 + 2] * stats[10 + 3] / stats[14]; + covar[9] = stats[9] - stats[10 + 3] * stats[10 + 3] / stats[14]; + } +} + +INLINE void icmp_compute_covar_dc_masked(CGV_IMAGE covar[6], CGV_IMAGE dc[3], CGV_IMAGE image_src[64], CGV_MASK mask, CGU_INT channels3or4) +{ + CGV_IMAGE stats[15]; + icmp_compute_stats_masked(stats, image_src, mask, channels3or4); + + icmp_covar_from_stats(covar, stats, channels3or4); + for (CGU_INT ch = 0; ch < channels3or4; ch++) dc[ch] = stats[10 + ch] / stats[14]; +} + +INLINE void icmp_ssymv3(CGV_IMAGE a[4], CGV_IMAGE covar[10], CGV_IMAGE b[4]) +{ + a[0] = covar[0] * b[0] + covar[1] * b[1] + covar[2] * b[2]; + a[1] = covar[1] * b[0] + covar[4] * b[1] + covar[5] * b[2]; + a[2] = covar[2] * b[0] + covar[5] * b[1] + covar[7] * b[2]; +} + +INLINE void icmp_ssymv4_2(CGV_IMAGE a[4], 
CGV_IMAGE covar[10], CGV_IMAGE b[4]) +{ + a[0] = covar[0] * b[0] + covar[1] * b[1] + covar[2] * b[2] + covar[3] * b[3]; + a[1] = covar[1] * b[0] + covar[4] * b[1] + covar[5] * b[2] + covar[6] * b[3]; + a[2] = covar[2] * b[0] + covar[5] * b[1] + covar[7] * b[2] + covar[8] * b[3]; + a[3] = covar[3] * b[0] + covar[6] * b[1] + covar[8] * b[2] + covar[9] * b[3]; +} + +#ifndef ASPM +// Computes inverse square root over an implementation-defined range. The maximum error is implementation-defined. +CGV_IMAGE Image_rsqrt(CGV_IMAGE f) +{ + CGV_IMAGE sf = sqrt(f); + if (sf != 0) + return 1 / sqrt(f); + else + return 0.0f; +} +#endif + +INLINE void icmp_compute_axis(CGV_IMAGE axis[4], + CGV_IMAGE covar[10], +#ifdef ASPM_GPU + CGV_ITTERATIONS powerIterations, +#else + uniform __constant CGV_ITTERATIONS powerIterations, +#endif + CGU_CHANNEL channels) +{ + CGV_IMAGE vec[4] = { 1,1,1,1 }; + + for (CGU_INT i = 0; i < powerIterations; i++) + { + if (channels == 3) icmp_ssymv3(axis, covar, vec); + if (channels == 4) icmp_ssymv4_2(axis, covar, vec); + + for (CGU_CHANNEL ch = 0; ch < channels; ch++) vec[ch] = axis[ch]; + + if (i % 2 == 1) // renormalize every other iteration + { + CGV_IMAGE norm_sq = 0; + for (CGU_CHANNEL ch = 0; ch < channels; ch++) + norm_sq += axis[ch] * axis[ch]; + +#ifndef ASPM + CGV_IMAGE rnorm = Image_rsqrt(norm_sq); +#else + CGV_IMAGE rnorm = rsqrt(norm_sq); +#endif + for (CGU_CHANNEL ch = 0; ch < channels; ch++) vec[ch] *= rnorm; + } + } + + for (CGU_CHANNEL ch = 0; ch < channels; ch++) axis[ch] = vec[ch]; +} + +void icmp_block_pca_axis(CGV_IMAGE axis[4], CGV_IMAGE dc[4], CGV_IMAGE image_src[64], CGV_MASK mask, CGU_INT channels3or4) +{ + uniform __constant CGV_ITTERATIONS powerIterations = 8; // 4 not enough for HQ + + CGV_IMAGE covar[10]; + icmp_compute_covar_dc_masked(covar, dc, image_src, mask, channels3or4); + + CGV_IMAGE inv_var = 1.0 / (256 * 256); + for (CGU_INT k = 0; k < 10; k++) + { + covar[k] *= inv_var; + } + + CGV_IMAGE eps = sq_image(0.001F); + 
covar[0] += eps; + covar[4] += eps; + covar[7] += eps; + covar[9] += eps; + + icmp_compute_axis(axis, covar, powerIterations, channels3or4); +} + +CGV_IMAGE minImage(CGV_IMAGE a, CGV_IMAGE b) { return a < b ? a : b; } +CGV_IMAGE maxImage(CGV_IMAGE a, CGV_IMAGE b) { return a > b ? a : b; } + + +void icmp_block_segment_core(CGV_IMAGE epo_code[], CGV_IMAGE image_src[64], CGV_MASK mask, CGU_INT channels3or4) +{ + CGV_IMAGE axis[4]; + CGV_IMAGE dc[4]; + icmp_block_pca_axis(axis, dc, image_src, mask, channels3or4); + + CGV_IMAGE ext[2]; + ext[0] = +1e32; + ext[1] = -1e32; + + // find min/max + CGV_MASK mask_shifted = mask << 1; + for (CGU_INT k = 0; k < 16; k++) + { + mask_shifted >>= 1; + if ((mask_shifted & 1) == 0) continue; + + CGV_IMAGE dot = 0; + for (CGU_INT ch = 0; ch < channels3or4; ch++) + dot += axis[ch] * (image_src[16 * ch + k] - dc[ch]); + + ext[0] = minImage(ext[0], dot); + ext[1] = maxImage(ext[1], dot); + } + + // create some distance if the endpoints collapse + if (ext[1] - ext[0] < 1.0f) + { + ext[0] -= 0.5f; + ext[1] += 0.5f; + } + + for (CGU_INT i = 0; i < 2; i++) + for (CGU_INT ch = 0; ch < channels3or4; ch++) + { + epo_code[4 * i + ch] = ext[i] * axis[ch] + dc[ch]; + } +} + +INLINE CGV_IMAGE clampf(CGV_IMAGE v, CGV_IMAGE a, CGV_IMAGE b) +{ + if (v < a) + return a; + else + if (v > b) + return b; + return v; +} + + +void icmp_get_block_endpoints(CGV_IMAGE block_endpoints[], CGV_IMAGE image_src[64], CGV_MASK mask, CGU_CHANNEL channels3or4) +{ + icmp_block_segment_core(block_endpoints, image_src, mask, channels3or4); + + for (CGU_INT i = 0; i < 2; i++) + for (CGU_INT ch = 0; ch < channels3or4; ch++) + { + block_endpoints[4 * i + ch] = clampf(block_endpoints[4 * i + ch], 0.0f, 255.0f); + } +} + +void icmp_ep_quant0367_2(CGV_EPOCODE qep[], CGV_IMAGE ep[], CGU_INT blockMode, CGU_INT channels) +{ + CGU_INT bits = 7; + if (blockMode == 0) bits = 4; + if (blockMode == 7) bits = 5; + + CGU_INT levels = 1 << bits; + CGU_INT levels2 = levels * 2 - 1; + + for 
(CGU_INT i = 0; i < 2; i++) + { + CGV_EPOCODE qep_b[8]; + + for (CGU_INT b = 0; b < 2; b++) + for (CGU_INT p = 0; p < 4; p++) + { + CGV_EPOCODE v = (CGV_TYPEINT)((ep[i * 4 + p] / 255.0f*levels2 - b) / 2.0f + 0.5f) * 2 + b; + qep_b[b * 4 + p] = clampEPO(v, b, levels2 - 1 + b); + } + + CGV_IMAGE ep_b[8]; + for (CGU_INT j = 0; j < 8; j++) + ep_b[j] = qep_b[j]; + + if (blockMode == 0) + for (CGU_INT j = 0; j < 8; j++) + ep_b[j] = expandEPObits(qep_b[j], 5); + + CGV_ERROR err0 = 0.0f; + CGV_ERROR err1 = 0.0f; + for (CGU_INT ch = 0; ch < channels; ch++) + { + err0 += sq_image(ep[i * 4 + ch] - ep_b[0 + ch]); + err1 += sq_image(ep[i * 4 + ch] - ep_b[4 + ch]); + } + + for (CGU_INT p = 0; p < 4; p++) + qep[i * 4 + p] = (err0 < err1) ? qep_b[0 + p] : qep_b[4 + p]; + } +} + +void icmp_ep_quant245_2(CGV_EPOCODE qep[], CGV_IMAGE ep[], CGU_INT mode) +{ + CGU_INT bits = 5; + if (mode == 5) bits = 7; + CGU_INT levels = 1 << bits; + + for (CGU_INT i = 0; i < 8; i++) + { + CGV_EPOCODE v = ((CGV_TYPEINT)(ep[i] / 255.0f*(levels - 1) + 0.5)); + qep[i] = clampEPO(v, 0, levels - 1); + } +} + +void icmp_ep_quant1_2(CGV_EPOCODE qep[], CGV_IMAGE ep[], CGU_INT mode) +{ + CGV_EPOCODE qep_b[16]; + + for (CGU_INT b = 0; b < 2; b++) + for (CGU_INT i = 0; i < 8; i++) + { + CGV_EPOCODE v = ((CGV_TYPEINT)((ep[i] / 255.0f*127.0f - b) / 2 + 0.5)) * 2 + b; + qep_b[b * 8 + i] = clampEPO(v, b, 126 + b); + } + + // dequant + CGV_IMAGE ep_b[16]; + for (CGU_INT k = 0; k < 16; k++) + ep_b[k] = expandEPObits(qep_b[k], 7); + + CGV_ERROR err0 = 0.0f; + CGV_ERROR err1 = 0.0f; + for (CGU_INT j = 0; j < 2; j++) + for (CGU_INT p = 0; p < 3; p++) + { + err0 += sq_image(ep[j * 4 + p] - ep_b[0 + j * 4 + p]); + err1 += sq_image(ep[j * 4 + p] - ep_b[8 + j * 4 + p]); + } + + for (CGU_INT i = 0; i < 8; i++) + qep[i] = (err0 < err1) ? 
qep_b[0 + i] : qep_b[8 + i]; + +} + +void icmp_ep_quant2_2(CGV_EPOCODE qep[], CGV_IMAGE ep[], CGU_INT blockMode, CGU_INT channels3or4) +{ + //assert(mode <= 7); + CMP_STATIC uniform __constant CGV_SUBSETS SubSetTable[] = { 3,2,3,2,1,1,1,2 }; +#ifndef ASPM_GPU + uniform CMP_CONSTANT +#endif + CGV_SUBSETS maxSubSets = SubSetTable[blockMode]; + + if (blockMode == 0 || blockMode == 3 || blockMode == 6 || blockMode == 7) + { + for (CGU_INT i = 0; i < maxSubSets; i++) + icmp_ep_quant0367_2(&qep[i * 8], &ep[i * 8], blockMode, channels3or4); + } + else + if (blockMode == 1) + { + for (CGU_INT i = 0; i < maxSubSets; i++) + icmp_ep_quant1_2(&qep[i * 8], &ep[i * 8], blockMode); + } + else + if (blockMode == 2 || blockMode == 4 || blockMode == 5) + { + for (CGU_INT i = 0; i < maxSubSets; i++) + icmp_ep_quant245_2(&qep[i * 8], &ep[i * 8], blockMode); + } + // else + // assert(false); + +} + +void icmp_ep_dequant2(CGV_IMAGE ep[], CGV_EPOCODE qep[], CGU_INT blockMode) +{ + //assert(mode <= 7); + CMP_STATIC uniform __constant CGV_SUBSETS subSetTable[] = { 3,2,3,2,1,1,1,2 }; +#ifndef ASPM_GPU + uniform CMP_CONSTANT +#endif + CGV_SUBSETS maxSubSets = subSetTable[blockMode]; + + // mode 3, 6 are 8-bit + if (blockMode == 3 || blockMode == 6) + { + for (CGU_INT i = 0; i < 8 * maxSubSets; i++) + ep[i] = qep[i]; + } + else + if (blockMode == 1 || blockMode == 5) + { + for (CGU_INT i = 0; i < 8 * maxSubSets; i++) + ep[i] = expandEPObits(qep[i], 7); + } + else + if (blockMode == 0 || blockMode == 2 || blockMode == 4) + { + for (CGU_INT i = 0; i < 8 * maxSubSets; i++) + ep[i] = expandEPObits(qep[i], 5); + } + else + if (blockMode == 7) + { + for (CGU_INT i = 0; i < 8 * maxSubSets; i++) + ep[i] = expandEPObits(qep[i], 6); + } + //else + // assert(false); +} + +void icmp_GetQuantizedEpoCode(CGV_EPOCODE epo_code_out[], CGV_IMAGE block_endpoints[], CGU_INT blockMode, CGU_CHANNEL channels3or4) +{ + icmp_ep_quant2_2(epo_code_out, block_endpoints, blockMode, channels3or4); + 
icmp_ep_dequant2(block_endpoints, epo_code_out, blockMode); +} + +void icmp_ep_quant_dequant_mode4(CGV_EPOCODE qep[], CGV_IMAGE ep[]) +{ + icmp_ep_quant2_2(qep, ep, 4, 3); + icmp_ep_dequant2(ep, qep, 4); +} + +/////////////////////////// +// pixel quantization +//======================================== +// Modified Intel Texture Compression Code +//======================================== + +INLINE uniform __constant CGV_RAMP* uniform icmp_GetRamp(CGU_INT bits) +{ + //assert(bits>=2 && bits<=4); // invalid bit size + + CMP_STATIC uniform __constant CGV_RAMP unquant_table_2bits[] = { 0, 21, 43, 64 }; + CMP_STATIC uniform __constant CGV_RAMP unquant_table_3bits[] = { 0, 9, 18, 27, 37, 46, 55, 64 }; + CMP_STATIC uniform __constant CGV_RAMP unquant_table_4bits[] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + + uniform __constant CGV_RAMP* uniform unquant_tables[] = { unquant_table_2bits, unquant_table_3bits, unquant_table_4bits }; + + return unquant_tables[bits - 2]; +} + +#ifdef USE_VARYING +INLINE CGV_IMAGE gather_image(varying CGV_IMAGE* uniform ptr, CGV_SHIFT32 idx) +{ + return ptr[idx]; // (perf warning expected) +} +#endif + +INLINE CGV_RAMP gather_ramp( +#ifdef ASPM_GPU + CMP_CONSTANT CGV_RAMP* ptr, +#else + CMP_CONSTANT CGV_RAMP* CMP_CONSTANT uniform ptr, +#endif + CGV_INDEX idx) +{ + return ptr[idx]; // (perf warning expected) +} + +CGV_ERROR icmp_GetQuantizeIndex( + CGV_INDEXPACKED index_packed_out[2], + CGV_INDEX index_out[MAX_SUBSET_SIZE], + CGV_IMAGE image_src[64], + CGU_INT bits, + CGV_IMAGE image_block[], + CGV_SHIFT32 pattern, + CGU_CHANNEL channels3or4) +{ + CGV_ERROR total_err = 0; + uniform __constant CGV_RAMP* uniform Ramp = icmp_GetRamp(bits); + CGV_LEVELS levels = 1 << bits; + + // 64-bit color_qendpoint: 5% overhead in this function + for (CGU_INT k = 0; k < 2; k++) index_packed_out[k] = 0; + + CGV_SHIFT32 pattern_shifted = pattern; + for (CGU_INT k = 0; k < 16; k++) + { + CGV_SHIFT32 j = pattern_shifted & 3; + 
pattern_shifted >>= 2; + + CGV_IMAGE proj = 0; + CGV_IMAGE div = 0; + for (CGU_CHANNEL ch = 0; ch < channels3or4; ch++) + { +#ifdef USE_VARYING + CGV_IMAGE ep_a = gather_image(image_block, 8 * j + 0 + ch); + CGV_IMAGE ep_b = gather_image(image_block, 8 * j + 4 + ch); +#else + CGV_IMAGE ep_a = image_block[8 * j + 0 + ch]; + CGV_IMAGE ep_b = image_block[8 * j + 4 + ch]; +#endif + proj += (image_src[k + ch * 16] - ep_a)*(ep_b - ep_a); + div += sq_image(ep_b - ep_a); + } + + proj /= div; + + CGV_INDEX index_q1 = (CGV_INDEX)(proj*levels + 0.5); + index_q1 = clampIndex(index_q1, 1, levels - 1); + + CGV_ERROR err0 = 0; + CGV_ERROR err1 = 0; + CGV_RAMP ramp0 = gather_ramp(Ramp, index_q1 - 1); + CGV_RAMP ramp1 = gather_ramp(Ramp, index_q1); + + for (CGU_CHANNEL ch = 0; ch < channels3or4; ch++) + { +#ifdef USE_VARYING + CGV_IMAGE ep_a = gather_image(image_block, 8 * j + 0 + ch); + CGV_IMAGE ep_b = gather_image(image_block, 8 * j + 4 + ch); +#else + CGV_IMAGE ep_a = image_block[8 * j + 0 + ch]; + CGV_IMAGE ep_b = image_block[8 * j + 4 + ch]; +#endif + CGV_IMAGE dec_v0 = (CGV_TYPEINT)(((64 - ramp0)*ep_a + ramp0 * ep_b + 32) / 64); + CGV_IMAGE dec_v1 = (CGV_TYPEINT)(((64 - ramp1)*ep_a + ramp1 * ep_b + 32) / 64); + err0 += sq_image(dec_v0 - image_src[k + ch * 16]); + err1 += sq_image(dec_v1 - image_src[k + ch * 16]); + } + + CGV_ERROR best_err = err1; + CGV_INDEX best_index = index_q1; + if (err0 < err1) + { + best_err = err0; + best_index = index_q1 - 1; + } + + index_out[k] = best_index; + index_packed_out[k / 8] += ((CGV_INDEXPACKED)best_index) << 4 * (k % 8); + total_err += best_err; + } + + return total_err; +} + +/////////////////////////// +// LS endpoint refinement + +void icmp_opt_endpoints(CGV_IMAGE ep[], CGV_IMAGE image_src[64], CGU_INT bits, CGV_INDEXPACKED color_qendpoint[2], CGV_MASK mask, CGU_CHANNEL channels3or4) +{ + CGU_INT levels = 1 << bits; + + CGV_IMAGE Atb1[4] = { 0,0,0,0 }; + CGV_IMAGE sum_q = 0; + CGV_IMAGE sum_qq = 0; + CGV_IMAGE sum[5] = { 0,0,0,0,0 }; 
+ + CGV_MASK mask_shifted = mask << 1; + for (CGU_INT k1 = 0; k1 < 2; k1++) + { + CGV_INDEXPACKED qbits_shifted = color_qendpoint[k1]; + for (CGU_INT k2 = 0; k2 < 8; k2++) + { + CGU_INT k = k1 * 8 + k2; + CGV_IMAGE q = (CGV_TYPEINT)(qbits_shifted & 15); + + qbits_shifted >>= 4; + + mask_shifted >>= 1; + if ((mask_shifted & 1) == 0) continue; + + CGV_LEVELS x = (levels - 1) - q; + CGV_LEVELS y = q; + + sum_q += q; + sum_qq += q * q; + + sum[4] += 1; + for (CGU_CHANNEL ch = 0; ch < channels3or4; ch++) sum[ch] += image_src[k + ch * 16]; + for (CGU_CHANNEL ch = 0; ch < channels3or4; ch++) Atb1[ch] += x * image_src[k + ch * 16]; + } + } + + CGV_IMAGE Atb2[4]; + for (CGU_CHANNEL ch = 0; ch < channels3or4; ch++) + { + //sum[ch] = dc[ch]*16; + Atb2[ch] = (levels - 1)*sum[ch] - Atb1[ch]; + } + + CGV_IMAGE Cxx = sum[4] * sq_image(levels - 1) - 2 * (levels - 1)*sum_q + sum_qq; + CGV_IMAGE Cyy = sum_qq; + CGV_IMAGE Cxy = (levels - 1)*sum_q - sum_qq; + CGV_IMAGE scale = (levels - 1) / (Cxx*Cyy - Cxy * Cxy); + + for (CGU_CHANNEL ch = 0; ch < channels3or4; ch++) + { + ep[0 + ch] = (Atb1[ch] * Cyy - Atb2[ch] * Cxy)*scale; + ep[4 + ch] = (Atb2[ch] * Cxx - Atb1[ch] * Cxy)*scale; + + //ep[0+ch] = clamp(ep[0+ch], 0, 255); + //ep[4+ch] = clamp(ep[4+ch], 0, 255); + } + + if (img_absf(Cxx*Cyy - Cxy * Cxy) < 0.001f) + { + // flatten + for (CGU_CHANNEL ch = 0; ch < channels3or4; ch++) + { + ep[0 + ch] = sum[ch] / sum[4]; + ep[4 + ch] = ep[0 + ch]; + } + } +} + +////////////////////////// +// parameter estimation + +void icmp_channel_quant_dequant2(CGV_EPOCODE qep[2], CGV_IMAGE ep[2], CGU_INT epbits) +{ + CGV_LEVELS elevels = (1 << epbits); + + for (CGU_INT i = 0; i < 2; i++) + { + CGV_EPOCODE v = ((CGV_EPOCODE)(ep[i] / 255.0f*(elevels - 1) + 0.5f)); + qep[i] = clampEPO(v, 0, elevels - 1); + ep[i] = expandEPObits(qep[i], epbits); + } +} + +void icmp_refineEndpoints(CGV_IMAGE ep[2], CGV_IMAGE block[16], CGU_INT bits, CGV_INDEXPACKED color_index[2]) +{ + CGU_INT levels = 1 << bits; + + 
CGV_IMAGE Atb1 = 0; + CGV_IMAGE sum_q = 0; + CGV_IMAGE sum_qq = 0; + CGV_IMAGE sum = 0; + + for (CGU_INT k1 = 0; k1 < 2; k1++) + { + CGV_INDEXPACKED qbits_shifted = color_index[k1]; + for (CGU_INT k2 = 0; k2 < 8; k2++) + { + CGU_INT k = k1 * 8 + k2; + CGV_IMAGE q = (CGV_TYPEINT)(qbits_shifted & 15); + qbits_shifted >>= 4; + + CGV_TYPEINT x = (levels - 1) - q; + CGV_TYPEINT y = q; + + sum_q += q; + sum_qq += q * q; + + sum += block[k]; + Atb1 += x * block[k]; + } + } + + CGV_IMAGE Atb2 = (levels - 1)*sum - Atb1; + + CGV_IMAGE Cxx = 16 * sq_image(levels - 1) - 2 * (levels - 1)*sum_q + sum_qq; + CGV_IMAGE Cyy = sum_qq; + CGV_IMAGE Cxy = (levels - 1)*sum_q - sum_qq; + CGV_IMAGE scale = (levels - 1) / (Cxx*Cyy - Cxy * Cxy); + + ep[0] = (Atb1*Cyy - Atb2 * Cxy)*scale; + ep[1] = (Atb2*Cxx - Atb1 * Cxy)*scale; + + ep[0] = clampf(ep[0], 0.0f, 255.0f); + ep[1] = clampf(ep[1], 0.0f, 255.0f); + + if (img_absf(Cxx*Cyy - Cxy * Cxy) < 0.001) + { + ep[0] = sum / 16; + ep[1] = ep[0]; + } +} + +CGV_ERROR icmp_channelQuantizeIndex(CGV_INDEXPACKED color_index[2], CGV_INDEX index[MAX_SUBSET_SIZE], CGV_IMAGE block[16], CGU_INT bits, CGV_IMAGE ep[]) +{ + uniform __constant CGV_RAMP* uniform Ramp = icmp_GetRamp(bits); + CGV_LEVELS levels = (1 << bits); + + color_index[0] = 0; + color_index[1] = 0; + + CGV_ERROR total_err = 0; + + for (CGU_INT k = 0; k < 16; k++) + { + CGV_IMAGE proj = (block[k] - ep[0]) / (ep[1] - ep[0] + 0.001f); + + CGV_INDEX q1 = (CGV_TYPEINT)(proj*levels + 0.5); + q1 = clampEPO(q1, 1, levels - 1); + + CGV_ERROR err0 = 0; + CGV_ERROR err1 = 0; + CGV_RAMP ramp0 = gather_ramp(Ramp, q1 - 1); + CGV_RAMP ramp1 = gather_ramp(Ramp, q1); + + CGV_IMAGE dec_v0 = (CGV_TYPEINT)(((64 - ramp0)*ep[0] + ramp0 * ep[1] + 32) / 64); + CGV_IMAGE dec_v1 = (CGV_TYPEINT)(((64 - ramp1)*ep[0] + ramp1 * ep[1] + 32) / 64); + err0 += sq_image(dec_v0 - block[k]); + err1 += sq_image(dec_v1 - block[k]); + + CGV_TYPEINT best_err = err1; + CGV_INDEX best_q = q1; + if (err0 < err1) + { + best_err = 
err0; + best_q = q1 - 1; + } + + index[k] = best_q; + color_index[k / 8] += ((CGV_INDEXPACKED)best_q) << 4 * (k % 8); + total_err += best_err; + } + + return total_err; +} + +CGV_ERROR icmp_optQuantizeIndex(BC7_EncodeState EncodeState[], CGV_INDEXPACKED color_index[2], CGV_INDEX index[MAX_SUBSET_SIZE], CGV_EPOCODE qep[2], CGV_IMAGE block[16], CGU_INT bits, CGU_INT epbits) +{ + CGV_IMAGE ep[2] = { 255,0 }; + + for (CGU_INT k = 0; k < 16; k++) + { + ep[0] = minImage(ep[0], block[k]); + ep[1] = maxImage(ep[1], block[k]); + } + + icmp_channel_quant_dequant2(qep, ep, epbits); + CGV_ERROR err = icmp_channelQuantizeIndex(color_index, index, block, bits, ep); + + // refine +#ifndef ASPM_GPU + uniform CMP_CONSTANT +#endif + CGV_ITTERATIONS refineIterations = EncodeState->refineIterations; + for (CGU_INT i = 0; i < refineIterations; i++) + { + icmp_refineEndpoints(ep, block, bits, color_index); + icmp_channel_quant_dequant2(qep, ep, epbits); + err = icmp_channelQuantizeIndex(color_index, index, block, bits, ep); + } + + return err; +} + + +INLINE CGV_SHIFT32 icmp_get_pattern2(CGV_PARTID part_id) +{ + CMP_STATIC uniform __constant CGV_SHIFT32 pattern_table[] = { + 0x50505050u, 0x40404040u, 0x54545454u, 0x54505040u, 0x50404000u, 0x55545450u, 0x55545040u, 0x54504000u, + 0x50400000u, 0x55555450u, 0x55544000u, 0x54400000u, 0x55555440u, 0x55550000u, 0x55555500u, 0x55000000u, + 0x55150100u, 0x00004054u, 0x15010000u, 0x00405054u, 0x00004050u, 0x15050100u, 0x05010000u, 0x40505054u, + 0x00404050u, 0x05010100u, 0x14141414u, 0x05141450u, 0x01155440u, 0x00555500u, 0x15014054u, 0x05414150u, + 0x44444444u, 0x55005500u, 0x11441144u, 0x05055050u, 0x05500550u, 0x11114444u, 0x41144114u, 0x44111144u, + 0x15055054u, 0x01055040u, 0x05041050u, 0x05455150u, 0x14414114u, 0x50050550u, 0x41411414u, 0x00141400u, + 0x00041504u, 0x00105410u, 0x10541000u, 0x04150400u, 0x50410514u, 0x41051450u, 0x05415014u, 0x14054150u, + 0x41050514u, 0x41505014u, 0x40011554u, 0x54150140u, 0x50505500u, 0x00555050u, 
0x15151010u, 0x54540404u, + 0xAA685050u, 0x6A5A5040u, 0x5A5A4200u, 0x5450A0A8u, 0xA5A50000u, 0xA0A05050u, 0x5555A0A0u, 0x5A5A5050u, + 0xAA550000u, 0xAA555500u, 0xAAAA5500u, 0x90909090u, 0x94949494u, 0xA4A4A4A4u, 0xA9A59450u, 0x2A0A4250u, + 0xA5945040u, 0x0A425054u, 0xA5A5A500u, 0x55A0A0A0u, 0xA8A85454u, 0x6A6A4040u, 0xA4A45000u, 0x1A1A0500u, + 0x0050A4A4u, 0xAAA59090u, 0x14696914u, 0x69691400u, 0xA08585A0u, 0xAA821414u, 0x50A4A450u, 0x6A5A0200u, + 0xA9A58000u, 0x5090A0A8u, 0xA8A09050u, 0x24242424u, 0x00AA5500u, 0x24924924u, 0x24499224u, 0x50A50A50u, + 0x500AA550u, 0xAAAA4444u, 0x66660000u, 0xA5A0A5A0u, 0x50A050A0u, 0x69286928u, 0x44AAAA44u, 0x66666600u, + 0xAA444444u, 0x54A854A8u, 0x95809580u, 0x96969600u, 0xA85454A8u, 0x80959580u, 0xAA141414u, 0x96960000u, + 0xAAAA1414u, 0xA05050A0u, 0xA0A5A5A0u, 0x96000000u, 0x40804080u, 0xA9A8A9A8u, 0xAAAAAA44u, 0x2A4A5254u + }; + + return gather_uint32(pattern_table, part_id); +} + +CGV_IMAGE icmp_get_pca_bound(CGV_IMAGE covar[10], CGU_CHANNEL channels) +{ + uniform __constant CGV_TYPEINT powerIterations = 4; // quite approximative, but enough for bounding + + CGV_IMAGE inv_var = 1.0 / (256 * 256); + for (CGU_INT k = 0; k < 10; k++) + { + covar[k] *= inv_var; + } + + CGV_IMAGE eps = sq_image(0.001); + covar[0] += eps; + covar[4] += eps; + covar[7] += eps; + + CGV_IMAGE axis[4]; + icmp_compute_axis(axis, covar, powerIterations, channels); + + CGV_IMAGE vec[4]; + if (channels == 3) icmp_ssymv3(vec, covar, axis); + if (channels == 4) icmp_ssymv4_2(vec, covar, axis); + + CGV_IMAGE sq_sum = 0.0f; + for (CGU_INT p = 0; p < channels; p++) sq_sum += sq_image(vec[p]); + CGV_IMAGE lambda = sqrt(sq_sum); + + CGV_IMAGE bound = covar[0] + covar[4] + covar[7]; + if (channels == 4) bound += covar[9]; + bound -= lambda; + bound = maxImage(bound, 0.0f); + + return bound; +} + +CGV_IMAGE icmp_block_pca_bound_split2(CGV_IMAGE image_src[64], CGV_MASK mask, CGV_IMAGE full_stats[15], CGU_CHANNEL channels) +{ + CGV_IMAGE stats[15]; + 
icmp_compute_stats_masked(stats, image_src, mask, channels); + + CGV_IMAGE covar1[10]; + icmp_covar_from_stats(covar1, stats, channels); + + for (CGU_INT i = 0; i < 15; i++) + stats[i] = full_stats[i] - stats[i]; + + CGV_IMAGE covar2[10]; + icmp_covar_from_stats(covar2, stats, channels); + + CGV_IMAGE bound = 0.0f; + bound += icmp_get_pca_bound(covar1, channels); + bound += icmp_get_pca_bound(covar2, channels); + + return sqrt(bound) * 256; +} + + +#ifdef USE_VARYING +INLINE void scatter_partid(varying CGV_PARTID* uniform ptr, CGV_TYPEINT idx, CGV_PARTID value) +{ + ptr[idx] = value; // (perf warning expected) +} +#endif + +void icmp_sort_partlist(CGV_PARTID list[], CGU_INT length, CGU_INT partial_count) +{ + for (CGU_INT k = 0; k < partial_count; k++) + { + CGV_TYPEINT best_idx = k; + CGV_PARTID best_value = list[k]; + for (CGU_INT i = k + 1; i < length; i++) + { + if (best_value > list[i]) + { + best_value = list[i]; + best_idx = i; + } + } + + // swap +#ifdef USE_VARYING + scatter_partid(list, best_idx, list[k]); +#else + list[best_idx] = list[k]; +#endif + list[k] = best_value; + } +} + +INLINE void copy_epocode(CGV_EPOCODE u[], CGV_EPOCODE v[], CGU_INT n) +{ + for (CGU_INT i = 0; i < n; i++) + { + u[i] = v[i]; + } +} + + +INLINE void copy_indexpacked(CGV_INDEXPACKED u[], CGV_INDEXPACKED v[], CGU_INT n) +{ + for (CGU_INT i = 0; i < n; i++) + { + u[i] = v[i]; + } +} + + +void icmp_enc_mode4_candidate( + BC7_EncodeState EncodeState[], + cmp_mode_parameters best_candidate[], + CGV_ERROR best_err[], + CGU_INT rotated_channel, + CGU_INT idxMode) +{ + CGU_INT bits = 2; + CGU_INT abits = 3; + CGU_INT aepbits = 6; + + if (idxMode == 1) + { + bits = 3; + abits = 2; + } + + CGV_IMAGE src_block[48]; + for (CGU_INT k = 0; k < 16; k++) + { + for (CGU_INT p = 0; p < 3; p++) + src_block[k + p * 16] = EncodeState->image_src[k + p * 16]; + + if (rotated_channel < 3) + { + // apply channel rotation + if (EncodeState->channels == 4) src_block[k + rotated_channel * 16] = 
EncodeState->image_src[k + 3 * 16]; + if (EncodeState->channels == 3) src_block[k + rotated_channel * 16] = 255; + } + } + + CGV_IMAGE block_endpoints[8]; + CGV_INDEXPACKED color_index[2]; + CGV_INDEX c_index[MAX_SUBSET_SIZE]; + CGV_EPOCODE color_qendpoint[8]; + + icmp_get_block_endpoints(block_endpoints, src_block, -1, 3); + icmp_ep_quant_dequant_mode4(color_qendpoint, block_endpoints); + CGV_ERROR err = icmp_GetQuantizeIndex(color_index, c_index, src_block, bits, block_endpoints, 0, 3); + + // refine + CGU_INT refineIterations = EncodeState->refineIterations; + for (CGU_INT i = 0; i < refineIterations; i++) + { + icmp_opt_endpoints(block_endpoints, src_block, bits, color_index, -1, 3); + icmp_ep_quant_dequant_mode4(color_qendpoint, block_endpoints); + err = icmp_GetQuantizeIndex(color_index, c_index, src_block, bits, block_endpoints, 0, 3); + } + + // encoding selected channel + CGV_EPOCODE alpha_qendpoint[2]; + CGV_INDEXPACKED alpha_index[2]; + CGV_INDEX a_index[MAX_SUBSET_SIZE]; + err += icmp_optQuantizeIndex(EncodeState, alpha_index, a_index, alpha_qendpoint, &EncodeState->image_src[rotated_channel * 16], abits, aepbits); + + if (err < *best_err) + { + copy_epocode(best_candidate->color_qendpoint, color_qendpoint, 8); + copy_epocode(best_candidate->alpha_qendpoint, alpha_qendpoint, 2); + copy_indexpacked(best_candidate->best_color_index, color_index, 2); + copy_indexpacked(best_candidate->best_alpha_index, alpha_index, 2); + best_candidate->rotated_channel = rotated_channel; + best_candidate->idxMode = idxMode; + *best_err = err; + } +} + +void icmp_mode5_candidate( + BC7_EncodeState EncodeState[], + cmp_mode_parameters best_candidate[], + CGV_ERROR best_err[], + CGU_INT rotated_channel) +{ + CGU_INT bits = 2; + CGU_INT abits = 2; + CGU_INT aepbits = 8; + + CGV_IMAGE block[48]; + for (CGU_INT k = 0; k < 16; k++) + { + for (CGU_INT p = 0; p < 3; p++) + block[k + p * 16] = EncodeState->image_src[k + p * 16]; + + if (rotated_channel < 3) + { + // apply channel 
rotation + if (EncodeState->channels == 4) block[k + rotated_channel * 16] = EncodeState->image_src[k + 3 * 16]; + if (EncodeState->channels == 3) block[k + rotated_channel * 16] = 255; + } + } + + CGV_IMAGE block_endpoints[8]; + CGV_EPOCODE color_qendpoint[8]; + CGV_INDEXPACKED color_index[2]; + CGV_INDEX c_index[MAX_SUBSET_SIZE]; + + icmp_get_block_endpoints(block_endpoints, block, -1, 3); + icmp_GetQuantizedEpoCode(color_qendpoint, block_endpoints, 5, 3); + CGV_ERROR err = icmp_GetQuantizeIndex(color_index, c_index, block, bits, block_endpoints, 0, 3); + + // refine + CGU_INT refineIterations = EncodeState->refineIterations; + for (CGU_INT i = 0; i < refineIterations; i++) + { + icmp_opt_endpoints(block_endpoints, block, bits, color_index, -1, 3); + icmp_GetQuantizedEpoCode(color_qendpoint, block_endpoints, 5, 3); + err = icmp_GetQuantizeIndex(color_index, c_index, block, bits, block_endpoints, 0, 3); + } + + // encoding selected channel + CGV_EPOCODE alpha_qendpoint[2]; + CGV_INDEXPACKED alpha_index[2]; + CGV_INDEX a_index[MAX_SUBSET_SIZE]; + err += icmp_optQuantizeIndex(EncodeState, alpha_index, a_index, alpha_qendpoint, &EncodeState->image_src[rotated_channel * 16], abits, aepbits); + + if (err < *best_err) + { + + icmp_swap_epocode(best_candidate->color_qendpoint, color_qendpoint, 8); + icmp_swap_indexpacked(best_candidate->best_color_index, color_index, 2); + icmp_swap_epocode(best_candidate->alpha_qendpoint, alpha_qendpoint, 2); + icmp_swap_indexpacked(best_candidate->best_alpha_index, alpha_index, 2); + best_candidate->rotated_channel = rotated_channel; + *best_err = err; + } +} + + +// =============== Mode Compression + +CGV_ERROR icmp_enc_mode01237_part_fast( + CGV_EPOCODE qep[24], + CGV_INDEXPACKED color_index[2], + CGV_INDEX index[MAX_SUBSET_SIZE], + CGV_IMAGE image_src[64], + CGV_PARTID part_id, + CGU_INT blockMode) +{ + CGV_SHIFT32 pattern = icmp_get_pattern2(part_id); + CGU_INT bits = 2; if (blockMode == 0 || blockMode == 1) bits = 3; + CGU_INT 
maxSubSets = 2; if (blockMode == 0 || blockMode == 2) maxSubSets = 3; + CGU_CHANNEL channels = 3; if (blockMode == 7) channels = 4; + + CGV_IMAGE block_endpoints[24]; + for (CGU_INT subset = 0; subset < maxSubSets; subset++) + { + CGV_MASK partition_mask = icmp_get_partition_mask(part_id, subset); + icmp_get_block_endpoints(&block_endpoints[subset * 8], image_src, partition_mask, channels); + } + + icmp_GetQuantizedEpoCode(qep, block_endpoints, blockMode, channels); + CGV_ERROR total_err = icmp_GetQuantizeIndex(color_index, index, image_src, bits, block_endpoints, pattern, channels); + + return total_err; +} + +void icmp_enc_mode01237(BC7_EncodeState EncodeState[], CGU_INT blockMode, CGV_PARTID part_list[], CGU_INT part_count) +{ + if (part_count == 0) return; + CGU_INT bits = 2; if (blockMode == 0 || blockMode == 1) bits = 3; + CGU_INT maxSubSets = 2; if (blockMode == 0 || blockMode == 2) maxSubSets = 3; + CGU_CHANNEL channels = 3; if (blockMode == 7) channels = 4; + + CGV_EPOCODE best_qep[24]; + CGV_INDEXPACKED best_endpoint[2]; + CGV_PARTID best_part_id = -1; + CGV_ERROR best_err = 1e99; + + for (CGU_INT part = 0; part < part_count; part++) + { + CGV_PARTID part_id = part_list[part] & 63; + if (maxSubSets == 3) part_id += 64; + + CGV_EPOCODE qep[24]; + CGV_INDEXPACKED color_index[2]; + CGV_INDEX index[MAX_SUBSET_SIZE]; + CGV_ERROR err = icmp_enc_mode01237_part_fast(qep, color_index, index, EncodeState->image_src, part_id, blockMode); + + if (err < best_err) + { + for (CGU_INT subset = 0; subset < 8 * maxSubSets; subset++) best_qep[subset] = qep[subset]; + for (CGU_INT k = 0; k < 2; k++) best_endpoint[k] = color_index[k]; + best_part_id = part_id; + best_err = err; + } + } + + // refine + CGU_INT refineIterations = EncodeState->refineIterations; + for (CGU_INT _i = 0; _i < refineIterations; _i++) + { + CGV_IMAGE ep[24]; + for (CGU_INT subset = 0; subset < maxSubSets; subset++) + { + CGV_SHIFT32 partition_mask = icmp_get_partition_mask(best_part_id, subset); + 
icmp_opt_endpoints(&ep[subset * 8], EncodeState->image_src, bits, best_endpoint, partition_mask, channels); + } + + CGV_EPOCODE qep[24]; + CGV_INDEXPACKED color_index[2]; + CGV_INDEX index[MAX_SUBSET_SIZE]; + + icmp_GetQuantizedEpoCode(qep, ep, blockMode, channels); + + CGV_SHIFT32 pattern = icmp_get_pattern2(best_part_id); + CGV_ERROR err = icmp_GetQuantizeIndex(color_index, index, EncodeState->image_src, bits, ep, pattern, channels); + + if (err < best_err) + { + for (CGU_INT subset = 0; subset < 8 * maxSubSets; subset++) best_qep[subset] = qep[subset]; + for (CGU_INT k = 0; k < 2; k++) best_endpoint[k] = color_index[k]; + best_err = err; + } + } + + if (blockMode != 7) best_err += EncodeState->opaque_err; // take into account alpha channel + + if (best_err < EncodeState->best_err) + { + EncodeState->best_err = best_err; + icmp_encode_mode01237(EncodeState->best_cmp_out, best_qep, best_endpoint, best_part_id, blockMode); + } +} + +void icmp_mode5(BC7_EncodeState EncodeState[]) +{ + cmp_mode_parameters best_candidate; + CGV_ERROR best_err = EncodeState->best_err; + +#ifdef ASPM_GPU + cmp_memsetBC7((CGV_BYTE *)&best_candidate, 0, sizeof(cmp_mode_parameters)); +#else + memset(&best_candidate, 0, sizeof(cmp_mode_parameters)); +#endif + + for (CGU_CHANNEL ch = 0; ch < EncodeState->channels; ch++) + { + icmp_mode5_candidate(EncodeState, &best_candidate, &best_err, ch); + } + + if (best_err < EncodeState->best_err) + { + EncodeState->best_err = best_err; + EncodeState->cmp_isout16Bytes = FALSE; + icmp_Encode_mode5(EncodeState->best_cmp_out, &best_candidate); + } +} + +void icmp_mode6(BC7_EncodeState EncodeState[]) +{ + CGV_IMAGE block_endpoints[8]; + icmp_get_block_endpoints(block_endpoints, EncodeState->image_src, -1, 4); + + CGV_EPOCODE epo_code[8]; + icmp_GetQuantizedEpoCode(epo_code, block_endpoints, 6, 4); + + CGV_INDEXPACKED color_index[2]; + CGV_INDEX index[MAX_SUBSET_SIZE]; + CGV_ERROR err = icmp_GetQuantizeIndex(color_index, index, EncodeState->image_src, 4, 
block_endpoints, 0, 4); + + // refine + CGU_INT refineIterations = EncodeState->refineIterations; + for (CGU_INT i = 0; i < refineIterations; i++) + { + icmp_opt_endpoints(block_endpoints, EncodeState->image_src, 4, color_index, -1, 4); + icmp_GetQuantizedEpoCode(epo_code, block_endpoints, 6, EncodeState->channels); + err = icmp_GetQuantizeIndex(color_index, index, EncodeState->image_src, 4, block_endpoints, 0, 4); + } + + if (err < EncodeState->best_err) + { + EncodeState->best_err = err; + EncodeState->cmp_isout16Bytes = FALSE; + icmp_encode_mode6(EncodeState->best_cmp_out, epo_code, color_index); + } +} + +void icmp_mode02(BC7_EncodeState EncodeState[]) +{ + CGV_PARTID part_list[64]; + for (CGU_INT part = 0; part < 64; part++) + part_list[part] = part; + + if (EncodeState->validModeMask & 0x01) + icmp_enc_mode01237(EncodeState, 0, part_list, 16); + if (EncodeState->validModeMask & 0x04) + icmp_enc_mode01237(EncodeState, 2, part_list, 64); // usually not worth the time +} + +void icmp_mode7(BC7_EncodeState EncodeState[]) +{ + CGV_IMAGE full_stats[15]; + icmp_compute_stats_masked(full_stats, EncodeState->image_src, -1, EncodeState->channels); + + CGV_PARTID part_list[64]; + for (CGU_INT part = 0; part < 64; part++) + { + CGV_MASK partition_mask = icmp_get_partition_mask(part + 0, 0); + CGV_IMAGE bound12 = icmp_block_pca_bound_split2(EncodeState->image_src, partition_mask, full_stats, EncodeState->channels); + CGV_PARTID bound = (CGV_TYPEINT)(bound12); + part_list[part] = part + bound * 64; + } + + icmp_sort_partlist(part_list, 64, EncodeState->part_count); + icmp_enc_mode01237(EncodeState, 7, part_list, EncodeState->part_count); +} + +void icmp_mode13(BC7_EncodeState EncodeState[]) +{ + CGV_IMAGE full_stats[15]; + icmp_compute_stats_masked(full_stats, EncodeState->image_src, -1, 3); + + CGV_PARTID part_list[64]; + for (CGU_INT part = 0; part < 64; part++) + { + CGV_MASK partition_mask = icmp_get_partition_mask(part + 0, 0); + CGV_IMAGE bound12 = 
icmp_block_pca_bound_split2(EncodeState->image_src, partition_mask, full_stats, 3); + CGV_PARTID bound = (CGV_TYPEINT)(bound12); + part_list[part] = part + bound * 64; + } + + icmp_sort_partlist(part_list, 64, EncodeState->part_count); + + if (EncodeState->validModeMask & 0x02) + icmp_enc_mode01237(EncodeState, 1, part_list, EncodeState->part_count); + if (EncodeState->validModeMask & 0x08) + icmp_enc_mode01237(EncodeState, 3, part_list, EncodeState->part_count); +} + +void icmp_mode4(BC7_EncodeState EncodeState[]) +{ + cmp_mode_parameters best_candidate; + CGV_ERROR best_err = EncodeState->best_err; +#ifdef ASPM_GPU + cmp_memsetBC7((CGV_BYTE *)&best_candidate, 0, sizeof(cmp_mode_parameters)); +#else + memset(&best_candidate, 0, sizeof(cmp_mode_parameters)); +#endif + + for (CGU_CHANNEL rotated_channel = 0; rotated_channel < EncodeState->channels; rotated_channel++) + { + icmp_enc_mode4_candidate(EncodeState, &best_candidate, &best_err, rotated_channel, 0); + icmp_enc_mode4_candidate(EncodeState, &best_candidate, &best_err, rotated_channel, 1); + } + + // mode 4 + if (best_err < EncodeState->best_err) + { + EncodeState->best_err = best_err; + icmp_encode_mode4(EncodeState->best_cmp_out, &best_candidate); + } +} + +#endif +//===================================== COMPRESS CODE ============================================= + +bool notValidBlockForMode( + CGU_UINT32 blockMode, + CGU_BOOL blockNeedsAlpha, + CGU_BOOL blockAlphaZeroOne, + uniform CMP_GLOBAL BC7_Encode u_BC7Encode[]) +{ + // Do we need to skip alpha processing blocks + if((blockNeedsAlpha == FALSE) && (blockMode > 3)) + { + return TRUE; + } + + // Optional restriction for colour-only blocks so that they + // don't use modes that have combined colour+alpha - this + // avoids the possibility that the encoder might choose an + // alpha other than 1.0 (due to parity) and cause something to + // become accidentally slightly transparent (it's possible that + // when encoding 3-component texture applications will 
assume that + // the 4th component can safely be assumed to be 1.0 all the time) + if ((blockNeedsAlpha == FALSE) && + (u_BC7Encode->colourRestrict == TRUE) && + ((blockMode == 6)||(blockMode == 7))) // COMBINED_ALPHA + { + return TRUE; + } + + // Optional restriction for blocks with alpha to avoid issues with + // punch-through or thresholded alpha encoding + if((blockNeedsAlpha == TRUE) && + (u_BC7Encode->alphaRestrict == TRUE) && + (blockAlphaZeroOne == TRUE) && + ((blockMode == 6)||(blockMode == 7))) // COMBINED_ALPHA + { + return TRUE; + } + + return FALSE; +} + +void BC7_CompressBlock( + BC7_EncodeState EncodeState[], +uniform CMP_GLOBAL BC7_Encode u_BC7Encode[]) +{ + CGU_BOOL blockNeedsAlpha = FALSE; + CGU_BOOL blockAlphaZeroOne = FALSE; + + CGV_ERROR alpha_err = 0.0f; + CGV_IMAGE alpha_min = 255.0F; + + for (CGU_INT k=0; kimage_src[k+COMP_ALPHA*SOURCE_BLOCK_SIZE] < alpha_min) + alpha_min = EncodeState->image_src[k+COMP_ALPHA*SOURCE_BLOCK_SIZE]; + + alpha_err += sq_image( EncodeState->image_src[k+COMP_ALPHA*SOURCE_BLOCK_SIZE]-255.0F); + + if (blockAlphaZeroOne == FALSE) + { + if(( EncodeState->image_src[k+COMP_ALPHA*SOURCE_BLOCK_SIZE] == 255.0F) || + ( EncodeState->image_src[k+COMP_ALPHA*SOURCE_BLOCK_SIZE] == 0.0F)) + { + blockAlphaZeroOne = TRUE; + } + } + } + + if (alpha_min != 255.0F) + { + blockNeedsAlpha = TRUE; + } + + EncodeState->best_err = CMP_FLOAT_MAX; + EncodeState->opaque_err = alpha_err; + +#ifdef USE_ICMP + EncodeState->refineIterations = 4; + EncodeState->fastSkipTreshold = 4; + EncodeState->channels = 4; + EncodeState->part_count = 64; + EncodeState->cmp_isout16Bytes = FALSE; +#else + EncodeState->cmp_isout16Bytes = TRUE; +#endif + + // We change the order in which we visit the block modes to try to maximize the chance + // that we manage to early out as quickly as possible. 
+ // This is a significant performance optimization for the lower quality modes where the + // exit threshold is higher, and also tends to improve quality (as the generally higher quality + // modes are now enumerated earlier, so the first encoding that passes the threshold will + // tend to pass by a greater margin than if we used a dumb ordering, and thus overall error will + // be improved) + CGU_INT blockModeOrder[NUM_BLOCK_TYPES] = {4, 6, 1, 3, 0, 2, 7, 5}; + + for (CGU_INT block=0; block < NUM_BLOCK_TYPES; block++) + { + CGU_INT blockMode = blockModeOrder[block]; + + if (u_BC7Encode->quality < BC7_qFAST_THRESHOLD) + { + if ( notValidBlockForMode(blockMode,blockNeedsAlpha,blockAlphaZeroOne,u_BC7Encode) ) + continue; + } + + CGU_INT Mode = 0x0001 << blockMode; + if (!(u_BC7Encode->validModeMask & Mode)) + continue; + switch (blockMode) + { + // image processing with no alpha + case 0: + #ifdef USE_ICMP + icmp_mode02(EncodeState); + #else + Compress_mode01237(blockMode, EncodeState, u_BC7Encode); + #endif + break; + case 1: + #ifdef USE_ICMP + icmp_mode13(EncodeState); + #else + Compress_mode01237(blockMode, EncodeState, u_BC7Encode); + #endif + break; + case 2: + #ifdef USE_ICMP + icmp_mode13(EncodeState); + #else + Compress_mode01237(blockMode, EncodeState, u_BC7Encode); + #endif + break; + case 3: + #ifdef USE_ICMP + icmp_mode13(EncodeState); + #else + Compress_mode01237(blockMode, EncodeState, u_BC7Encode); + #endif + break; + // image processing with alpha + case 4: + #ifdef USE_ICMP + icmp_mode4(EncodeState); + #else + Compress_mode45(blockMode, EncodeState, u_BC7Encode); + #endif + break; + case 5: + #ifdef USE_ICMP + icmp_mode5(EncodeState); + #else + Compress_mode45(blockMode, EncodeState, u_BC7Encode); + #endif + break; + case 6: + #ifdef USE_ICMP + icmp_mode6(EncodeState); + #else + Compress_mode6( EncodeState, u_BC7Encode); + #endif + break; + case 7: + #ifdef USE_ICMP + icmp_mode7(EncodeState); + #else + Compress_mode01237(blockMode, EncodeState, 
u_BC7Encode); + #endif + break; + } + + // Early out if we found we can compress with error below the quality threshold + if( EncodeState->best_err <= u_BC7Encode->errorThreshold) + { + break; + } + } + +} + +//====================================== BC7_ENCODECLASS END ============================================= + +#ifndef ASPM_GPU + +INLINE void load_block_interleaved_rgba2(CGV_IMAGE image_src[64], uniform texture_surface* uniform src, CGUV_BLOCKWIDTH block_xx, CGU_INT block_yy) +{ + for (CGU_INT y=0; y<4; y++) + for (CGU_INT x=0; x<4; x++) + { + CGU_UINT32 * uniform src_ptr = (CGV_SHIFT32*)&src->ptr[(block_yy*4+y)*src->stride]; +#ifdef USE_VARYING + CGV_SHIFT32 rgba = gather_partid(src_ptr, block_xx*4+x); + image_src[16*0+y*4+x] = (CGV_FLOAT)((rgba>> 0)&255); + image_src[16*1+y*4+x] = (CGV_FLOAT)((rgba>> 8)&255); + image_src[16*2+y*4+x] = (CGV_FLOAT)((rgba>>16)&255); + image_src[16*3+y*4+x] = (CGV_FLOAT)((rgba>>24)&255); +#else + CGV_SHIFT32 rgba = src_ptr[block_xx*4+x]; + image_src[16*0+y*4+x] = (CGU_FLOAT)((rgba>> 0)&255); + image_src[16*1+y*4+x] = (CGU_FLOAT)((rgba>> 8)&255); + image_src[16*2+y*4+x] = (CGU_FLOAT)((rgba>>16)&255); + image_src[16*3+y*4+x] = (CGU_FLOAT)((rgba>>24)&255); +#endif + } +} + + +#if defined(CMP_USE_FOREACH_ASPM) || defined(USE_VARYING) +INLINE void scatter_uint2(CGU_UINT32 * ptr, CGUV_BLOCKWIDTH idx, CGV_SHIFT32 value) +{ + ptr[idx] = value; // (perf warning expected) +} +#endif + +INLINE void store_data_uint32(CGU_UINT8 dst[], CGU_INT width, CGUV_BLOCKWIDTH v_xx, CGU_INT yy, CGV_SHIFT32 data[], CGU_INT data_size) +{ + for (CGU_INT k=0; kimage_src,u_srcptr, block_x, block_y); + + BC7_CompressBlock(state, u_settings); + + if (state->cmp_isout16Bytes) + store_data_uint8(u_dst, u_srcptr->width, block_x, block_y, state->cmp_out, 16); + else + store_data_uint32(u_dst, u_srcptr->width, block_x, block_y, state->best_cmp_out, 4); + +} + + CMP_EXPORT void CompressBlockBC7_encode( uniform texture_surface src[], CGU_UINT8 dst[], uniform 
BC7_Encode settings[]) +{ + // bc7_isa(); ASPM_PRINT(("ASPM encode [%d,%d]\n",bc7_isa(),src->width,src->height)); + + for (CGU_INT u_yy = 0; u_yyheight/4; u_yy++) + #ifdef CMP_USE_FOREACH_ASPM + foreach (v_xx = 0 ... src->width/4) + { + #else + for (CGUV_BLOCKWIDTH v_xx = 0; v_xxwidth/4; v_xx++) + { + #endif + CompressBlockBC7_XY(src, v_xx, u_yy, dst, settings); + } +} + +#endif + +#ifndef ASPM_GPU +#ifndef ASPM +//======================= DECOMPRESS ========================================= +#ifndef USE_HIGH_PRECISION_INTERPOLATION_BC7 +CGU_UINT16 aWeight2[] = { 0, 21, 43, 64 }; +CGU_UINT16 aWeight3[] = { 0, 9, 18, 27, 37, 46, 55, 64 }; +CGU_UINT16 aWeight4[] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + +CGU_UINT8 interpolate(CGU_UINT8 e0, CGU_UINT8 e1, CGU_UINT8 index, CGU_UINT8 indexprecision) +{ + if (indexprecision == 2) + return (CGU_UINT8)(((64 - aWeight2[index])*CGU_UINT16(e0) + aWeight2[index] * CGU_UINT16(e1) + 32) >> 6); + else if (indexprecision == 3) + return (CGU_UINT8)(((64 - aWeight3[index])*CGU_UINT16(e0) + aWeight3[index] * CGU_UINT16(e1) + 32) >> 6); + else // indexprecision == 4 + return (CGU_UINT8)(((64 - aWeight4[index])*CGU_UINT16(e0) + aWeight4[index] * CGU_UINT16(e1) + 32) >> 6); +} +#endif + +void GetBC7Ramp(CGU_UINT32 endpoint[][MAX_DIMENSION_BIG], + CGU_FLOAT ramp[MAX_DIMENSION_BIG][(1<> componentBits[i]); + ep[1][i] += (CGU_UINT32)(ep[1][i] >> componentBits[i]); + + ep[0][i] = min8(255, max8(0, static_cast(ep[0][i]))); + ep[1][i] = min8(255, max8(0, static_cast(ep[1][i]))); + } + } + + // If this block type has no explicit alpha channel + // then make sure alpha is 1.0 for all points on the ramp + if(!componentBits[COMP_ALPHA]) + { + ep[0][COMP_ALPHA] = ep[1][COMP_ALPHA] = 255; + } + + CGU_UINT32 rampIndex = clusters[0]; + + rampIndex = (CGU_UINT32)(log((double)rampIndex) / log(2.0)); + + // Generate colours for the RGB ramp + for(i=0; i < clusters[0]; i++) + { +#ifdef USE_HIGH_PRECISION_INTERPOLATION_BC7 + 
ramp[COMP_RED][i] = (CGU_FLOAT)floor((ep[0][COMP_RED] * (1.0-rampLerpWeightsBC7[rampIndex][i])) + + (ep[1][COMP_RED] * rampLerpWeightsBC7[rampIndex][i]) + 0.5); + ramp[COMP_RED][i] = bc7_minf(255.0, bc7_maxf(0., ramp[COMP_RED][i])); + ramp[COMP_GREEN][i] = (CGU_FLOAT)floor((ep[0][COMP_GREEN] * (1.0-rampLerpWeightsBC7[rampIndex][i])) + + (ep[1][COMP_GREEN] * rampLerpWeightsBC7[rampIndex][i]) + 0.5); + ramp[COMP_GREEN][i] = bc7_minf(255.0, bc7_maxf(0., ramp[COMP_GREEN][i])); + ramp[COMP_BLUE][i] = (CGU_FLOAT)floor((ep[0][COMP_BLUE] * (1.0-rampLerpWeightsBC7[rampIndex][i])) + + (ep[1][COMP_BLUE] * rampLerpWeightsBC7[rampIndex][i]) + 0.5); + ramp[COMP_BLUE][i] = bc7_minf(255.0, bc7_maxf(0., ramp[COMP_BLUE][i])); +#else + ramp[COMP_RED][i] = interpolate(ep[0][COMP_RED], ep[1][COMP_RED], i, rampIndex); + ramp[COMP_GREEN][i] = interpolate(ep[0][COMP_GREEN], ep[1][COMP_GREEN], i, rampIndex); + ramp[COMP_BLUE][i] = interpolate(ep[0][COMP_BLUE], ep[1][COMP_BLUE], i, rampIndex); +#endif + } + + + rampIndex = clusters[1]; + rampIndex = (CGU_UINT32)(log((CGU_FLOAT)rampIndex) / log(2.0)); + + if(!componentBits[COMP_ALPHA]) + { + for(i=0; i < clusters[1]; i++) + { + ramp[COMP_ALPHA][i] = 255.; + } + } + else + { + + // Generate alphas + for(i=0; i < clusters[1]; i++) + { +#ifdef USE_HIGH_PRECISION_INTERPOLATION_BC7 + ramp[COMP_ALPHA][i] = (CGU_FLOAT)floor((ep[0][COMP_ALPHA] * (1.0-rampLerpWeightsBC7[rampIndex][i])) + + (ep[1][COMP_ALPHA] * rampLerpWeightsBC7[rampIndex][i]) + 0.5); + ramp[COMP_ALPHA][i] = bc7_minf(255.0, bc7_maxf(0., ramp[COMP_ALPHA][i])); +#else + ramp[COMP_ALPHA][i] = interpolate(ep[0][COMP_ALPHA], ep[1][COMP_ALPHA], i, rampIndex); +#endif + } + + } +} + +// +// Bit reader - reads one bit from a buffer at the current bit offset +// and increments the offset +// + +CGU_UINT32 ReadBit(const CGU_UINT8 base[],CGU_UINT32 &m_bitPosition) +{ + int byteLocation; + int remainder; + CGU_UINT32 bit = 0; + byteLocation = m_bitPosition/8; + remainder = m_bitPosition % 8; + + 
bit = base[byteLocation]; + bit >>= remainder; + bit &= 0x1; + // Increment bit position + m_bitPosition++; + return (bit); +} + +void DecompressDualIndexBlock( + CGU_UINT8 out[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG], + const CGU_UINT8 in[COMPRESSED_BLOCK_SIZE], + CGU_UINT32 endpoint[2][MAX_DIMENSION_BIG], + CGU_UINT32 &m_bitPosition, + CGU_UINT32 m_rotation, + CGU_UINT32 m_blockMode, + CGU_UINT32 m_indexSwap, + CGU_UINT32 m_componentBits[MAX_DIMENSION_BIG]) +{ + CGU_UINT32 i, j, k; + CGU_FLOAT ramp[MAX_DIMENSION_BIG][1<(bti[m_blockMode].indexBits[i] - 1); k++) + { + blockIndices[i][j] |= (CGU_UINT32)ReadBit(in,m_bitPosition) << k; + } + } + else + { + for(k=0;k 7) + { + // Something really bad happened... + return; + } + + for (i = 0; i < bti[m_blockMode].rotationBits; i++) + { + m_rotation |= ReadBit(in, m_bitPosition) << i; + } + for (i = 0; i < bti[m_blockMode].indexModeBits; i++) + { + m_indexSwap |= ReadBit(in, m_bitPosition) << i; + } + + for (i = 0; i < bti[m_blockMode].partitionBits; i++) + { + m_partition |= ReadBit(in, m_bitPosition) << i; + } + + + + if (bti[m_blockMode].encodingType == NO_ALPHA) + { + m_componentBits[COMP_ALPHA] = 0; + m_componentBits[COMP_RED] = + m_componentBits[COMP_GREEN] = + m_componentBits[COMP_BLUE] = bti[m_blockMode].vectorBits / 3; + } + else if (bti[m_blockMode].encodingType == COMBINED_ALPHA) + { + m_componentBits[COMP_ALPHA] = + m_componentBits[COMP_RED] = + m_componentBits[COMP_GREEN] = + m_componentBits[COMP_BLUE] = bti[m_blockMode].vectorBits / 4; + } + else if (bti[m_blockMode].encodingType == SEPARATE_ALPHA) + { + m_componentBits[COMP_ALPHA] = bti[m_blockMode].scalarBits; + m_componentBits[COMP_RED] = + m_componentBits[COMP_GREEN] = + m_componentBits[COMP_BLUE] = bti[m_blockMode].vectorBits / 3; + } + + CGU_UINT32 subset, ep, component; + // Endpoints are stored in the following order RRRR GGGG BBBB (AAAA) (PPPP) + // i.e. 
components are packed together + // Loop over components + for (component = 0; component < MAX_DIMENSION_BIG; component++) + { + // loop over subsets + for (subset = 0; subset < (int)bti[m_blockMode].subsetCount; subset++) + { + // Loop over endpoints + for (ep = 0; ep < 2; ep++) + { + endpoint[subset][ep][component] = 0; + for (j = 0; j < m_componentBits[component]; j++) + { + endpoint[subset][ep][component] |= ReadBit(in, m_bitPosition) << j; + } + } + } + } + + + // Now get any parity bits + if (bti[m_blockMode].pBitType != NO_PBIT) + { + for (subset = 0; subset < (int)bti[m_blockMode].subsetCount; subset++) + { + CGU_UINT32 pBit[2]; + if (bti[m_blockMode].pBitType == ONE_PBIT) + { + pBit[0] = ReadBit(in, m_bitPosition); + pBit[1] = pBit[0]; + } + else if (bti[m_blockMode].pBitType == TWO_PBIT) + { + pBit[0] = ReadBit(in, m_bitPosition); + pBit[1] = ReadBit(in, m_bitPosition); + } + + for (component = 0; component < MAX_DIMENSION_BIG; component++) + { + if (m_componentBits[component]) + { + endpoint[subset][0][component] <<= 1; + endpoint[subset][1][component] <<= 1; + endpoint[subset][0][component] |= pBit[0]; + endpoint[subset][1][component] |= pBit[1]; + } + } + } + } + + if (bti[m_blockMode].pBitType != NO_PBIT) + { + // Now that we've unpacked the parity bits, update the component size information + // for the ramp generator + for (j = 0; j < MAX_DIMENSION_BIG; j++) + { + if (m_componentBits[j]) + { + m_componentBits[j] += 1; + } + } + } + + // If this block has two independent sets of indices then put it to that decoder + if (bti[m_blockMode].encodingType == SEPARATE_ALPHA) + { + DecompressDualIndexBlock(out, in, endpoint[0], m_bitPosition, m_rotation, m_blockMode, m_indexSwap, m_componentBits); + return; + } + + CGU_UINT32 fixup[MAX_SUBSETS] = { 0, 0, 0 }; + switch (bti[m_blockMode].subsetCount) + { + case 3: + fixup[1] = BC7_FIXUPINDICES_LOCAL[2][m_partition][1]; + fixup[2] = BC7_FIXUPINDICES_LOCAL[2][m_partition][2]; + break; + case 2: + fixup[1] = 
BC7_FIXUPINDICES_LOCAL[1][m_partition][1]; + break; + default: + break; + } + + //-------------------------------------------------------------------- + // New Code : Possible replacement for BC7_PARTITIONS for CPU code + //-------------------------------------------------------------------- + // Extract index bits + // for (i = 0; i < MAX_SUBSET_SIZE; i++) + // { + // CGV_UINT8 p = get_partition_subset(m_partition, bti[m_blockMode].subsetCount - 1, i); + // //CGU_UINT32 p = partitionTable[i]; + // blockIndices[i] = 0; + // CGU_UINT32 bitsToRead = bti[m_blockMode].indexBits[0]; + // + // // If this is a fixup index then set the implicit bit + // if (i == fixup[p]) + // { + // blockIndices[i] &= ~(1 << (bitsToRead - 1)); + // bitsToRead--; + // } + // + // for (j = 0; j < bitsToRead; j++) + // { + // blockIndices[i] |= ReadBit(in, m_bitPosition) << j; + // } + // } + CGU_BYTE *partitionTable = (CGU_BYTE*)BC7_PARTITIONS[bti[m_blockMode].subsetCount-1][m_partition]; + + // Extract index bits + for(i=0; i < MAX_SUBSET_SIZE; i++) + { + CGU_BYTE p = partitionTable[i]; + blockIndices[i] = 0; + CGU_BYTE bitsToRead = bti[m_blockMode].indexBits[0]; + + // If this is a fixup index then set the implicit bit + if(i==fixup[p]) + { + blockIndices[i] &= ~(1 << (bitsToRead-1)); + bitsToRead--; + } + + for(j=0;jimage_src[offsetR++] = (CGV_IMAGE)image_src[i][0]; + state->image_src[offsetG++] = (CGV_IMAGE)image_src[i][1]; + state->image_src[offsetB++] = (CGV_IMAGE)image_src[i][2]; + state->image_src[offsetA++] = (CGV_IMAGE)image_src[i][3]; + } + + BC7_CompressBlock(state, u_BC7Encode); + + if (state->cmp_isout16Bytes) + { + for (CGU_UINT8 i = 0; i < COMPRESSED_BLOCK_SIZE; i++) + { + cmp_out[i] = state->cmp_out[i]; + } + } + else + { +#ifdef ASPM_GPU + cmp_memcpy(cmp_out, (CGU_UINT8 *)state->best_cmp_out, 16); +#else + memcpy(cmp_out, state->best_cmp_out, 16); +#endif + } +} + +//======================= CPU USER INTERFACES ==================================== + +int CMP_CDECL 
CreateOptionsBC7(void **options) +{ + (*options) = new BC7_Encode; + if (!options) return CGU_CORE_ERR_NEWMEM; + init_BC7ramps(); + SetDefaultBC7Options((BC7_Encode *)(*options)); + return CGU_CORE_OK; +} + +int CMP_CDECL DestroyOptionsBC7(void *options) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + BC7_Encode *BCOptions = reinterpret_cast (options); + delete BCOptions; + return CGU_CORE_OK; +} + +int CMP_CDECL SetErrorThresholdBC7(void *options, CGU_FLOAT minThreshold, CGU_FLOAT maxThreshold) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + BC7_Encode *BC7optionsDefault = (BC7_Encode *)options; + + if (minThreshold < 0.0f) minThreshold = 0.0f; + if (maxThreshold < 0.0f) maxThreshold = 0.0f; + + BC7optionsDefault->minThreshold = minThreshold; + BC7optionsDefault->maxThreshold = maxThreshold; + return CGU_CORE_OK; +} + +int CMP_CDECL SetQualityBC7(void *options, CGU_FLOAT fquality) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + + BC7_Encode *BC7optionsDefault = (BC7_Encode *)options; + if (fquality < 0.0f) fquality = 0.0f; + else + if (fquality > 1.0f) fquality = 1.0f; + BC7optionsDefault->quality = fquality; + + // Set Error Thresholds + BC7optionsDefault->errorThreshold = BC7optionsDefault->maxThreshold * (1.0f - fquality); + if(fquality > BC7_qFAST_THRESHOLD) + BC7optionsDefault->errorThreshold += BC7optionsDefault->minThreshold; + + return CGU_CORE_OK; +} + +int CMP_CDECL SetMaskBC7(void *options, CGU_UINT8 mask) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + BC7_Encode *BC7options = (BC7_Encode *)options; + BC7options->validModeMask = mask; + return CGU_CORE_OK; +} + +int CMP_CDECL SetAlphaOptionsBC7(void *options, CGU_BOOL imageNeedsAlpha, CGU_BOOL colourRestrict, CGU_BOOL alphaRestrict) +{ + if (!options) return CGU_CORE_ERR_INVALIDPTR; + BC7_Encode *u_BC7Encode = (BC7_Encode *)options; + u_BC7Encode->imageNeedsAlpha = imageNeedsAlpha; + u_BC7Encode->colourRestrict = colourRestrict; + u_BC7Encode->alphaRestrict = alphaRestrict; + return 
CGU_CORE_OK; +} + +int CMP_CDECL CompressBlockBC7( const unsigned char *srcBlock, + unsigned int srcStrideInBytes, + CMP_GLOBAL unsigned char cmpBlock[16], + const void* options = NULL) +{ + CMP_Vec4uc inBlock[SOURCE_BLOCK_SIZE]; + + //---------------------------------- + // Fill the inBlock with source data + //---------------------------------- + CGU_INT srcpos = 0; + CGU_INT dstptr = 0; + for (CGU_UINT8 row = 0; row < 4; row++) + { + srcpos = row * srcStrideInBytes; + for (CGU_UINT8 col = 0; col < 4; col++) + { + inBlock[dstptr].x = CGU_UINT8(srcBlock[srcpos++]); + inBlock[dstptr].y = CGU_UINT8(srcBlock[srcpos++]); + inBlock[dstptr].z = CGU_UINT8(srcBlock[srcpos++]); + inBlock[dstptr].w = CGU_UINT8(srcBlock[srcpos++]); + dstptr++; + } + } + + + BC7_Encode *u_BC7Encode = (BC7_Encode *)options; + BC7_Encode BC7EncodeDefault = { 0 }; + if (u_BC7Encode == NULL) + { + u_BC7Encode = &BC7EncodeDefault; + SetDefaultBC7Options(u_BC7Encode); + init_BC7ramps(); + } + + BC7_EncodeState EncodeState +#ifndef ASPM + = { 0 } +#endif + ; + EncodeState.best_err = CMP_FLOAT_MAX; + EncodeState.validModeMask = u_BC7Encode->validModeMask; + EncodeState.part_count = u_BC7Encode->part_count; + EncodeState.channels = static_cast(u_BC7Encode->channels); + + CGU_UINT8 offsetR = 0; + CGU_UINT8 offsetG = 16; + CGU_UINT8 offsetB = 32; + CGU_UINT8 offsetA = 48; + CGU_UINT32 offsetSRC = 0; + for (CGU_UINT8 i = 0; i < SOURCE_BLOCK_SIZE; i++) + { + EncodeState.image_src[offsetR++] = (CGV_IMAGE)inBlock[offsetSRC].x; + EncodeState.image_src[offsetG++] = (CGV_IMAGE)inBlock[offsetSRC].y; + EncodeState.image_src[offsetB++] = (CGV_IMAGE)inBlock[offsetSRC].z; + EncodeState.image_src[offsetA++] = (CGV_IMAGE)inBlock[offsetSRC].w; + offsetSRC++; + } + + BC7_CompressBlock(&EncodeState, u_BC7Encode); + + if (EncodeState.cmp_isout16Bytes) + { + for (CGU_UINT8 i = 0; i < COMPRESSED_BLOCK_SIZE; i++) + { + cmpBlock[i] = EncodeState.cmp_out[i]; + } + } + else + { + memcpy(cmpBlock, EncodeState.best_cmp_out, 16); 
+ } + + return CGU_CORE_OK; +} + +int CMP_CDECL DecompressBlockBC7(const unsigned char cmpBlock[16], + unsigned char srcBlock[64], + const void *options = NULL) { + BC7_Encode *u_BC7Encode = (BC7_Encode *)options; + BC7_Encode BC7EncodeDefault = { 0 }; // for q = 0.05 + if (u_BC7Encode == NULL) + { + // set for q = 1.0 + u_BC7Encode = &BC7EncodeDefault; + SetDefaultBC7Options(u_BC7Encode); + init_BC7ramps(); + } + DecompressBC7_internal((CGU_UINT8(*)[4])srcBlock, (CGU_UINT8 *)cmpBlock,u_BC7Encode); + return CGU_CORE_OK; +} +#endif +#endif + +//============================================== OpenCL USER INTERFACE ==================================================== +#ifdef ASPM_GPU +CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const CGU_Vec4uc ImageSource[], + CMP_GLOBAL CGV_CMPOUT ImageDestination[], + uniform CMP_GLOBAL Source_Info SourceInfo[], + uniform CMP_GLOBAL BC7_Encode BC7Encode[] ) +{ + CGU_INT xID=0; + CGU_INT yID=0; + + xID = get_global_id(0); // ToDo: Define a size_t 32 bit and 64 bit basd on clGetDeviceInfo + yID = get_global_id(1); + + CGU_INT srcWidth = SourceInfo->m_src_width; + CGU_INT srcHeight = SourceInfo->m_src_height; + if (xID >= (srcWidth / BlockX)) return; + if (yID >= (srcHeight / BlockY)) return; + + CGU_INT destI = (xID*COMPRESSED_BLOCK_SIZE) + (yID*(srcWidth / BlockX)*COMPRESSED_BLOCK_SIZE); + CGU_INT srcindex = 4 * (yID * srcWidth + xID); + CGU_INT blkindex = 0; + BC7_EncodeState EncodeState; + varying BC7_EncodeState* uniform state = &EncodeState; + + copy_BC7_Encode_settings(state, BC7Encode); + + //Check if it is a complete 4X4 block + if (((xID + 1)*BlockX <= srcWidth) && ((yID + 1)*BlockY <= srcHeight)) + { + srcWidth = srcWidth - 4; + for (CGU_INT j = 0; j < 4; j++) { + for (CGU_INT i = 0; i < 4; i++) { + state->image_src[blkindex+0*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].x; + state->image_src[blkindex+1*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].y; + state->image_src[blkindex+2*SOURCE_BLOCK_SIZE] = 
ImageSource[srcindex].z; + state->image_src[blkindex+3*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].w; + blkindex++; + srcindex++; + } + + srcindex += srcWidth; + } + + copy_BC7_Encode_settings(state, BC7Encode); + + BC7_CompressBlock(&EncodeState, BC7Encode); + + for (CGU_INT i=0; icmp_out[i]; + } + + } + else + { + ASPM_PRINT(("[ASPM_GPU] Unable to process, make sure image size is divisible by 4")); + } +} +#endif diff --git a/extern/CMP_Core/shaders/BC7_Encode_Kernel.h b/extern/CMP_Core/shaders/BC7_Encode_Kernel.h new file mode 100644 index 0000000..1a812b9 --- /dev/null +++ b/extern/CMP_Core/shaders/BC7_Encode_Kernel.h @@ -0,0 +1,1580 @@ +//===================================================================== +// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +//===================================================================== +#ifndef BC7_ENCODE_KERNEL_H +#define BC7_ENCODE_KERNEL_H + +#if defined(ISPC)||defined(ASPM) +//#include "..\..\Common\Common_Def.h" +#include "Common_Def.h" +#else +#include "Common_Def.h" +#endif + +// cmp param uniform data tracking +typedef CGU_UINT8 CGU_CHANNEL; +typedef CGV_UINT32 CGV_SHIFT32; +typedef CGV_UINT8 CGV_BYTE; +typedef CGV_FLOAT CGV_ERROR; +typedef CGV_FLOAT CGV_IMAGE; +typedef CGV_INT CGV_EPOCODE; +typedef CGV_UINT8 CGV_CMPOUT; +typedef CGV_UINT8 CGV_INDEX; +typedef CGV_UINT32 CGV_INDEXPACKED; +typedef CGV_UINT32 CGV_CMPOUTPACKED; +typedef CGV_INT CGV_LEVELS; +typedef CGV_INT CGV_SUBSETS; +typedef CGV_INT CGV_MASK; +typedef CGV_INT CGV_ITTERATIONS; +typedef CGV_INT CGV_PARTID; +typedef CGV_INT CGV_FIXUPINDEX; +typedef CGV_INT CGV_RAMP; +typedef CGV_INT CGV_ENTRIES; +typedef CGV_INT CGV_TYPEINT; +typedef CGV_UINT32 CGV_TYPEUINT32; +typedef CGU_UINT8 CGU_BYTE; +typedef CGV_CMPOUT CGUV_CMPOUT; +typedef CGU_UINT8 CGUV_DSTPTR; + +#define USE_VARYING +#ifdef USE_VARYING +typedef CGV_INT CGUV_BLOCKWIDTH; +#else +typedef CGU_INT CGUV_BLOCKWIDTH; +#endif + +#ifndef ASPM_GPU + +struct cmp_bc7_state +{ + CGV_IMAGE block[16][4]; + CGV_SHIFT32 best_data[4]; +} ; + + + typedef enum + { + CGU_FORMAT_Unknown, // Undefined texture format. + + // Channel Component formats------------------------------------------------------------------------------- + CGU_FORMAT_RGBA_8888, // RGBA format with 8-bit fixed channels. + + // Formats supported by GPU + CGU_FORMAT_BC1, // A four component opaque (or 1-bit alpha) compressed texture format for Microsoft DirectX10. Identical to DXT1. Four bits per pixel. 
+ CGU_FORMAT_BC6H, // BC6H compressed texture format + CGU_FORMAT_BC7, // BC7 compressed texture format + + // Formats supported by CPU + CGU_FORMAT_GTC, // GTC Gradient Texture Compressor + CGU_FORMAT_MAX + } CGU_FORMAT; + + //------------------------------------ + // The structure describing a texture + //------------------------------------ + struct CGU_Texture_Type + { + // Optional Settings + CGU_FLOAT m_fquality; // Minimum resulting quality to maintain while processing the texture, default is 0.05 + CGU_INT8 m_nBlockHeight; // Size of the texture tiles (blocks) to use.during processing + CGU_INT8 m_nBlockWidth; // default = 4 + CGU_INT8 m_nBlockDepth; // default = 1 + + // Required settings + CGU_FORMAT m_format; // Texture format + CGU_UINT32 m_src_width; // Width of the texture. + CGU_UINT32 m_src_height; // Height of the texture. + CGU_UINT32 m_stride; // Number of bytes to start of next line + CGU_UINT32 m_dwDataSize; // Size of the allocated texture data. + CGU_UINT8* m_pData; // Pointer to the texture data + }; + +#endif // End of ASPM_CPU + +#define SOURCE_BLOCK_SIZE 16 // Size of a source block in pixels (each pixel has RGBA:8888 channels) +#define COMPRESSED_BLOCK_SIZE 16 // Size of a compressed block in bytes +#define MAX_CHANNELS 4 +#define MAX_SUBSETS 3 // Maximum number of possible subsets +#define MAX_SUBSET_SIZE 16 // Largest possible size for an individual subset +#define BC7_qFAST_THRESHOLD 0.50f +#define MAX_INDEX_BITS 4 // Maximum number of index bits + +typedef struct +{ + CGV_IMAGE image; + CGV_INDEX index; +} CMP_di; + +typedef struct +{ + CGV_IMAGE image; + CGV_UINT8 index; +} CMP_du; + +#define MAX_PARTITION_ENTRIES 64 + +#define MAX_PARTITIONS_TABLE 193 + +#define MAX_PARTITIONS 64 // Maximum number of partition types + +#define EPSILON 0.00390625f +#define DIMENSION 4 +#define BlockX 4 +#define BlockY 4 +#define QUANT_RT 250.0f // quality = 0.05f 
+//========================================================================================================== +#define LOG_CL_RANGE 5 +#define LOG_CL_BASE 2 +#define BIT_BASE 5 +#define BIT_RANGE 9 +#define MAX_CLUSTERS_BIG 16 +#define MAX_CLUSTERS 8 +#define BTT(bits) (bits-BIT_BASE) +#define CLT(cl) (cl-LOG_CL_BASE) + +#define MAX_TRY_QUANT_TRACE 2 // used in optQuantTrace_d : increasing this has no gain in quality!, keep it set at 2 +#define MAX_TRY_SHAKER 5 // used in ep_shaker_2_d if set at 4 PSNR drops by -0.1 SSIM stays the same +#define NUM_BLOCK_TYPES 8 // Number of block types in the format + +#define BC7_MAX_TRACE 25000 + +// If this is defined, ramp calculation is done via math floor and division. +// Otherwise, ramp calculation is done by bit shifting +#define USE_HIGH_PRECISION_INTERPOLATION_BC7 + +typedef struct +{ + CGU_INT32 k; + CGV_FLOAT d; +} TRACE; + + +typedef struct +#ifdef ASPM +BC7_EncodeState +#endif +{ + CGV_IMAGE image_src[64]; + CGV_CMPOUT cmp_out[COMPRESSED_BLOCK_SIZE]; + + // Common + CGV_ERROR opaque_err; // error for coding alpha=255 + CGV_ERROR best_err; + + // set per mode + CGU_CHANNEL channels3or4; + CGU_UINT8 bits; + CGU_INT clusters; + CGU_BYTE componentBits; + CGU_UINT8 numPartitionModes; + CGU_INT maxSubSets; + CGU_UINT8 numClusters0[2]; + CGU_UINT8 numClusters1[2]; + CGU_UINT8 max_idxMode; + CGU_INT modeBits[2]; + CGU_BOOL optimizedQ; + + CGU_UINT32 validModeMask; + CGU_INT part_count; + CGU_CHANNEL channels; + + // use_icmp + CGV_CMPOUTPACKED best_cmp_out[5]; + CGV_BOOL cmp_isout16Bytes; + CGU_INT refineIterations; + CGU_INT fastSkipTreshold; +} +#ifndef ASPM +BC7_EncodeState +#endif +; + +typedef struct +#ifdef ASPM +cmp_mode_parameters +#endif +{ + CGV_EPOCODE color_qendpoint[8]; + CGV_EPOCODE alpha_qendpoint[8]; + CGV_INDEXPACKED best_color_index[2]; + CGV_INDEXPACKED best_alpha_index[2]; + CGV_INDEX color_index[SOURCE_BLOCK_SIZE]; + CGV_INDEX alpha_index[SOURCE_BLOCK_SIZE]; + CGV_SHIFT32 idxMode; + CGV_SHIFT32 
rotated_channel; +} +#ifndef ASPM +cmp_mode_parameters +#endif +; + + +typedef struct +#ifdef ASPM +BC7_Encode +#endif +{ + // Global data setup at initialization time + CGU_FLOAT quality; // range is 0 to 1 + CGU_FLOAT errorThreshold; // use 5 to 75 + CGU_UINT32 validModeMask; // bit for mode masks def to 0xFF + CGU_BOOL imageNeedsAlpha; // default: false + CGU_BOOL colourRestrict; // default: false + CGU_BOOL alphaRestrict; // default: false + + // Used to track errors in internal state code + CGV_ERROR opaque_err; + CGV_ERROR best_err; + + CGU_FLOAT minThreshold; + CGU_FLOAT maxThreshold;; + + // icmp code settings + CGU_INT refineIterations; + CGU_INT part_count; + CGU_INT channels; + +} +#ifndef ASPM +BC7_Encode +#endif +; + +CMP_CONSTANT CGU_FLOAT rampWeights[5][SOURCE_BLOCK_SIZE] = { +{ 0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f}, // 0 bit index +{ 0.000000f,1.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f}, // 1 bit index +{ 0.000000f,0.328125f,0.671875f,1.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f}, // 2 bit index +{ 0.000000f,0.140625f,0.281250f,0.421875f,0.578125f,0.718750f,0.859375f,1.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f}, // 3 bit index +{ 0.000000f,0.062500f,0.140625f,0.203125f,0.265625f,0.328125f,0.406250f,0.468750f,0.531250f,0.593750f,0.671875f,0.734375f,0.796875f,0.859375f,0.937500f,1.000000f} // 4 bit index +}; + +#ifndef ASPM_GPU +typedef struct +#ifdef ASPM +BC7_EncodeRamps +#endif +{ + CGU_INT ep_d[4][256]; +#ifdef USE_BC7_SP_ERR_IDX + CGU_UINT8 sp_err[3*4*256*2*2*16]; + CGU_INT sp_idx[3*4*256*2*2*16*2]; +#endif +#ifdef USE_BC7_RAMP + CGU_FLOAT ramp[3*4*256*256*16]; +#endif + CGU_BOOL ramp_init; 
+} +#ifndef ASPM +BC7_EncodeRamps +#endif +; +#endif + +CMP_CONSTANT CGU_UINT8 npv_nd[2][8] = { + {1,2,4,8,16,32,0,0}, // 3 + {1,2,4,0,0 ,0 ,0,0} // 4 +}; + +typedef enum +{ + NO_ALPHA, + COMBINED_ALPHA, + SEPARATE_ALPHA +} CMP_BCE; + +// Endpoint encoding type +typedef enum +{ + NO_PBIT, + ONE_PBIT, + TWO_PBIT, + THREE_PBIT, + FOUR_PBIT, + FIVE_PBIT +} CMP_PBIT; + +typedef struct +#ifdef ASPM +BC7_Encode_local +#endif +{ + // Data for compressing a particular block mode + CGV_INT clusters[2]; + CGV_BYTE parityBits; + CGV_BYTE componentBits[MAX_CHANNELS]; + + CMP_BCE encodingType; // Type of block + CGU_UINT8 partitionBits; // Number of bits for partition data + CGU_UINT8 rotationBits; // Number of bits for component rotation + CGU_UINT8 indexModeBits; // Number of bits for index selection + CMP_PBIT pBitType; // Type of P-bit encoding + CGU_UINT8 subsetCount; // Number of subsets + CGU_UINT8 indexBits[2]; // Number of bits per index in each index set + + // Bulky temporary data used during compression of a block + CGV_UINT8 storedindex[MAX_PARTITIONS][MAX_SUBSETS][MAX_SUBSET_SIZE]; + CGV_ERROR storedError[MAX_PARTITIONS]; + CGV_UINT8 sortedModes[MAX_PARTITIONS]; + + // This stores the min and max for the components of the block, and the ranges + CGV_IMAGE blockMin[MAX_CHANNELS]; + CGV_IMAGE blockMax[MAX_CHANNELS]; + CGV_IMAGE blockRange[MAX_CHANNELS]; + CGV_IMAGE blockMaxRange; +} +#ifndef ASPM +BC7_Encode_local +#endif +; + + + +typedef enum +{ + CART, + SAME_PAR, + BCC, + SAME_FCC, + FCC, + FCC_SAME_BCC, +} CMP_qt; + +// Block component encoding + + +// Descriptor structure for block encodings +typedef struct +{ + uniform CMP_BCE encodingType; // Type of block + CGU_UINT8 partitionBits; // Number of bits for partition data + CGU_UINT8 rotationBits; // Number of bits for component rotation + CGU_UINT8 indexModeBits; // Number of bits for index selection + CGU_UINT8 scalarBits; // Number of bits for one scalar endpoint + CGU_UINT8 vectorBits; // Number of bits for 
one vector endpoint(excluding P bits) + uniform CMP_PBIT pBitType; // Type of P-bit encoding + CGU_UINT8 subsetCount; // Number of subsets + CGU_UINT8 indexBits[2]; // Number of bits per index in each index set +} CMP_BTI; + +typedef enum + { + COMP_RED = 0, + COMP_GREEN = 1, + COMP_BLUE = 2, + COMP_ALPHA = 3 + } COMPONENT; + +CMP_CONSTANT CGU_UINT8 componentRotations[4][4] = { +{ COMP_ALPHA, COMP_RED, COMP_GREEN, COMP_BLUE }, +{ COMP_RED, COMP_ALPHA, COMP_GREEN, COMP_BLUE }, +{ COMP_GREEN, COMP_RED, COMP_ALPHA, COMP_BLUE }, +{ COMP_BLUE, COMP_RED, COMP_GREEN, COMP_ALPHA } +}; + +CMP_CONSTANT CMP_BTI bti[NUM_BLOCK_TYPES] = { +//encodingType,partitionBits,rotationBits,indexModeBits,scalarBits,vectorBits,pBitType, subsetCount,indexBits[0]&[1] +{ NO_ALPHA, 4, 0, 0, 0, 12, TWO_PBIT, 3, { 3, 0 } }, // Format Mode 0 +{ NO_ALPHA, 6, 0, 0, 0, 18, ONE_PBIT, 2, { 3, 0 } }, // Format Mode 1 +{ NO_ALPHA, 6, 0, 0, 0, 15, NO_PBIT, 3, { 2, 0 } }, // Format Mode 2 +{ NO_ALPHA, 6, 0, 0, 0, 21, TWO_PBIT, 2, { 2, 0 } }, // Format Mode 3 +{ SEPARATE_ALPHA, 0, 2, 1, 6, 15, NO_PBIT, 1, { 2, 3 } }, // Format Mode 4 +{ SEPARATE_ALPHA, 0, 2, 0, 8, 21, NO_PBIT, 1, { 2, 2 } }, // Format Mode 5 +{ COMBINED_ALPHA, 0, 0, 0, 0, 28, TWO_PBIT, 1, { 4, 0 } }, // Format Mode 6 +{ COMBINED_ALPHA, 6, 0, 0, 0, 20, TWO_PBIT, 2, { 2, 0 } } // Format Mode 7 +}; + +CMP_CONSTANT CGU_UINT8 par_vectors_nd[2][8][64][2][4] = { +{ // 3D +{ +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, 
+{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ +{{0,0,0,0},{0,0,0,0}},{{1,1,1,0},{1,1,1,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, 
+{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{1,1,1,0}},{{1,1,1,0},{0,0,0,0}},{{1,1,1,0},{1,1,1,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ +{{0,0,0,0},{0,0,0,0}},{{1,1,0,0},{1,1,0,0}},{{1,0,1,0},{1,0,1,0}},{{0,1,1,0},{0,1,1,0}},{{0,0,0,0},{1,1,1,0}},{{1,1,1,0},{0,0,0,0}},{{0,1,0,0},{0,1,0,0}},{{1,1,1,0},{1,1,1,0}}, 
+{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ +{{0,0,0,0},{0,0,0,0}},{{1,1,0,0},{0,0,0,0}},{{1,0,1,0},{0,0,0,0}},{{0,1,1,0},{0,0,0,0}},{{0,0,0,0},{1,1,0,0}},{{1,1,0,0},{1,1,0,0}},{{1,0,1,0},{1,1,0,0}},{{0,1,1,0},{1,1,0,0}}, +{{0,0,0,0},{1,0,1,0}},{{1,1,0,0},{1,0,1,0}},{{1,0,1,0},{1,0,1,0}},{{0,1,1,0},{1,0,1,0}},{{0,0,0,0},{0,1,1,0}},{{1,1,0,0},{0,1,1,0}},{{1,0,1,0},{0,1,1,0}},{{0,1,1,0},{0,1,1,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, 
+{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ +{{0,0,0,0},{0,0,0,0}},{{1,1,0,0},{0,0,0,0}},{{1,0,1,0},{0,0,0,0}},{{0,1,1,0},{0,0,0,0}},{{0,0,0,0},{1,1,0,0}},{{1,1,0,0},{1,1,0,0}},{{1,0,1,0},{1,1,0,0}},{{0,1,1,0},{1,1,0,0}}, +{{0,0,0,0},{1,0,1,0}},{{1,1,0,0},{1,0,1,0}},{{1,0,1,0},{1,0,1,0}},{{0,1,1,0},{1,0,1,0}},{{0,0,0,0},{0,1,1,0}},{{1,1,0,0},{0,1,1,0}},{{1,0,1,0},{0,1,1,0}},{{0,1,1,0},{0,1,1,0}}, +{{1,0,0,0},{1,1,1,0}},{{0,1,0,0},{1,1,1,0}},{{0,0,1,0},{1,1,1,0}},{{1,1,1,0},{1,1,1,0}},{{1,0,0,0},{0,0,1,0}},{{0,1,0,0},{0,0,1,0}},{{0,0,1,0},{0,0,1,0}},{{1,1,1,0},{0,0,1,0}}, +{{1,0,0,0},{1,0,0,0}},{{0,1,0,0},{1,0,0,0}},{{0,0,1,0},{1,0,0,0}},{{1,1,1,0},{1,0,0,0}},{{1,0,0,0},{0,1,0,0}},{{0,1,0,0},{0,1,0,0}},{{0,0,1,0},{0,1,0,0}},{{1,1,1,0},{0,1,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, 
+{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, 
+{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +}, +{ // 4D +{ +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, 
+{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ +{{0,0,0,0},{0,0,0,0}},{{1,1,1,1},{1,1,1,1}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ 
+{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{1,1,1,1}},{{1,1,1,1},{0,0,0,0}},{{1,1,1,1},{1,1,1,1}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,1,1,1}},{{0,1,1,1},{0,0,0,0}},{{0,1,1,1},{0,1,1,1}},{{1,0,0,0},{1,0,0,0}},{{1,0,0,0},{1,1,1,1}},{{1,1,1,1},{1,0,0,0}},{{1,1,1,1},{1,1,1,1}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, 
+{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,1,1,1}},{{0,1,1,1},{0,0,0,0}},{{0,1,1,1},{0,1,1,1}},{{1,0,0,0},{1,0,0,0}},{{1,0,0,0},{1,1,1,1}},{{1,1,1,1},{1,0,0,0}},{{1,1,1,1},{1,1,1,1}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,1,1}},{{0,0,1,1},{0,0,0,0}},{{0,1,0,1},{0,1,0,1}},{{1,0,0,0},{1,0,0,0}},{{1,0,0,0},{1,0,1,1}},{{1,0,1,1},{1,0,0,0}},{{1,1,0,1},{1,1,0,1}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, 
+{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, 
+{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +{ +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, 
+{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}}, +{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}},{{0,0,0,0},{0,0,0,0}} +}, +}, +}; + +#ifndef ASPM_GPU +// =============================== USED BY DECODER THIS CODE NEEDS TO BE UPDATED ========================================= +CMP_CONSTANT CGU_UINT32 BC7_FIXUPINDICES_LOCAL[MAX_SUBSETS][MAX_PARTITIONS][3] = +{ + // One subset + { + {0, 0, 0}, + }, + + { + {0, 15},{0, 15},{0, 15},{0, 15}, + {0, 15},{0, 15},{0, 15},{0, 15}, + {0, 15},{0, 15},{0, 15},{0, 15}, + {0, 15},{0, 15},{0, 15},{0, 15}, + {0, 15},{0, 2},{0, 8},{0, 2}, + {0, 2},{0, 8},{0, 8},{0, 15}, + {0, 2},{0, 8},{0, 2},{0, 2}, + {0, 8},{0, 8},{0, 2},{0, 2}, + {0, 15},{0, 15},{0, 6},{0, 8}, + {0, 2},{0, 8},{0, 15},{0, 15}, + {0, 2},{0, 8},{0, 2},{0, 2}, + {0, 2},{0, 15},{0, 15},{0, 6}, + {0, 6},{0, 2},{0, 6},{0, 8}, + {0, 15},{0, 15},{0, 2},{0, 2}, + {0, 15},{0, 15},{0, 15},{0, 15}, + {0, 15},{0, 2},{0, 2},{0, 15}, + }, + + // Three subsets + { + {0, 3,15}, {0, 3, 8}, {0,15, 8}, {0,15, 3}, + {0, 8,15}, {0, 3,15}, {0,15, 3}, {0,15, 8}, + {0, 8,15}, {0, 8,15}, {0, 6,15}, {0, 6,15}, + {0, 6,15}, {0, 5,15}, {0, 3,15}, {0, 3, 8}, + {0, 3,15}, {0, 3, 8}, {0, 8,15}, {0,15, 3}, + {0, 3,15}, {0, 3, 8}, {0, 6,15}, {0,10, 8}, + {0, 5, 3}, {0, 8,15}, {0, 8, 6}, {0, 6,10}, + {0, 8,15}, {0, 5,15}, {0,15,10}, {0,15, 8}, + {0, 8,15}, {0,15, 3}, {0, 3,15}, {0, 5,10}, + {0, 6,10}, {0,10, 8}, {0, 8, 
9}, {0,15,10}, + {0,15, 6}, {0, 3,15}, {0,15, 8}, {0, 5,15}, + {0,15, 3}, {0,15, 6}, {0,15, 6}, {0,15, 8}, + {0, 3,15}, {0,15, 3}, {0, 5,15}, {0, 5,15}, + {0, 5,15}, {0, 8,15}, {0, 5,15}, {0,10,15}, + {0, 5,15}, {0,10,15}, {0, 8,15}, {0,13,15}, + {0,15, 3}, {0,12,15}, {0, 3,15}, {0, 3, 8} + + }, + +}; + +CMP_STATIC void SetDefaultBC7Options(BC7_Encode *BC7Encode) +{ +if (BC7Encode) +{ + // Set for max quality + BC7Encode->quality = 1.0f; + BC7Encode->minThreshold = 5.0f; + BC7Encode->maxThreshold = 80.0f; + BC7Encode->errorThreshold = 5.0f; + BC7Encode->validModeMask = 0xFF; + + BC7Encode->imageNeedsAlpha = FALSE; + BC7Encode->colourRestrict = FALSE; + BC7Encode->alphaRestrict = FALSE; + + BC7Encode->channels = 4; + BC7Encode->part_count = 128; +} +} + +#ifndef ASPM +//===================== +// Used by Decoder +//===================== +__constant CGU_FLOAT rampLerpWeightsBC7[5][16] = +{ + { 0.0 }, // 0 bit index + { 0.0, 1.0 }, // 1 bit index + { 0.0, 21.0 / 64.0, 43.0 / 64.0, 1.0 }, // 2 bit index + { 0.0, 9.0 / 64.0, 18.0 / 64.0, 27.0 / 64.0, 37.0 / 64.0, 46.0 / 64.0, 55.0 / 64.0, 1.0 }, // 3 bit index + { 0.0, 4.0 / 64.0, 9.0 / 64.0, 13.0 / 64.0, 17.0 / 64.0, 21.0 / 64.0, 26.0 / 64.0, 30.0 / 64.0, + 34.0 / 64.0, 38.0 / 64.0, 43.0 / 64.0, 47.0 / 64.0, 51.0 / 64.0, 55.0 / 64.0, 60.0 / 64.0, 1.0 } // 4 bit index +}; + + +__constant CGU_UINT8 BC7_PARTITIONS[MAX_SUBSETS][MAX_PARTITIONS][MAX_SUBSET_SIZE] = +{ + // Single subset partitions for both BC6H abd BC7 + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + + { + { // 0 + 0,0,1,1, + 0,0,1,1, + 0,0,1,1, + 0,0,1,1 + }, + + { // 1 + 0,0,0,1, + 0,0,0,1, + 0,0,0,1, + 0,0,0,1 + }, + + { // 2 + 0,1,1,1, + 0,1,1,1, + 0,1,1,1, + 0,1,1,1 + }, + + { // 3 + 0,0,0,1, + 0,0,1,1, + 0,0,1,1, + 0,1,1,1 + }, + + { // 4 + 0,0,0,0, + 0,0,0,1, + 0,0,0,1, + 0,0,1,1 + }, + + { // 5 + 0,0,1,1, + 0,1,1,1, + 0,1,1,1, + 1,1,1,1 + }, + + { // 6 + 0,0,0,1, + 0,0,1,1, + 0,1,1,1, + 1,1,1,1 + }, + + { // 7 + 0,0,0,0, + 
0,0,0,1, + 0,0,1,1, + 0,1,1,1 + }, + + { // 8 + 0,0,0,0, + 0,0,0,0, + 0,0,0,1, + 0,0,1,1 + }, + + { // 9 + 0,0,1,1, + 0,1,1,1, + 1,1,1,1, + 1,1,1,1 + }, + + { // 10 + 0,0,0,0, + 0,0,0,1, + 0,1,1,1, + 1,1,1,1 + }, + + { // 11 + 0,0,0,0, + 0,0,0,0, + 0,0,0,1, + 0,1,1,1 + }, + + { // 12 + 0,0,0,1, + 0,1,1,1, + 1,1,1,1, + 1,1,1,1 + }, + + { // 13 + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + 1,1,1,1 + }, + + { // 14 + 0,0,0,0, + 1,1,1,1, + 1,1,1,1, + 1,1,1,1 + }, + + { // 15 + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1 + }, + + { // 16 + 0,0,0,0, + 1,0,0,0, + 1,1,1,0, + 1,1,1,1 + }, + + { // 17 + 0,1,1,1, + 0,0,0,1, + 0,0,0,0, + 0,0,0,0 + }, + + { // 18 + 0,0,0,0, + 0,0,0,0, + 1,0,0,0, + 1,1,1,0 + }, + + { // 19 + 0,1,1,1, + 0,0,1,1, + 0,0,0,1, + 0,0,0,0 + }, + + { // 20 + 0,0,1,1, + 0,0,0,1, + 0,0,0,0, + 0,0,0,0 + }, + + { // 21 + 0,0,0,0, + 1,0,0,0, + 1,1,0,0, + 1,1,1,0 + }, + + { // 22 + 0,0,0,0, + 0,0,0,0, + 1,0,0,0, + 1,1,0,0 + }, + + { // 23 + 0,1,1,1, + 0,0,1,1, + 0,0,1,1, + 0,0,0,1 + }, + + { // 24 + 0,0,1,1, + 0,0,0,1, + 0,0,0,1, + 0,0,0,0 + }, + + { // 25 + 0,0,0,0, + 1,0,0,0, + 1,0,0,0, + 1,1,0,0 + }, + + { // 26 + 0,1,1,0, + 0,1,1,0, + 0,1,1,0, + 0,1,1,0 + }, + + { // 27 + 0,0,1,1, + 0,1,1,0, + 0,1,1,0, + 1,1,0,0 + }, + + { // 28 + 0,0,0,1, + 0,1,1,1, + 1,1,1,0, + 1,0,0,0 + }, + + { // 29 + 0,0,0,0, + 1,1,1,1, + 1,1,1,1, + 0,0,0,0 + }, + + { // 30 + 0,1,1,1, + 0,0,0,1, + 1,0,0,0, + 1,1,1,0 + }, + + { // 31 + 0,0,1,1, + 1,0,0,1, + 1,0,0,1, + 1,1,0,0 + }, + // ----------- BC7 only shapes from here on ------------- + { // 32 + 0,1,0,1, + 0,1,0,1, + 0,1,0,1, + 0,1,0,1 + }, + + { // 33 + 0,0,0,0, + 1,1,1,1, + 0,0,0,0, + 1,1,1,1 + }, + + { // 34 + 0,1,0,1, + 1,0,1,0, + 0,1,0,1, + 1,0,1,0 + }, + + { // 35 + 0,0,1,1, + 0,0,1,1, + 1,1,0,0, + 1,1,0,0 + }, + + { // 36 + 0,0,1,1, + 1,1,0,0, + 0,0,1,1, + 1,1,0,0 + }, + + { // 37 + 0,1,0,1, + 0,1,0,1, + 1,0,1,0, + 1,0,1,0 + }, + + { // 38 + 0,1,1,0, + 1,0,0,1, + 0,1,1,0, + 1,0,0,1 + }, + + { // 39 + 0,1,0,1, + 1,0,1,0, + 1,0,1,0, + 
0,1,0,1 + }, + + { // 40 + 0,1,1,1, + 0,0,1,1, + 1,1,0,0, + 1,1,1,0 + }, + + { // 41 + 0,0,0,1, + 0,0,1,1, + 1,1,0,0, + 1,0,0,0 + }, + + { // 42 + 0,0,1,1, + 0,0,1,0, + 0,1,0,0, + 1,1,0,0 + }, + + { // 43 + 0,0,1,1, + 1,0,1,1, + 1,1,0,1, + 1,1,0,0 + }, + + { // 44 + 0,1,1,0, + 1,0,0,1, + 1,0,0,1, + 0,1,1,0 + }, + + { // 45 + 0,0,1,1, + 1,1,0,0, + 1,1,0,0, + 0,0,1,1 + }, + + { // 46 + 0,1,1,0, + 0,1,1,0, + 1,0,0,1, + 1,0,0,1 + }, + + { // 47 + 0,0,0,0, + 0,1,1,0, + 0,1,1,0, + 0,0,0,0 + }, + + { // 48 + 0,1,0,0, + 1,1,1,0, + 0,1,0,0, + 0,0,0,0 + }, + + { // 49 + 0,0,1,0, + 0,1,1,1, + 0,0,1,0, + 0,0,0,0 + }, + + { // 50 + 0,0,0,0, + 0,0,1,0, + 0,1,1,1, + 0,0,1,0 + }, + + { // 51 + 0,0,0,0, + 0,1,0,0, + 1,1,1,0, + 0,1,0,0 + }, + + { // 52 + 0,1,1,0, + 1,1,0,0, + 1,0,0,1, + 0,0,1,1 + }, + + { // 53 + 0,0,1,1, + 0,1,1,0, + 1,1,0,0, + 1,0,0,1 + }, + + { // 54 + 0,1,1,0, + 0,0,1,1, + 1,0,0,1, + 1,1,0,0 + }, + + { // 55 + 0,0,1,1, + 1,0,0,1, + 1,1,0,0, + 0,1,1,0 + }, + + { // 56 + 0,1,1,0, + 1,1,0,0, + 1,1,0,0, + 1,0,0,1 + }, + + { // 57 + 0,1,1,0, + 0,0,1,1, + 0,0,1,1, + 1,0,0,1 + }, + + { // 58 + 0,1,1,1, + 1,1,1,0, + 1,0,0,0, + 0,0,0,1 + }, + + { // 59 + 0,0,0,1, + 1,0,0,0, + 1,1,1,0, + 0,1,1,1 + }, + + { // 60 + 0,0,0,0, + 1,1,1,1, + 0,0,1,1, + 0,0,1,1 + }, + + { // 61 + 0,0,1,1, + 0,0,1,1, + 1,1,1,1, + 0,0,0,0 + }, + + { // 62 + 0,0,1,0, + 0,0,1,0, + 1,1,1,0, + 1,1,1,0 + }, + + { // 63 + 0,1,0,0, + 0,1,0,0, + 0,1,1,1, + 0,1,1,1 + }, + }, + + + // Table.P3 - only for BC7 + + { + + { + 0,0,1,1, + 0,0,1,1, + 0,2,2,1, + 2,2,2,2 + }, + + { + 0,0,0,1, + 0,0,1,1, + 2,2,1,1, + 2,2,2,1 + }, + + { + 0,0,0,0, + 2,0,0,1, + 2,2,1,1, + 2,2,1,1 + }, + + { + 0,2,2,2, + 0,0,2,2, + 0,0,1,1, + 0,1,1,1 + }, + + { + 0,0,0,0, + 0,0,0,0, + 1,1,2,2, + 1,1,2,2 + }, + + { + 0,0,1,1, + 0,0,1,1, + 0,0,2,2, + 0,0,2,2 + }, + + { + 0,0,2,2, + 0,0,2,2, + 1,1,1,1, + 1,1,1,1 + }, + + { + 0,0,1,1, + 0,0,1,1, + 2,2,1,1, + 2,2,1,1 + }, + + { + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + 2,2,2,2 + }, + + { + 0,0,0,0, 
+ 1,1,1,1, + 1,1,1,1, + 2,2,2,2 + }, + + { + 0,0,0,0, + 1,1,1,1, + 2,2,2,2, + 2,2,2,2 + }, + + { + 0,0,1,2, + 0,0,1,2, + 0,0,1,2, + 0,0,1,2 + }, + + { + 0,1,1,2, + 0,1,1,2, + 0,1,1,2, + 0,1,1,2 + }, + + { + 0,1,2,2, + 0,1,2,2, + 0,1,2,2, + 0,1,2,2 + }, + + { + 0,0,1,1, + 0,1,1,2, + 1,1,2,2, + 1,2,2,2 + }, + + { + 0,0,1,1, + 2,0,0,1, + 2,2,0,0, + 2,2,2,0 + }, + + { + 0,0,0,1, + 0,0,1,1, + 0,1,1,2, + 1,1,2,2 + }, + + { + 0,1,1,1, + 0,0,1,1, + 2,0,0,1, + 2,2,0,0 + }, + + { + 0,0,0,0, + 1,1,2,2, + 1,1,2,2, + 1,1,2,2 + }, + + { + 0,0,2,2, + 0,0,2,2, + 0,0,2,2, + 1,1,1,1 + }, + + { + 0,1,1,1, + 0,1,1,1, + 0,2,2,2, + 0,2,2,2 + }, + + { + 0,0,0,1, + 0,0,0,1, + 2,2,2,1, + 2,2,2,1 + }, + + { + 0,0,0,0, + 0,0,1,1, + 0,1,2,2, + 0,1,2,2 + }, + + { + 0,0,0,0, + 1,1,0,0, + 2,2,1,0, + 2,2,1,0 + }, + + { + 0,1,2,2, + 0,1,2,2, + 0,0,1,1, + 0,0,0,0 + }, + + { + 0,0,1,2, + 0,0,1,2, + 1,1,2,2, + 2,2,2,2 + }, + + { + 0,1,1,0, + 1,2,2,1, + 1,2,2,1, + 0,1,1,0 + }, + + { + 0,0,0,0, + 0,1,1,0, + 1,2,2,1, + 1,2,2,1 + }, + + { + 0,0,2,2, + 1,1,0,2, + 1,1,0,2, + 0,0,2,2 + }, + + { + 0,1,1,0, + 0,1,1,0, + 2,0,0,2, + 2,2,2,2 + }, + + { + 0,0,1,1, + 0,1,2,2, + 0,1,2,2, + 0,0,1,1 + }, + + { + 0,0,0,0, + 2,0,0,0, + 2,2,1,1, + 2,2,2,1 + }, + + { + 0,0,0,0, + 0,0,0,2, + 1,1,2,2, + 1,2,2,2 + }, + + { + 0,2,2,2, + 0,0,2,2, + 0,0,1,2, + 0,0,1,1 + }, + + { + 0,0,1,1, + 0,0,1,2, + 0,0,2,2, + 0,2,2,2 + }, + + { + 0,1,2,0, + 0,1,2,0, + 0,1,2,0, + 0,1,2,0 + }, + + { + 0,0,0,0, + 1,1,1,1, + 2,2,2,2, + 0,0,0,0 + }, + + { + 0,1,2,0, + 1,2,0,1, + 2,0,1,2, + 0,1,2,0 + }, + + { + 0,1,2,0, + 2,0,1,2, + 1,2,0,1, + 0,1,2,0 + }, + + { + 0,0,1,1, + 2,2,0,0, + 1,1,2,2, + 0,0,1,1 + }, + + { + 0,0,1,1, + 1,1,2,2, + 2,2,0,0, + 0,0,1,1 + }, + + { + 0,1,0,1, + 0,1,0,1, + 2,2,2,2, + 2,2,2,2 + }, + + { + 0,0,0,0, + 0,0,0,0, + 2,1,2,1, + 2,1,2,1 + }, + + { + 0,0,2,2, + 1,1,2,2, + 0,0,2,2, + 1,1,2,2 + }, + + { + 0,0,2,2, + 0,0,1,1, + 0,0,2,2, + 0,0,1,1 + }, + + { + 0,2,2,0, + 1,2,2,1, + 0,2,2,0, + 1,2,2,1 + }, + + { + 0,1,0,1, + 
2,2,2,2, + 2,2,2,2, + 0,1,0,1 + }, + + { + 0,0,0,0, + 2,1,2,1, + 2,1,2,1, + 2,1,2,1 + }, + + { + 0,1,0,1, + 0,1,0,1, + 0,1,0,1, + 2,2,2,2 + }, + + { + 0,2,2,2, + 0,1,1,1, + 0,2,2,2, + 0,1,1,1 + }, + + { + 0,0,0,2, + 1,1,1,2, + 0,0,0,2, + 1,1,1,2 + }, + + { + 0,0,0,0, + 2,1,1,2, + 2,1,1,2, + 2,1,1,2 + }, + + { + 0,2,2,2, + 0,1,1,1, + 0,1,1,1, + 0,2,2,2 + }, + + { + 0,0,0,2, + 1,1,1,2, + 1,1,1,2, + 0,0,0,2 + }, + + { + 0,1,1,0, + 0,1,1,0, + 0,1,1,0, + 2,2,2,2 + }, + + { + 0,0,0,0, + 0,0,0,0, + 2,1,1,2, + 2,1,1,2 + }, + + { + 0,1,1,0, + 0,1,1,0, + 2,2,2,2, + 2,2,2,2 + }, + + { + 0,0,2,2, + 0,0,1,1, + 0,0,1,1, + 0,0,2,2 + }, + + { + 0,0,2,2, + 1,1,2,2, + 1,1,2,2, + 0,0,2,2 + }, + + { + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 2,1,1,2 + }, + + { + 0,0,0,2, + 0,0,0,1, + 0,0,0,2, + 0,0,0,1 + }, + + { + 0,2,2,2, + 1,2,2,2, + 0,2,2,2, + 1,2,2,2 + }, + + { + 0,1,0,1, + 2,2,2,2, + 2,2,2,2, + 2,2,2,2 + }, + + { + 0,1,1,1, + 2,0,1,1, + 2,2,0,1, + 2,2,2,0 + }, + }, +}; +#endif + +#endif // !ASPM_GPU + + + +#endif diff --git a/extern/CMP_Core/shaders/BCn_Common_Kernel.h b/extern/CMP_Core/shaders/BCn_Common_Kernel.h new file mode 100644 index 0000000..e9db4a3 --- /dev/null +++ b/extern/CMP_Core/shaders/BCn_Common_Kernel.h @@ -0,0 +1,2360 @@ +//===================================================================== +// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +//===================================================================== +#ifndef _BCn_Common_Kernel_H +#define _BCn_Common_Kernel_H + +#include "Common_Def.h" + +#ifndef ASPM_GPU +#if defined(WIN32) || defined(_WIN64) +#define ALIGN_16 __declspec(align(16)) +#else // !WIN32 && !_WIN64 +#define ALIGN_16 +#endif // !WIN32 && !_WIN64 +#else +#define ALIGN_16 +#endif + +#define DXTC_OFFSET_ALPHA 0 +#define DXTC_OFFSET_RGB 2 + +#define RC 2 +#define GC 1 +#define BC 0 +#define AC 3 + +/* +Channel Bits +*/ +#define RG 5 +#define GG 6 +#define BG 5 + +#define RGBA8888_CHANNEL_A 3 +#define RGBA8888_CHANNEL_R 2 +#define RGBA8888_CHANNEL_G 1 +#define RGBA8888_CHANNEL_B 0 +#define RGBA8888_OFFSET_A (RGBA8888_CHANNEL_A * 8) +#define RGBA8888_OFFSET_R (RGBA8888_CHANNEL_R * 8) +#define RGBA8888_OFFSET_G (RGBA8888_CHANNEL_G * 8) +#define RGBA8888_OFFSET_B (RGBA8888_CHANNEL_B * 8) + +#define MAX_BLOCK 64 +#define BLOCK_SIZE MAX_BLOCK + +#ifndef MAX_ERROR +#define MAX_ERROR 128000.f +#endif + +#define MAX_BLOCK 64 +#define MAX_POINTS 16 +#define BLOCK_SIZE MAX_BLOCK +#define NUM_CHANNELS 4 +#define NUM_ENDPOINTS 2 +#define BLOCK_SIZE_4X4 16 + +#define ConstructColour(r, g, b) (((r) << 11) | ((g) << 5) | (b)) + +// Find the first approximation of the line +// Assume there is a linear relation +// Z = a * X_In +// Z = b * Y_In +// Find a,b to minimize MSE between Z and Z_In +#define EPS (2.f / 255.f) * (2.f / 255.f) +#define EPS2 3.f * (2.f / 255.f) * (2.f / 255.f) + +// Grid precision +#define 
PIX_GRID 8 + +#define BYTE_MASK 0x00ff + +CMP_CONSTANT CGU_UINT8 nByteBitsMask[9] = {0x00, 0x80, 0xc0, 0xe0, 0xf0, + 0xf8, 0xfc, 0xfe, 0xff}; +CMP_CONSTANT CGU_DWORD dwRndAmount[9] = {0, 0, 0, 0, 1, 1, 2, 2, 3}; + +#define _INT_GRID (_bFixedRamp && _FracPrc == 0) +#define SCH_STPS 3 // number of search steps to make at each end of interval +static CMP_CONSTANT CGU_FLOAT sMvF[] = {0.f, -1.f, 1.f, -2.f, 2.f, -3.f, + 3.f, -4.f, 4.f, -5.f, 5.f, -6.f, + 6.f, -7.f, 7.f, -8.f, 8.f}; + +#ifndef GBL_SCH_STEP +#define GBL_SCH_STEP_MXS 0.018f +#define GBL_SCH_EXT_MXS 0.1f +#define LCL_SCH_STEP_MXS 0.6f +#define GBL_SCH_STEP_MXQ 0.0175f +#define GBL_SCH_EXT_MXQ 0.154f +#define LCL_SCH_STEP_MXQ 0.45f + +#define GBL_SCH_STEP GBL_SCH_STEP_MXS +#define GBL_SCH_EXT GBL_SCH_EXT_MXS +#define LCL_SCH_STEP LCL_SCH_STEP_MXS +#endif + +typedef struct { + CGU_UINT32 data; + CGU_UINT32 index; +} CMP_di; + +typedef struct { + CGU_FLOAT data; + CGU_UINT32 index; +} CMP_df; + +typedef struct { + // user setable + CGU_FLOAT m_fquality; + CGU_FLOAT m_fChannelWeights[3]; + CGU_BOOL m_bUseChannelWeighting; + CGU_BOOL m_bUseAdaptiveWeighting; + CGU_BOOL m_bUseFloat; + CGU_BOOL m_b3DRefinement; + CGU_UINT8 m_nRefinementSteps; + CGU_UINT8 m_nAlphaThreshold; + + CGU_BOOL m_mapDecodeRGBA; + + // ?? 
Remove this + CGU_UINT32 m_src_width; + CGU_UINT32 m_src_height; +} CMP_BC15Options; + +//---------------------------------------- Common Code ------------------------------------------------------- + +static void SetDefaultBC15Options(CMP_BC15Options *BC15Options) { + if (BC15Options) { + BC15Options->m_fquality = 1.0f; + BC15Options->m_bUseChannelWeighting = false; + BC15Options->m_bUseAdaptiveWeighting = false; + BC15Options->m_fChannelWeights[0] = 0.3086f; + BC15Options->m_fChannelWeights[1] = 0.6094f; + BC15Options->m_fChannelWeights[2] = 0.0820f; + BC15Options->m_nAlphaThreshold = 128; + BC15Options->m_bUseFloat = false; + BC15Options->m_b3DRefinement = false; + BC15Options->m_nRefinementSteps = 1; + BC15Options->m_src_width = 4; + BC15Options->m_src_height = 4; +#ifdef CMP_SET_BC13_DECODER_RGBA + BC15Options->m_mapDecodeRGBA = true; +#else + BC15Options->m_mapDecodeRGBA = false; +#endif + } +} + +inline CGU_UINT8 minb(CGU_UINT8 a, CGU_UINT8 b) { return a < b ? a : b; } +inline CGU_FLOAT minf(CGU_FLOAT a, CGU_FLOAT b) { return a < b ? a : b; } +inline CGU_FLOAT maxf(CGU_FLOAT a, CGU_FLOAT b) { return a > b ? 
a : b; } + +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) +static void CalculateColourWeightings(CGU_UINT8 block[BLOCK_SIZE_4X4X4], + CMP_GLOBAL CMP_BC15Options *BC15options) { + CGU_FLOAT fBaseChannelWeights[3] = {0.3086f, 0.6094f, 0.0820f}; + + if (!BC15options->m_bUseChannelWeighting) { + BC15options->m_fChannelWeights[0] = 1.0F; + BC15options->m_fChannelWeights[1] = 1.0F; + BC15options->m_fChannelWeights[2] = 1.0F; + return; + } + + if (BC15options->m_bUseAdaptiveWeighting) { + float medianR = 0.0f, medianG = 0.0f, medianB = 0.0f; + + for (CGU_UINT32 k = 0; k < BLOCK_SIZE_4X4; k++) { + CGU_DWORD R = (block[k] & 0xff0000) >> 16; + CGU_DWORD G = (block[k] & 0xff00) >> 8; + CGU_DWORD B = block[k] & 0xff; + + medianR += R; + medianG += G; + medianB += B; + } + + medianR /= BLOCK_SIZE_4X4; + medianG /= BLOCK_SIZE_4X4; + medianB /= BLOCK_SIZE_4X4; + + // Now skew the colour weightings based on the gravity center of the block + float largest = maxf(maxf(medianR, medianG), medianB); + + if (largest > 0) { + medianR /= largest; + medianG /= largest; + medianB /= largest; + } else + medianR = medianG = medianB = 1.0f; + + // Scale weightings back up to 1.0f + CGU_FLOAT fWeightScale = + 1.0f / (fBaseChannelWeights[0] + fBaseChannelWeights[1] + + fBaseChannelWeights[2]); + BC15options->m_fChannelWeights[0] = fBaseChannelWeights[0] * fWeightScale; + BC15options->m_fChannelWeights[1] = fBaseChannelWeights[1] * fWeightScale; + BC15options->m_fChannelWeights[2] = fBaseChannelWeights[2] * fWeightScale; + BC15options->m_fChannelWeights[0] = + ((BC15options->m_fChannelWeights[0] * 3 * medianR) + + BC15options->m_fChannelWeights[0]) * + 0.25f; + BC15options->m_fChannelWeights[1] = + ((BC15options->m_fChannelWeights[1] * 3 * medianG) + + BC15options->m_fChannelWeights[1]) * + 0.25f; + BC15options->m_fChannelWeights[2] = + ((BC15options->m_fChannelWeights[2] * 3 * medianB) + + BC15options->m_fChannelWeights[2]) * + 0.25f; + fWeightScale = 1.0f / 
(BC15options->m_fChannelWeights[0] + + BC15options->m_fChannelWeights[1] + + BC15options->m_fChannelWeights[2]); + BC15options->m_fChannelWeights[0] *= fWeightScale; + BC15options->m_fChannelWeights[1] *= fWeightScale; + BC15options->m_fChannelWeights[2] *= fWeightScale; + } else { + BC15options->m_fChannelWeights[0] = fBaseChannelWeights[0]; + BC15options->m_fChannelWeights[1] = fBaseChannelWeights[1]; + BC15options->m_fChannelWeights[2] = fBaseChannelWeights[2]; + } +} +#endif // !BC5 +#endif // !BC4 + +/*------------------------------------------------------------------------------------------------ +1 dim error +------------------------------------------------------------------------------------------------*/ +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) + +static CGU_FLOAT RampSrchW(CGU_FLOAT _Blck[MAX_BLOCK], + CGU_FLOAT _BlckErr[MAX_BLOCK], + CGU_FLOAT _Rpt[MAX_BLOCK], CGU_FLOAT _maxerror, + CGU_FLOAT _min_ex, CGU_FLOAT _max_ex, int _NmbClrs, + int _block) { + CGU_FLOAT error = 0; + CGU_FLOAT step = (_max_ex - _min_ex) / (_block - 1); + CGU_FLOAT step_h = step * (CGU_FLOAT)0.5; + CGU_FLOAT rstep = (CGU_FLOAT)1.0f / step; + + for (CGU_INT32 i = 0; i < _NmbClrs; i++) { + CGU_FLOAT v; + // Work out which value in the block this select + CGU_FLOAT del; + + if ((del = _Blck[i] - _min_ex) <= 0) + v = _min_ex; + else if (_Blck[i] - _max_ex >= 0) + v = _max_ex; + else + v = floor((del + step_h) * rstep) * step + _min_ex; + + // And accumulate the error + CGU_FLOAT d = (_Blck[i] - v); + d *= d; + CGU_FLOAT err = _Rpt[i] * d + _BlckErr[i]; + error += err; + if (_maxerror < error) { + error = _maxerror; + break; + } + } + return error; +} +#endif // !BC5 +#endif // BC4 + +/*------------------------------------------------------------------------------------------------ +// this is how the end points is going to be rounded in compressed format +------------------------------------------------------------------------------------------------*/ +#if 
!defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) + +static void MkRmpOnGrid(CGU_FLOAT _RmpF[NUM_CHANNELS][NUM_ENDPOINTS], + CGU_FLOAT _MnMx[NUM_CHANNELS][NUM_ENDPOINTS], + CGU_FLOAT _Min, CGU_FLOAT _Max, CGU_UINT8 nRedBits, + CGU_UINT8 nGreenBits, CGU_UINT8 nBlueBits) { + CGU_FLOAT Fctrs0[3]; + CGU_FLOAT Fctrs1[3]; + + Fctrs1[RC] = (CGU_FLOAT)(1 << nRedBits); + Fctrs1[GC] = (CGU_FLOAT)(1 << nGreenBits); + Fctrs1[BC] = (CGU_FLOAT)(1 << nBlueBits); + Fctrs0[RC] = (CGU_FLOAT)(1 << (PIX_GRID - nRedBits)); + Fctrs0[GC] = (CGU_FLOAT)(1 << (PIX_GRID - nGreenBits)); + Fctrs0[BC] = (CGU_FLOAT)(1 << (PIX_GRID - nBlueBits)); + + for (CGU_INT32 j = 0; j < 3; j++) { + for (CGU_INT32 k = 0; k < 2; k++) { + _RmpF[j][k] = floor(_MnMx[j][k]); + if (_RmpF[j][k] <= _Min) + _RmpF[j][k] = _Min; + else { + _RmpF[j][k] += + floor(128.f / Fctrs1[j]) - floor(_RmpF[j][k] / Fctrs1[j]); + _RmpF[j][k] = minf(_RmpF[j][k], _Max); + } + + _RmpF[j][k] = floor(_RmpF[j][k] / Fctrs0[j]) * Fctrs0[j]; + } + } +} +#endif // !BC5 +#endif // BC4 + +/*------------------------------------------------------------------------------------------------ +// this is how the end points is going to be look like when decompressed +------------------------------------------------------------------------------------------------*/ +inline void MkWkRmpPts(CGU_BOOL *_bEq, + CGU_FLOAT _OutRmpPts[NUM_CHANNELS][NUM_ENDPOINTS], + CGU_FLOAT _InpRmpPts[NUM_CHANNELS][NUM_ENDPOINTS], + CGU_UINT8 nRedBits, CGU_UINT8 nGreenBits, + CGU_UINT8 nBlueBits) { + CGU_FLOAT Fctrs[3]; + Fctrs[RC] = (CGU_FLOAT)(1 << nRedBits); + Fctrs[GC] = (CGU_FLOAT)(1 << nGreenBits); + Fctrs[BC] = (CGU_FLOAT)(1 << nBlueBits); + + *_bEq = TRUE; + // find whether input ramp is flat + for (CGU_INT32 j = 0; j < 3; j++) + *_bEq &= (_InpRmpPts[j][0] == _InpRmpPts[j][1]); + + // end points on the integer grid + for (CGU_INT32 j = 0; j < 3; j++) { + for (CGU_INT32 k = 0; k < 2; k++) { + // Apply the lower bit replication to give full dynamic range 
+ _OutRmpPts[j][k] = _InpRmpPts[j][k] + floor(_InpRmpPts[j][k] / Fctrs[j]); + _OutRmpPts[j][k] = maxf((CGU_FLOAT)_OutRmpPts[j][k], 0.f); + _OutRmpPts[j][k] = minf((CGU_FLOAT)_OutRmpPts[j][k], 255.f); + } + } +} + +/*------------------------------------------------------------------------------------------------ +1 DIM ramp +------------------------------------------------------------------------------------------------*/ + +inline void BldClrRmp(CGU_FLOAT _Rmp[MAX_POINTS], + CGU_FLOAT _InpRmp[NUM_ENDPOINTS], CGU_UINT8 dwNumPoints) { + // linear interpolate end points to get the ramp + _Rmp[0] = _InpRmp[0]; + _Rmp[dwNumPoints - 1] = _InpRmp[1]; + if (dwNumPoints % 2) + _Rmp[dwNumPoints] = + 1000000.f; // for 3 point ramp; not to select the 4th point as min + for (CGU_INT32 e = 1; e < dwNumPoints - 1; e++) + _Rmp[e] = floor((_Rmp[0] * (dwNumPoints - 1 - e) + + _Rmp[dwNumPoints - 1] * e + dwRndAmount[dwNumPoints]) / + (CGU_FLOAT)(dwNumPoints - 1)); +} + +/*------------------------------------------------------------------------------------------------ +// build 3D ramp +------------------------------------------------------------------------------------------------*/ +inline void BldRmp(CGU_FLOAT _Rmp[NUM_CHANNELS][MAX_POINTS], + CGU_FLOAT _InpRmp[NUM_CHANNELS][NUM_ENDPOINTS], + CGU_UINT8 dwNumPoints) { + for (CGU_INT32 j = 0; j < 3; j++) BldClrRmp(_Rmp[j], _InpRmp[j], dwNumPoints); +} + +/*------------------------------------------------------------------------------------------------ +Compute cumulative error for the current cluster +------------------------------------------------------------------------------------------------*/ +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) + +static CGU_FLOAT ClstrErr(CGU_FLOAT _Blk[MAX_BLOCK][NUM_CHANNELS], + CGU_FLOAT _Rpt[MAX_BLOCK], + CGU_FLOAT _Rmp[NUM_CHANNELS][MAX_POINTS], + int _NmbClrs, int _blcktp, CGU_BOOL _ConstRamp, + CMP_GLOBAL const CMP_BC15Options *BC15options) { + CGU_FLOAT fError = 0.f; + 
int rmp_l = (_ConstRamp) ? 1 : _blcktp; + + // For each colour in the original block, find the closest cluster + // and compute the comulative error + for (CGU_INT32 i = 0; i < _NmbClrs; i++) { + CGU_FLOAT fShortest = 99999999999.f; + + if (BC15options->m_bUseChannelWeighting) + for (CGU_INT32 r = 0; r < rmp_l; r++) { + // calculate the distance for each component + CGU_FLOAT fDistance = + (_Blk[i][RC] - _Rmp[RC][r]) * (_Blk[i][RC] - _Rmp[RC][r]) * + BC15options->m_fChannelWeights[0] + + (_Blk[i][GC] - _Rmp[GC][r]) * (_Blk[i][GC] - _Rmp[GC][r]) * + BC15options->m_fChannelWeights[1] + + (_Blk[i][BC] - _Rmp[BC][r]) * (_Blk[i][BC] - _Rmp[BC][r]) * + BC15options->m_fChannelWeights[2]; + + if (fDistance < fShortest) fShortest = fDistance; + } + else + for (CGU_INT32 r = 0; r < rmp_l; r++) { + // calculate the distance for each component + CGU_FLOAT fDistance = + (_Blk[i][RC] - _Rmp[RC][r]) * (_Blk[i][RC] - _Rmp[RC][r]) + + (_Blk[i][GC] - _Rmp[GC][r]) * (_Blk[i][GC] - _Rmp[GC][r]) + + (_Blk[i][BC] - _Rmp[BC][r]) * (_Blk[i][BC] - _Rmp[BC][r]); + + if (fDistance < fShortest) fShortest = fDistance; + } + + // accumulate the error + fError += fShortest * _Rpt[i]; + } + + return fError; +} +#endif // !BC5 +#endif // !BC4 + +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) +static CGU_FLOAT Refine3D(CGU_FLOAT _OutRmpPnts[NUM_CHANNELS][NUM_ENDPOINTS], + CGU_FLOAT _InpRmpPnts[NUM_CHANNELS][NUM_ENDPOINTS], + CGU_FLOAT _Blk[MAX_BLOCK][NUM_CHANNELS], + CGU_FLOAT _Rpt[MAX_BLOCK], int _NmrClrs, + CGU_UINT8 dwNumPoints, + CMP_GLOBAL const CMP_BC15Options *BC15options, + CGU_UINT8 nRedBits, CGU_UINT8 nGreenBits, + CGU_UINT8 nBlueBits, CGU_UINT8 nRefineSteps) { + ALIGN_16 CGU_FLOAT Rmp[NUM_CHANNELS][MAX_POINTS]; + + CGU_FLOAT Blk[MAX_BLOCK][NUM_CHANNELS]; + for (CGU_INT32 i = 0; i < _NmrClrs; i++) + for (CGU_INT32 j = 0; j < 3; j++) Blk[i][j] = _Blk[i][j]; + + CGU_FLOAT fWeightRed = BC15options->m_fChannelWeights[0]; + CGU_FLOAT fWeightGreen = 
BC15options->m_fChannelWeights[1]; + CGU_FLOAT fWeightBlue = BC15options->m_fChannelWeights[2]; + + // here is our grid + CGU_FLOAT Fctrs[3]; + Fctrs[RC] = (CGU_FLOAT)(1 << (PIX_GRID - nRedBits)); + Fctrs[GC] = (CGU_FLOAT)(1 << (PIX_GRID - nGreenBits)); + Fctrs[BC] = (CGU_FLOAT)(1 << (PIX_GRID - nBlueBits)); + + CGU_FLOAT InpRmp0[NUM_CHANNELS][NUM_ENDPOINTS]; + CGU_FLOAT InpRmp[NUM_CHANNELS][NUM_ENDPOINTS]; + for (CGU_INT32 k = 0; k < 2; k++) + for (CGU_INT32 j = 0; j < 3; j++) + InpRmp0[j][k] = InpRmp[j][k] = _OutRmpPnts[j][k] = _InpRmpPnts[j][k]; + + // make ramp endpoints the way they'll going to be decompressed + // plus check whether the ramp is flat + CGU_BOOL Eq; + CGU_FLOAT WkRmpPts[NUM_CHANNELS][NUM_ENDPOINTS]; + MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); + + // build ramp for all 3 colors + BldRmp(Rmp, WkRmpPts, dwNumPoints); + + // clusterize for the current ramp + CGU_FLOAT bestE = + ClstrErr(Blk, _Rpt, Rmp, _NmrClrs, dwNumPoints, Eq, BC15options); + if (bestE == 0.f || !nRefineSteps) // if exact, we've done + return bestE; + + // Jitter endpoints in each direction + int nRefineStart = 0 - (minb(nRefineSteps, (CGU_UINT8)8)); + int nRefineEnd = minb(nRefineSteps, (CGU_UINT8)8); + for (CGU_INT32 nJitterG0 = nRefineStart; nJitterG0 <= nRefineEnd; + nJitterG0++) { + InpRmp[GC][0] = + minf(maxf(InpRmp0[GC][0] + nJitterG0 * Fctrs[GC], 0.f), 255.f); + for (CGU_INT32 nJitterG1 = nRefineStart; nJitterG1 <= nRefineEnd; + nJitterG1++) { + InpRmp[GC][1] = + minf(maxf(InpRmp0[GC][1] + nJitterG1 * Fctrs[GC], 0.f), 255.f); + MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); + BldClrRmp(Rmp[GC], WkRmpPts[GC], dwNumPoints); + + CGU_FLOAT RmpErrG[MAX_POINTS][MAX_BLOCK]; + for (CGU_INT32 i = 0; i < _NmrClrs; i++) { + for (CGU_INT32 r = 0; r < dwNumPoints; r++) { + CGU_FLOAT DistG = (Rmp[GC][r] - Blk[i][GC]); + RmpErrG[r][i] = DistG * DistG * fWeightGreen; + } + } + + for (CGU_INT32 nJitterB0 = nRefineStart; nJitterB0 <= 
nRefineEnd; + nJitterB0++) { + InpRmp[BC][0] = + minf(maxf(InpRmp0[BC][0] + nJitterB0 * Fctrs[BC], 0.f), 255.f); + for (CGU_INT32 nJitterB1 = nRefineStart; nJitterB1 <= nRefineEnd; + nJitterB1++) { + InpRmp[BC][1] = + minf(maxf(InpRmp0[BC][1] + nJitterB1 * Fctrs[BC], 0.f), 255.f); + MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); + BldClrRmp(Rmp[BC], WkRmpPts[BC], dwNumPoints); + + CGU_FLOAT RmpErr[MAX_POINTS][MAX_BLOCK]; + for (CGU_INT32 i = 0; i < _NmrClrs; i++) { + for (CGU_INT32 r = 0; r < dwNumPoints; r++) { + CGU_FLOAT DistB = (Rmp[BC][r] - Blk[i][BC]); + RmpErr[r][i] = RmpErrG[r][i] + DistB * DistB * fWeightBlue; + } + } + + for (CGU_INT32 nJitterR0 = nRefineStart; nJitterR0 <= nRefineEnd; + nJitterR0++) { + InpRmp[RC][0] = + minf(maxf(InpRmp0[RC][0] + nJitterR0 * Fctrs[RC], 0.f), 255.f); + for (CGU_INT32 nJitterR1 = nRefineStart; nJitterR1 <= nRefineEnd; + nJitterR1++) { + InpRmp[RC][1] = minf( + maxf(InpRmp0[RC][1] + nJitterR1 * Fctrs[RC], 0.f), 255.f); + MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, + nBlueBits); + BldClrRmp(Rmp[RC], WkRmpPts[RC], dwNumPoints); + + // compute cumulative error + CGU_FLOAT mse = 0.f; + int rmp_l = (Eq) ? 
1 : dwNumPoints; + for (CGU_INT32 k = 0; k < _NmrClrs; k++) { + CGU_FLOAT MinErr = 10000000.f; + for (CGU_INT32 r = 0; r < rmp_l; r++) { + CGU_FLOAT Dist = (Rmp[RC][r] - Blk[k][RC]); + CGU_FLOAT Err = RmpErr[r][k] + Dist * Dist * fWeightRed; + MinErr = minf(MinErr, Err); + } + mse += MinErr * _Rpt[k]; + } + + // save if we achieve better result + if (mse < bestE) { + bestE = mse; + for (CGU_INT32 k = 0; k < 2; k++) + for (CGU_INT32 j = 0; j < 3; j++) + _OutRmpPnts[j][k] = InpRmp[j][k]; + } + } + } + } + } + } + } + + return bestE; +} +#endif // !BC5 +#endif // BC4 + +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) + +static CGU_FLOAT Refine(CGU_FLOAT _OutRmpPnts[NUM_CHANNELS][NUM_ENDPOINTS], + CGU_FLOAT _InpRmpPnts[NUM_CHANNELS][NUM_ENDPOINTS], + CGU_FLOAT _Blk[MAX_BLOCK][NUM_CHANNELS], + CGU_FLOAT _Rpt[MAX_BLOCK], int _NmrClrs, + CGU_UINT8 dwNumPoints, + CMP_GLOBAL const CMP_BC15Options *BC15options, + CGU_UINT8 nRedBits, CGU_UINT8 nGreenBits, + CGU_UINT8 nBlueBits, CGU_UINT8 nRefineSteps) { + ALIGN_16 CGU_FLOAT Rmp[NUM_CHANNELS][MAX_POINTS]; + + CGU_FLOAT Blk[MAX_BLOCK][NUM_CHANNELS]; + for (CGU_INT32 i = 0; i < _NmrClrs; i++) + for (CGU_INT32 j = 0; j < 3; j++) Blk[i][j] = _Blk[i][j]; + + CGU_FLOAT fWeightRed = BC15options->m_fChannelWeights[0]; + CGU_FLOAT fWeightGreen = BC15options->m_fChannelWeights[1]; + CGU_FLOAT fWeightBlue = BC15options->m_fChannelWeights[2]; + + // here is our grid + CGU_FLOAT Fctrs[3]; + Fctrs[RC] = (CGU_FLOAT)(1 << (PIX_GRID - nRedBits)); + Fctrs[GC] = (CGU_FLOAT)(1 << (PIX_GRID - nGreenBits)); + Fctrs[BC] = (CGU_FLOAT)(1 << (PIX_GRID - nBlueBits)); + + CGU_FLOAT InpRmp0[NUM_CHANNELS][NUM_ENDPOINTS]; + CGU_FLOAT InpRmp[NUM_CHANNELS][NUM_ENDPOINTS]; + for (CGU_INT32 k = 0; k < 2; k++) + for (CGU_INT32 j = 0; j < 3; j++) + InpRmp0[j][k] = InpRmp[j][k] = _OutRmpPnts[j][k] = _InpRmpPnts[j][k]; + + // make ramp endpoints the way they'll going to be decompressed + // plus check whether the ramp is flat + CGU_BOOL Eq; + 
CGU_FLOAT WkRmpPts[NUM_CHANNELS][NUM_ENDPOINTS]; + MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); + + // build ramp for all 3 colors + BldRmp(Rmp, WkRmpPts, dwNumPoints); + + // clusterize for the current ramp + CGU_FLOAT bestE = + ClstrErr(Blk, _Rpt, Rmp, _NmrClrs, dwNumPoints, Eq, BC15options); + if (bestE == 0.f || !nRefineSteps) // if exact, we've done + return bestE; + + // Tweak each component in isolation and get the best values + + // precompute ramp errors for Green and Blue + CGU_FLOAT RmpErr[MAX_POINTS][MAX_BLOCK]; + for (CGU_INT32 i = 0; i < _NmrClrs; i++) { + for (CGU_INT32 r = 0; r < dwNumPoints; r++) { + CGU_FLOAT DistG = (Rmp[GC][r] - Blk[i][GC]); + CGU_FLOAT DistB = (Rmp[BC][r] - Blk[i][BC]); + RmpErr[r][i] = DistG * DistG * fWeightGreen + DistB * DistB * fWeightBlue; + } + } + + // First Red + CGU_FLOAT bstC0 = InpRmp0[RC][0]; + CGU_FLOAT bstC1 = InpRmp0[RC][1]; + int nRefineStart = 0 - (minb(nRefineSteps, (CGU_UINT8)8)); + int nRefineEnd = minb(nRefineSteps, (CGU_UINT8)8); + for (CGU_INT32 i = nRefineStart; i <= nRefineEnd; i++) { + for (CGU_INT32 j = nRefineStart; j <= nRefineEnd; j++) { + // make a move; both sides of interval. + InpRmp[RC][0] = minf(maxf(InpRmp0[RC][0] + i * Fctrs[RC], 0.f), 255.f); + InpRmp[RC][1] = minf(maxf(InpRmp0[RC][1] + j * Fctrs[RC], 0.f), 255.f); + + // make ramp endpoints the way they'll going to be decompressed + // plus check whether the ramp is flat + MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); + + // build ramp only for red + BldClrRmp(Rmp[RC], WkRmpPts[RC], dwNumPoints); + + // compute cumulative error + CGU_FLOAT mse = 0.f; + int rmp_l = (Eq) ? 
1 : dwNumPoints; + for (CGU_INT32 k = 0; k < _NmrClrs; k++) { + CGU_FLOAT MinErr = 10000000.f; + for (CGU_INT32 r = 0; r < rmp_l; r++) { + CGU_FLOAT Dist = (Rmp[RC][r] - Blk[k][RC]); + CGU_FLOAT Err = RmpErr[r][k] + Dist * Dist * fWeightRed; + MinErr = minf(MinErr, Err); + } + mse += MinErr * _Rpt[k]; + } + + // save if we achieve better result + if (mse < bestE) { + bstC0 = InpRmp[RC][0]; + bstC1 = InpRmp[RC][1]; + bestE = mse; + } + } + } + + // our best REDs + InpRmp[RC][0] = bstC0; + InpRmp[RC][1] = bstC1; + + // make ramp endpoints the way they'll going to be decompressed + // plus check whether the ramp is flat + MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); + + // build ramp only for green + BldRmp(Rmp, WkRmpPts, dwNumPoints); + + // precompute ramp errors for Red and Blue + for (CGU_INT32 i = 0; i < _NmrClrs; i++) { + for (CGU_INT32 r = 0; r < dwNumPoints; r++) { + CGU_FLOAT DistR = (Rmp[RC][r] - Blk[i][RC]); + CGU_FLOAT DistB = (Rmp[BC][r] - Blk[i][BC]); + RmpErr[r][i] = DistR * DistR * fWeightRed + DistB * DistB * fWeightBlue; + } + } + + // Now green + bstC0 = InpRmp0[GC][0]; + bstC1 = InpRmp0[GC][1]; + for (CGU_INT32 i = nRefineStart; i <= nRefineEnd; i++) { + for (CGU_INT32 j = nRefineStart; j <= nRefineEnd; j++) { + InpRmp[GC][0] = minf(maxf(InpRmp0[GC][0] + i * Fctrs[GC], 0.f), 255.f); + InpRmp[GC][1] = minf(maxf(InpRmp0[GC][1] + j * Fctrs[GC], 0.f), 255.f); + + MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); + BldClrRmp(Rmp[GC], WkRmpPts[GC], dwNumPoints); + + CGU_FLOAT mse = 0.f; + int rmp_l = (Eq) ? 
1 : dwNumPoints; + for (CGU_INT32 k = 0; k < _NmrClrs; k++) { + CGU_FLOAT MinErr = 10000000.f; + for (CGU_INT32 r = 0; r < rmp_l; r++) { + CGU_FLOAT Dist = (Rmp[GC][r] - Blk[k][GC]); + CGU_FLOAT Err = RmpErr[r][k] + Dist * Dist * fWeightGreen; + MinErr = minf(MinErr, Err); + } + mse += MinErr * _Rpt[k]; + } + + if (mse < bestE) { + bstC0 = InpRmp[GC][0]; + bstC1 = InpRmp[GC][1]; + bestE = mse; + } + } + } + + // our best GREENs + InpRmp[GC][0] = bstC0; + InpRmp[GC][1] = bstC1; + + MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); + BldRmp(Rmp, WkRmpPts, dwNumPoints); + + // ramp err for Red and Green + for (CGU_INT32 i = 0; i < _NmrClrs; i++) { + for (CGU_INT32 r = 0; r < dwNumPoints; r++) { + CGU_FLOAT DistR = (Rmp[RC][r] - Blk[i][RC]); + CGU_FLOAT DistG = (Rmp[GC][r] - Blk[i][GC]); + RmpErr[r][i] = DistR * DistR * fWeightRed + DistG * DistG * fWeightGreen; + } + } + + bstC0 = InpRmp0[BC][0]; + bstC1 = InpRmp0[BC][1]; + // Now blue + for (CGU_INT32 i = nRefineStart; i <= nRefineEnd; i++) { + for (CGU_INT32 j = nRefineStart; j <= nRefineEnd; j++) { + InpRmp[BC][0] = minf(maxf(InpRmp0[BC][0] + i * Fctrs[BC], 0.f), 255.f); + InpRmp[BC][1] = minf(maxf(InpRmp0[BC][1] + j * Fctrs[BC], 0.f), 255.f); + + MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); + BldClrRmp(Rmp[BC], WkRmpPts[BC], dwNumPoints); + + CGU_FLOAT mse = 0.f; + int rmp_l = (Eq) ? 
1 : dwNumPoints; + for (CGU_INT32 k = 0; k < _NmrClrs; k++) { + CGU_FLOAT MinErr = 10000000.f; + for (CGU_INT32 r = 0; r < rmp_l; r++) { + CGU_FLOAT Dist = (Rmp[BC][r] - Blk[k][BC]); + CGU_FLOAT Err = RmpErr[r][k] + Dist * Dist * fWeightBlue; + MinErr = minf(MinErr, Err); + } + mse += MinErr * _Rpt[k]; + } + + if (mse < bestE) { + bstC0 = InpRmp[BC][0]; + bstC1 = InpRmp[BC][1]; + bestE = mse; + } + } + } + + // our best BLUEs + InpRmp[BC][0] = bstC0; + InpRmp[BC][1] = bstC1; + + // return our best choice + for (CGU_INT32 j = 0; j < 3; j++) + for (CGU_INT32 k = 0; k < 2; k++) _OutRmpPnts[j][k] = InpRmp[j][k]; + + return bestE; +} +#endif // !BC5 +#endif //! BC4 + +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) + +static CGU_DWORD ConstructColor(CGU_UINT8 R, CGU_UINT8 nRedBits, CGU_UINT8 G, + CGU_UINT8 nGreenBits, CGU_UINT8 B, + CGU_UINT8 nBlueBits) { + return (((R & nByteBitsMask[nRedBits]) + << (nGreenBits + nBlueBits - (PIX_GRID - nRedBits))) | + ((G & nByteBitsMask[nGreenBits]) + << (nBlueBits - (PIX_GRID - nGreenBits))) | + ((B & nByteBitsMask[nBlueBits]) >> ((PIX_GRID - nBlueBits)))); +} +#endif // !BC5 +#endif // !BC4 + +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) +// Compute error and find DXTC indexes for the current cluster +static CGU_FLOAT ClstrIntnl(CGU_FLOAT _Blk[MAX_BLOCK][NUM_CHANNELS], + CGU_UINT8 *_Indxs, + CGU_FLOAT _Rmp[NUM_CHANNELS][MAX_POINTS], + int dwBlockSize, CGU_UINT8 dwNumPoints, + CGU_BOOL _ConstRamp, + CMP_GLOBAL const CMP_BC15Options *BC15options, + CGU_BOOL _bUseAlpha) { + CGU_FLOAT Err = 0.f; + CGU_UINT8 rmp_l = (_ConstRamp) ? 
1 : dwNumPoints; + + // For each colour in the original block assign it + // to the closest cluster and compute the cumulative error + for (CGU_INT32 i = 0; i < dwBlockSize; i++) { + if (_bUseAlpha && *((CGU_DWORD *)&_Blk[i][AC]) == 0) + _Indxs[i] = dwNumPoints; + else { + CGU_FLOAT shortest = 99999999999.f; + CGU_UINT8 shortestIndex = 0; + if (BC15options) + for (CGU_UINT8 r = 0; r < rmp_l; r++) { + // calculate the distance for each component + CGU_FLOAT distance = + (_Blk[i][RC] - _Rmp[RC][r]) * (_Blk[i][RC] - _Rmp[RC][r]) * + BC15options->m_fChannelWeights[0] + + (_Blk[i][GC] - _Rmp[GC][r]) * (_Blk[i][GC] - _Rmp[GC][r]) * + BC15options->m_fChannelWeights[1] + + (_Blk[i][BC] - _Rmp[BC][r]) * (_Blk[i][BC] - _Rmp[BC][r]) * + BC15options->m_fChannelWeights[2]; + + if (distance < shortest) { + shortest = distance; + shortestIndex = r; + } + } + else + for (CGU_UINT8 r = 0; r < rmp_l; r++) { + // calculate the distance for each component + CGU_FLOAT distance = + (_Blk[i][RC] - _Rmp[RC][r]) * (_Blk[i][RC] - _Rmp[RC][r]) + + (_Blk[i][GC] - _Rmp[GC][r]) * (_Blk[i][GC] - _Rmp[GC][r]) + + (_Blk[i][BC] - _Rmp[BC][r]) * (_Blk[i][BC] - _Rmp[BC][r]); + + if (distance < shortest) { + shortest = distance; + shortestIndex = r; + } + } + + Err += shortest; + + // We have the index of the best cluster, so assign this in the block + // Reorder indices to match correct DXTC ordering + if (shortestIndex == dwNumPoints - 1) + shortestIndex = 1; + else if (shortestIndex) + shortestIndex++; + _Indxs[i] = shortestIndex; + } + } + + return Err; +} +#endif // !BC5 +#endif // !BC4 + +/*------------------------------------------------------------------------------------------------ +// input ramp is on the coarse grid +------------------------------------------------------------------------------------------------*/ +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) +static CGU_FLOAT ClstrBas(CGU_UINT8 *_Indxs, + CGU_FLOAT _Blk[MAX_BLOCK][NUM_CHANNELS], + CGU_FLOAT 
_InpRmp[NUM_CHANNELS][NUM_ENDPOINTS], + int dwBlockSize, CGU_UINT8 dwNumPoints, + CMP_GLOBAL const CMP_BC15Options *BC15options, + CGU_BOOL _bUseAlpha, CGU_UINT8 nRedBits, + CGU_UINT8 nGreenBits, CGU_UINT8 nBlueBits) { + // make ramp endpoints the way they'll going to be decompressed + CGU_BOOL Eq = TRUE; + CGU_FLOAT InpRmp[NUM_CHANNELS][NUM_ENDPOINTS]; + MkWkRmpPts(&Eq, InpRmp, _InpRmp, nRedBits, nGreenBits, nBlueBits); + + // build ramp as it would be built by decompressor + CGU_FLOAT Rmp[NUM_CHANNELS][MAX_POINTS]; + BldRmp(Rmp, InpRmp, dwNumPoints); + + // clusterize and find a cumulative error + return ClstrIntnl(_Blk, _Indxs, Rmp, dwBlockSize, dwNumPoints, Eq, + BC15options, _bUseAlpha); +} +#endif // !BC5 +#endif // !BC4 + +/*------------------------------------------------------------------------------------------------ +Clusterization the way it looks from the DXTC decompressor +------------------------------------------------------------------------------------------------*/ +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) +static CGU_FLOAT Clstr(CGU_UINT32 block_32[MAX_BLOCK], CGU_UINT32 dwBlockSize, + CGU_UINT8 nEndpoints[3][NUM_ENDPOINTS], + CGU_UINT8 *pcIndices, CGU_UINT8 dwNumPoints, + CMP_GLOBAL const CMP_BC15Options *BC15options, + CGU_BOOL _bUseAlpha, CGU_UINT8 _nAlphaThreshold, + CGU_UINT8 nRedBits, CGU_UINT8 nGreenBits, + CGU_UINT8 nBlueBits) { + CGU_INT32 c0 = ConstructColor(nEndpoints[RC][0], nRedBits, nEndpoints[GC][0], + nGreenBits, nEndpoints[BC][0], nBlueBits); + CGU_INT32 c1 = ConstructColor(nEndpoints[RC][1], nRedBits, nEndpoints[GC][1], + nGreenBits, nEndpoints[BC][1], nBlueBits); + CGU_INT32 nEndpointIndex0 = 0; + CGU_INT32 nEndpointIndex1 = 1; + if ((!(dwNumPoints & 0x1) && c0 <= c1) || ((dwNumPoints & 0x1) && c0 > c1)) { + nEndpointIndex0 = 1; + nEndpointIndex1 = 0; + } + + CGU_FLOAT InpRmp[NUM_CHANNELS][NUM_ENDPOINTS]; + InpRmp[RC][0] = (CGU_FLOAT)nEndpoints[RC][nEndpointIndex0]; + InpRmp[RC][1] = 
(CGU_FLOAT)nEndpoints[RC][nEndpointIndex1]; + InpRmp[GC][0] = (CGU_FLOAT)nEndpoints[GC][nEndpointIndex0]; + InpRmp[GC][1] = (CGU_FLOAT)nEndpoints[GC][nEndpointIndex1]; + InpRmp[BC][0] = (CGU_FLOAT)nEndpoints[BC][nEndpointIndex0]; + InpRmp[BC][1] = (CGU_FLOAT)nEndpoints[BC][nEndpointIndex1]; + + CGU_UINT32 dwAlphaThreshold = _nAlphaThreshold << 24; + CGU_FLOAT Blk[MAX_BLOCK][NUM_CHANNELS]; + for (CGU_UINT32 i = 0; i < dwBlockSize; i++) { + Blk[i][RC] = (CGU_FLOAT)((block_32[i] & 0xff0000) >> 16); + Blk[i][GC] = (CGU_FLOAT)((block_32[i] & 0xff00) >> 8); + Blk[i][BC] = (CGU_FLOAT)(block_32[i] & 0xff); + if (_bUseAlpha) + Blk[i][AC] = ((block_32[i] & 0xff000000) >= dwAlphaThreshold) ? 1.f : 0.f; + } + + return ClstrBas(pcIndices, Blk, InpRmp, dwBlockSize, dwNumPoints, BC15options, + _bUseAlpha, nRedBits, nGreenBits, nBlueBits); +} +#endif // !BC5 +#endif // !BC4 + +//---------------------------------------------------- +// This function decompresses a DXT colour block +// The block is decompressed to 8 bits per channel +// Result buffer is RGBA format +//---------------------------------------------------- +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) +#ifndef ASPM_GPU +static void DecompressDXTRGB_Internal(CGU_UINT8 rgbBlock[BLOCK_SIZE_4X4X4], + const CGU_UINT32 compressedBlock[2], + const CMP_BC15Options *BC15options) { + + CGU_BOOL bDXT1 = TRUE; + CGU_UINT32 n0 = compressedBlock[0] & 0xffff; + CGU_UINT32 n1 = compressedBlock[0] >> 16; + CGU_UINT32 r0; + CGU_UINT32 g0; + CGU_UINT32 b0; + CGU_UINT32 r1; + CGU_UINT32 g1; + CGU_UINT32 b1; + + r0 = ((n0 & 0xf800) >> 8); + g0 = ((n0 & 0x07e0) >> 3); + b0 = ((n0 & 0x001f) << 3); + + r1 = ((n1 & 0xf800) >> 8); + g1 = ((n1 & 0x07e0) >> 3); + b1 = ((n1 & 0x001f) << 3); + + // Apply the lower bit replication to give full dynamic range + r0 += (r0 >> 5); + r1 += (r1 >> 5); + g0 += (g0 >> 6); + g1 += (g1 >> 6); + b0 += (b0 >> 5); + b1 += (b1 >> 5); + +if (!BC15options->m_mapDecodeRGBA) +{ + 
//-------------------------------------------------------------- + // Channel mapping output as BGRA + //-------------------------------------------------------------- + CGU_UINT32 c0 = 0xff000000 | (r0<<16) | (g0<<8) | b0; + CGU_UINT32 c1 = 0xff000000 | (r1<<16) | (g1<<8) | b1; + + if(!bDXT1 || n0 > n1) + { + CGU_UINT32 c2 = 0xff000000 | (((2*r0+r1+1)/3)<<16) | (((2*g0+g1+1)/3)<<8) | (((2*b0+b1+1)/3)); + CGU_UINT32 c3 = 0xff000000 | (((2*r1+r0+1)/3)<<16) | (((2*g1+g0+1)/3)<<8) | (((2*b1+b0+1)/3)); + + for(int i=0; i<16; i++) + { + int index = (compressedBlock[1] >> (2 * i)) & 3; + + switch(index) + { + case 0: + ((CGU_UINT32*)rgbBlock)[i] = c0; + break; + case 1: + ((CGU_UINT32*)rgbBlock)[i] = c1; + break; + case 2: + ((CGU_UINT32*)rgbBlock)[i] = c2; + break; + case 3: + ((CGU_UINT32*)rgbBlock)[i] = c3; + break; + } + } + } + else + { + // Transparent decode + CGU_UINT32 c2 = 0xff000000 | (((r0+r1)/2)<<16) | (((g0+g1)/2)<<8) | (((b0+b1)/2)); + + for(int i=0; i<16; i++) + { + int index = (compressedBlock[1] >> (2 * i)) & 3; + + switch(index) + { + case 0: + ((CGU_UINT32*)rgbBlock)[i] = c0; + break; + case 1: + ((CGU_UINT32*)rgbBlock)[i] = c1; + break; + case 2: + ((CGU_UINT32*)rgbBlock)[i] = c2; + break; + case 3: + ((CGU_UINT32*)rgbBlock)[i] = 0x00000000; + break; + } + } + } +} +else { // MAP_BC15_TO_ABGR + //-------------------------------------------------------------- + // Channel mapping output as ARGB + //-------------------------------------------------------------- + + CGU_UINT32 c0 = 0xff000000 | (b0 << 16) | (g0 << 8) | r0; + CGU_UINT32 c1 = 0xff000000 | (b1 << 16) | (g1 << 8) | r1; + + if (!bDXT1 || n0 > n1) { + CGU_UINT32 c2 = 0xff000000 | (((2 * b0 + b1 + 1) / 3) << 16) | + (((2 * g0 + g1 + 1) / 3) << 8) | (((2 * r0 + r1 + 1) / 3)); + CGU_UINT32 c3 = 0xff000000 | (((2 * b1 + b0 + 1) / 3) << 16) | + (((2 * g1 + g0 + 1) / 3) << 8) | (((2 * r1 + r0 + 1) / 3)); + + for (int i = 0; i < 16; i++) { + int index = (compressedBlock[1] >> (2 * i)) & 3; + switch 
(index) { + case 0: + ((CMP_GLOBAL CGU_UINT32 *)rgbBlock)[i] = c0; + break; + case 1: + ((CMP_GLOBAL CGU_UINT32 *)rgbBlock)[i] = c1; + break; + case 2: + ((CMP_GLOBAL CGU_UINT32 *)rgbBlock)[i] = c2; + break; + case 3: + ((CMP_GLOBAL CGU_UINT32 *)rgbBlock)[i] = c3; + break; + } + } + } else { + // Transparent decode + CGU_UINT32 c2 = 0xff000000 | (((b0 + b1) / 2) << 16) | + (((g0 + g1) / 2) << 8) | (((r0 + r1) / 2)); + + for (int i = 0; i < 16; i++) { + int index = (compressedBlock[1] >> (2 * i)) & 3; + switch (index) { + case 0: + ((CMP_GLOBAL CGU_UINT32 *)rgbBlock)[i] = c0; + break; + case 1: + ((CMP_GLOBAL CGU_UINT32 *)rgbBlock)[i] = c1; + break; + case 2: + ((CMP_GLOBAL CGU_UINT32 *)rgbBlock)[i] = c2; + break; + case 3: + ((CMP_GLOBAL CGU_UINT32 *)rgbBlock)[i] = 0x00000000; + break; + } + } + } +} //MAP_ABGR +} +#endif // !ASPM_GPU +#endif // !BC5 +#endif // !BC4 + +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) +static int QSortIntCmp(const void *Elem1, const void *Elem2) { + return (*(CGU_INT32 *)Elem1 - *(CGU_INT32 *)Elem2); +} +#endif // !BC5 +#endif // !BC4 + +// Find the first approximation of the line +// Assume there is a linear relation +// Z = a * X_In +// Z = b * Y_In +// Find a,b to minimize MSE between Z and Z_In +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) + +static void FindAxis(CGU_FLOAT _outBlk[MAX_BLOCK][NUM_CHANNELS], + CGU_FLOAT fLineDirection[NUM_CHANNELS], + CGU_FLOAT fBlockCenter[NUM_CHANNELS], CGU_BOOL *_pbSmall, + CGU_FLOAT _inpBlk[MAX_BLOCK][NUM_CHANNELS], + CGU_FLOAT _inpRpt[MAX_BLOCK], int nDimensions, + int nNumColors) { + CGU_FLOAT Crrl[NUM_CHANNELS]; + CGU_FLOAT RGB2[NUM_CHANNELS]; + + fLineDirection[0] = fLineDirection[1] = fLineDirection[2] = RGB2[0] = + RGB2[1] = RGB2[2] = Crrl[0] = Crrl[1] = Crrl[2] = fBlockCenter[0] = + fBlockCenter[1] = fBlockCenter[2] = 0.f; + + // sum position of all points + CGU_FLOAT fNumPoints = 0.f; + for (CGU_INT32 i = 0; i < nNumColors; i++) { + 
fBlockCenter[0] += _inpBlk[i][0] * _inpRpt[i]; + fBlockCenter[1] += _inpBlk[i][1] * _inpRpt[i]; + fBlockCenter[2] += _inpBlk[i][2] * _inpRpt[i]; + fNumPoints += _inpRpt[i]; + } + + // and then average to calculate center coordinate of block + fBlockCenter[0] /= fNumPoints; + fBlockCenter[1] /= fNumPoints; + fBlockCenter[2] /= fNumPoints; + + for (CGU_INT32 i = 0; i < nNumColors; i++) { + // calculate output block as offsets around block center + _outBlk[i][0] = _inpBlk[i][0] - fBlockCenter[0]; + _outBlk[i][1] = _inpBlk[i][1] - fBlockCenter[1]; + _outBlk[i][2] = _inpBlk[i][2] - fBlockCenter[2]; + + // compute correlation matrix + // RGB2 = sum of ((distance from point from center) squared) + // Crrl = ???????. Seems to be be some calculation based on distance from + // point center in two dimensions + for (CGU_INT32 j = 0; j < nDimensions; j++) { + RGB2[j] += _outBlk[i][j] * _outBlk[i][j] * _inpRpt[i]; + Crrl[j] += _outBlk[i][j] * _outBlk[i][(j + 1) % 3] * _inpRpt[i]; + } + } + + // if set's diameter is small + int i0 = 0, i1 = 1; + CGU_FLOAT mxRGB2 = 0.f; + int k = 0, j = 0; + CGU_FLOAT fEPS = fNumPoints * EPS; + for (k = 0, j = 0; j < 3; j++) { + if (RGB2[j] >= fEPS) + k++; + else + RGB2[j] = 0.f; + + if (mxRGB2 < RGB2[j]) { + mxRGB2 = RGB2[j]; + i0 = j; + } + } + + CGU_FLOAT fEPS2 = fNumPoints * EPS2; + *_pbSmall = TRUE; + for (j = 0; j < 3; j++) *_pbSmall &= (RGB2[j] < fEPS2); + + if (*_pbSmall) // all are very small to avoid division on the small + // determinant + return; + + if (k == 1) // really only 1 dimension + fLineDirection[i0] = 1.; + else if (k == 2) // really only 2 dimensions + { + i1 = (RGB2[(i0 + 1) % 3] > 0.f) ? (i0 + 1) % 3 : (i0 + 2) % 3; + CGU_FLOAT Crl = (i1 == (i0 + 1) % 3) ? 
Crrl[i0] : Crrl[(i0 + 2) % 3]; + fLineDirection[i1] = Crl / RGB2[i0]; + fLineDirection[i0] = 1.; + } else { + CGU_FLOAT maxDet = 100000.f; + CGU_FLOAT Cs[3]; + // select max det for precision + for (j = 0; j < nDimensions; j++) { + CGU_FLOAT Det = RGB2[j] * RGB2[(j + 1) % 3] - Crrl[j] * Crrl[j]; + Cs[j] = fabs(Crrl[j] / sqrt(RGB2[j] * RGB2[(j + 1) % 3])); + if (maxDet < Det) { + maxDet = Det; + i0 = j; + } + } + + // inverse correl matrix + // -- -- -- -- + // | A B | | C -B | + // | B C | => | -B A | + // -- -- -- -- + CGU_FLOAT mtrx1[2][2]; + CGU_FLOAT vc1[2]; + CGU_FLOAT vc[2]; + vc1[0] = Crrl[(i0 + 2) % 3]; + vc1[1] = Crrl[(i0 + 1) % 3]; + // C + mtrx1[0][0] = RGB2[(i0 + 1) % 3]; + // A + mtrx1[1][1] = RGB2[i0]; + // -B + mtrx1[1][0] = mtrx1[0][1] = -Crrl[i0]; + // find a solution + vc[0] = mtrx1[0][0] * vc1[0] + mtrx1[0][1] * vc1[1]; + vc[1] = mtrx1[1][0] * vc1[0] + mtrx1[1][1] * vc1[1]; + // normalize + vc[0] /= maxDet; + vc[1] /= maxDet; + // find a line direction vector + fLineDirection[i0] = 1.; + fLineDirection[(i0 + 1) % 3] = 1.; + fLineDirection[(i0 + 2) % 3] = vc[0] + vc[1]; + } + + // normalize direction vector + CGU_FLOAT Len = fLineDirection[0] * fLineDirection[0] + + fLineDirection[1] * fLineDirection[1] + + fLineDirection[2] * fLineDirection[2]; + Len = sqrt(Len); + + for (j = 0; j < 3; j++) + fLineDirection[j] = (Len > 0.f) ? 
fLineDirection[j] / Len : 0.f; +} +#endif // !BC5 +#endif // !BC4 + +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) +static void CompressRGBBlockX( + CGU_FLOAT _RsltRmpPnts[NUM_CHANNELS][NUM_ENDPOINTS], + CGU_FLOAT _BlkIn[MAX_BLOCK][NUM_CHANNELS], CGU_FLOAT _Rpt[MAX_BLOCK], + int _UniqClrs, CGU_UINT8 dwNumPoints, CGU_BOOL b3DRefinement, + CGU_UINT8 nRefinementSteps, CMP_GLOBAL const CMP_BC15Options *BC15options, + CGU_UINT8 nRedBits, CGU_UINT8 nGreenBits, CGU_UINT8 nBlueBits) { + ALIGN_16 CGU_FLOAT Prj0[MAX_BLOCK]; + ALIGN_16 CGU_FLOAT Prj[MAX_BLOCK]; + ALIGN_16 CGU_FLOAT PrjErr[MAX_BLOCK]; + ALIGN_16 CGU_FLOAT LineDir[NUM_CHANNELS]; + ALIGN_16 CGU_FLOAT RmpIndxs[MAX_BLOCK]; + + CGU_FLOAT LineDirG[NUM_CHANNELS]; + CGU_FLOAT PosG[NUM_ENDPOINTS]; + CGU_FLOAT Blk[MAX_BLOCK][NUM_CHANNELS]; + CGU_FLOAT BlkSh[MAX_BLOCK][NUM_CHANNELS]; + CGU_FLOAT LineDir0[NUM_CHANNELS]; + CGU_FLOAT Mdl[NUM_CHANNELS]; + + CGU_FLOAT rsltC[NUM_CHANNELS][NUM_ENDPOINTS]; + int i, j, k; + + // down to [0., 1.] + for (i = 0; i < _UniqClrs; i++) + for (j = 0; j < 3; j++) Blk[i][j] = _BlkIn[i][j] / 255.f; + + CGU_BOOL isDONE = FALSE; + + // as usual if not more then 2 different colors, we've done + if (_UniqClrs <= 2) { + for (j = 0; j < 3; j++) { + rsltC[j][0] = _BlkIn[0][j]; + rsltC[j][1] = _BlkIn[_UniqClrs - 1][j]; + } + isDONE = TRUE; + } + + if (!isDONE) { + // This is our first attempt to find an axis we will go along. + // The cumulation is done to find a line minimizing the MSE from the + // input 3D points. + CGU_BOOL bSmall = TRUE; + FindAxis(BlkSh, LineDir0, Mdl, &bSmall, Blk, _Rpt, 3, _UniqClrs); + + // While trying to find the axis we found that the diameter of the input + // set is quite small. Do not bother. + if (bSmall) { + for (j = 0; j < 3; j++) { + rsltC[j][0] = _BlkIn[0][j]; + rsltC[j][1] = _BlkIn[_UniqClrs - 1][j]; + } + isDONE = TRUE; + } + } + + // GCC is being an awful being when it comes to goto-jumps. + // So please bear with this. 
+ if (!isDONE) { + CGU_FLOAT ErrG = 10000000.f; + CGU_FLOAT PrjBnd[NUM_ENDPOINTS]; + ALIGN_16 CGU_FLOAT PreMRep[MAX_BLOCK]; + for (j = 0; j < 3; j++) LineDir[j] = LineDir0[j]; + + // Here is the main loop. + // 1. Project input set on the axis in consideration. + // 2. Run 1 dimensional search (see scalar case) to find an (sub) optimal + // pair of end points. + // 3. Compute the vector of indexes (or clusters) for the current + // approximate ramp. + // 4. Present our color channels as 3 16DIM vectors. + // 5. Find closest approximation of each of 16DIM color vector with the + // projection of the 16DIM index vector. + // 6. Plug the projections as a new directional vector for the axis. + // 7. Goto 1. + + // D - is 16 dim "index" vector (or 16 DIM vector of indexes - {0, 1/3, + // 2/3, 0, ...,}, but shifted and normalized). Ci - is a 16 dim vector of + // color i. for each Ci find a scalar Ai such that (Ai * D - Ci) (Ai * D + // - Ci) -> min , i.e distance between vector AiD and C is min. You can + // think of D as a unit interval(vector) "clusterizer", and Ai is a scale + // you need to apply to the clusterizer to approximate the Ci vector + // instead of the unit vector. + + // Solution is + + // Ai = (D . Ci) / (D . D); . - is a dot product. + + // in 3 dim space Ai(s) represent a line direction, along which + // we again try to find (sub)optimal quantizer. + + // That's what our for(;;) loop is about. + for (;;) { + // 1. Project input set on the axis in consideration. + // From Foley & Van Dam: Closest point of approach of a line (P + v) to a + // point (R) is + // P + ((R-P).v) / (v.v))v + // The distance along v is therefore (R-P).v / (v.v) + // (v.v) is 1 if v is a unit vector. 
+ // + PrjBnd[0] = 1000.; + PrjBnd[1] = -1000.; + for (i = 0; i < MAX_BLOCK; i++) + Prj0[i] = Prj[i] = PrjErr[i] = PreMRep[i] = 0.f; + + for (i = 0; i < _UniqClrs; i++) { + Prj0[i] = Prj[i] = BlkSh[i][0] * LineDir[0] + BlkSh[i][1] * LineDir[1] + + BlkSh[i][2] * LineDir[2]; + + PrjErr[i] = (BlkSh[i][0] - LineDir[0] * Prj[i]) * + (BlkSh[i][0] - LineDir[0] * Prj[i]) + + (BlkSh[i][1] - LineDir[1] * Prj[i]) * + (BlkSh[i][1] - LineDir[1] * Prj[i]) + + (BlkSh[i][2] - LineDir[2] * Prj[i]) * + (BlkSh[i][2] - LineDir[2] * Prj[i]); + + PrjBnd[0] = minf(PrjBnd[0], Prj[i]); + PrjBnd[1] = maxf(PrjBnd[1], Prj[i]); + } + + // 2. Run 1 dimensional search (see scalar case) to find an (sub) optimal + // pair of end points. + + // min and max of the search interval + CGU_FLOAT Scl[NUM_ENDPOINTS]; + Scl[0] = PrjBnd[0] - (PrjBnd[1] - PrjBnd[0]) * 0.125f; + ; + Scl[1] = PrjBnd[1] + (PrjBnd[1] - PrjBnd[0]) * 0.125f; + ; + + // compute scaling factor to scale down the search interval to [0.,1] + const CGU_FLOAT Scl2 = (Scl[1] - Scl[0]) * (Scl[1] - Scl[0]); + const CGU_FLOAT overScl = 1.f / (Scl[1] - Scl[0]); + + for (i = 0; i < _UniqClrs; i++) { + // scale them + Prj[i] = (Prj[i] - Scl[0]) * overScl; + // premultiply the scale squire to plug into error computation later + PreMRep[i] = _Rpt[i] * Scl2; + } + + // scale first approximation of end points + for (k = 0; k < 2; k++) PrjBnd[k] = (PrjBnd[k] - Scl[0]) * overScl; + + CGU_FLOAT Err = MAX_ERROR; + + // search step + CGU_FLOAT stp = 0.025f; + + // low Start/End; high Start/End + const CGU_FLOAT lS = + (PrjBnd[0] - 2.f * stp > 0.f) ? PrjBnd[0] - 2.f * stp : 0.f; + const CGU_FLOAT hE = + (PrjBnd[1] + 2.f * stp < 1.f) ? PrjBnd[1] + 2.f * stp : 1.f; + + // find the best endpoints + CGU_FLOAT Pos[NUM_ENDPOINTS]; + CGU_FLOAT lP, hP; + int l, h; + for (l = 0, lP = lS; l < 8; l++, lP += stp) { + for (h = 0, hP = hE; h < 8; h++, hP -= stp) { + CGU_FLOAT err = Err; + // compute an error for the current pair of end points. 
+ err = RampSrchW(Prj, PrjErr, PreMRep, err, lP, hP, _UniqClrs, + dwNumPoints); + + if (err < Err) { + // save better result + Err = err; + Pos[0] = lP; + Pos[1] = hP; + } + } + } + + // inverse the scaling + for (k = 0; k < 2; k++) Pos[k] = Pos[k] * (Scl[1] - Scl[0]) + Scl[0]; + + // did we find somthing better from the previous run? + if (Err + 0.001 < ErrG) { + // yes, remember it + ErrG = Err; + LineDirG[0] = LineDir[0]; + LineDirG[1] = LineDir[1]; + LineDirG[2] = LineDir[2]; + PosG[0] = Pos[0]; + PosG[1] = Pos[1]; + // 3. Compute the vector of indexes (or clusters) for the current + // approximate ramp. + // indexes + const CGU_FLOAT step = (Pos[1] - Pos[0]) / (CGU_FLOAT)(dwNumPoints - 1); + const CGU_FLOAT step_h = step * (CGU_FLOAT)0.5; + const CGU_FLOAT rstep = (CGU_FLOAT)1.0f / step; + const CGU_FLOAT overBlkTp = 1.f / (CGU_FLOAT)(dwNumPoints - 1); + + // here the index vector is computed, + // shifted and normalized + CGU_FLOAT indxAvrg = (CGU_FLOAT)(dwNumPoints - 1) / 2.f; + + for (i = 0; i < _UniqClrs; i++) { + CGU_FLOAT del; + // CGU_UINT32 n = (CGU_UINT32)((b - _min_ex + (step*0.5f)) * rstep); + if ((del = Prj0[i] - Pos[0]) <= 0) + RmpIndxs[i] = 0.f; + else if (Prj0[i] - Pos[1] >= 0) + RmpIndxs[i] = (CGU_FLOAT)(dwNumPoints - 1); + else + RmpIndxs[i] = floor((del + step_h) * rstep); + // shift and normalization + RmpIndxs[i] = (RmpIndxs[i] - indxAvrg) * overBlkTp; + } + + // 4. Present our color channels as 3 16DIM vectors. + // 5. Find closest aproximation of each of 16DIM color vector with the + // pojection of the 16DIM index vector. + CGU_FLOAT Crs[3], Len, Len2; + for (i = 0, Crs[0] = Crs[1] = Crs[2] = Len = 0.f; i < _UniqClrs; i++) { + const CGU_FLOAT PreMlt = RmpIndxs[i] * _Rpt[i]; + Len += RmpIndxs[i] * PreMlt; + for (j = 0; j < 3; j++) Crs[j] += BlkSh[i][j] * PreMlt; + } + + LineDir[0] = LineDir[1] = LineDir[2] = 0.f; + if (Len > 0.f) { + LineDir[0] = Crs[0] / Len; + LineDir[1] = Crs[1] / Len; + LineDir[2] = Crs[2] / Len; + + // 6. 
Plug the projections as a new directional vector for the axis. + // 7. Goto 1. + Len2 = LineDir[0] * LineDir[0] + LineDir[1] * LineDir[1] + + LineDir[2] * LineDir[2]; + Len2 = sqrt(Len2); + + LineDir[0] /= Len2; + LineDir[1] /= Len2; + LineDir[2] /= Len2; + } + } else // We was not able to find anything better. Drop dead. + break; + } + + // inverse transform to find end-points of 3-color ramp + for (k = 0; k < 2; k++) + for (j = 0; j < 3; j++) + rsltC[j][k] = (PosG[k] * LineDirG[j] + Mdl[j]) * 255.f; + } + + // We've dealt with (almost) unrestricted full precision realm. + // Now back to the dirty digital world. + + // round the end points to make them look like compressed ones + CGU_FLOAT inpRmpEndPts[NUM_CHANNELS][NUM_ENDPOINTS]; + MkRmpOnGrid(inpRmpEndPts, rsltC, 0.f, 255.f, nRedBits, nGreenBits, nBlueBits); + + // This not a small procedure squeezes and stretches the ramp along each + // axis (R,G,B) separately while other 2 are fixed. It does it only over + // coarse grid - 565 that is. It tries to squeeze more precision for the + // real world ramp. 
+ if (b3DRefinement) + Refine3D(_RsltRmpPnts, inpRmpEndPts, _BlkIn, _Rpt, _UniqClrs, dwNumPoints, + BC15options, nRedBits, nGreenBits, nBlueBits, nRefinementSteps); + else + Refine(_RsltRmpPnts, inpRmpEndPts, _BlkIn, _Rpt, _UniqClrs, dwNumPoints, + BC15options, nRedBits, nGreenBits, nBlueBits, nRefinementSteps); +} +#endif // !BC5 +#endif // !BC4 + +#ifdef ASPM_GPU +void cmp_memsetfBCn(CGU_FLOAT ptr[], CGU_FLOAT value, CGU_UINT32 size) { + for (CGU_UINT32 i = 0; i < size; i++) { + ptr[i] = value; + } +} +#endif + +#ifdef ASPM_GPU +void memset(CGU_UINT8 *srcdata, CGU_UINT8 value, CGU_INT size) { + for (CGU_INT i = 0; i < size; i++) *srcdata++ = value; +} + +void memcpy(CGU_UINT8 *srcdata, CGU_UINT8 *dstdata, CGU_INT size) { + for (CGU_INT i = 0; i < size; i++) { + *srcdata = *dstdata; + srcdata++; + dstdata++; + } +} + +void cmp_memsetBC1(CGU_UINT8 ptr[], CGU_UINT8 value, CGU_UINT32 size) { + for (CGU_UINT32 i = 0; i < size; i++) { + ptr[i] = value; + } +} +#endif + +#ifdef ASPM_GPU +static void sortData_UINT32(CGU_UINT32 data_ordered[BLOCK_SIZE], + CGU_UINT32 projection[BLOCK_SIZE], + CGU_UINT32 numEntries // max 64 +) { + CMP_di what[BLOCK_SIZE]; + + for (CGU_UINT32 i = 0; i < numEntries; i++) { + what[i].index = i; + what[i].data = projection[i]; + } + + CGU_UINT32 tmp_index; + CGU_UINT32 tmp_data; + + for (CGU_UINT32 i = 1; i < numEntries; i++) { + for (CGU_UINT32 j = i; j > 0; j--) { + if (what[j - 1].data > what[j].data) { + tmp_index = what[j].index; + tmp_data = what[j].data; + what[j].index = what[j - 1].index; + what[j].data = what[j - 1].data; + what[j - 1].index = tmp_index; + what[j - 1].data = tmp_data; + } + } + } + + for (CGU_UINT32 i = 0; i < numEntries; i++) data_ordered[i] = what[i].data; +}; + +static void sortData_FLOAT(CGU_FLOAT data_ordered[BLOCK_SIZE], + CGU_FLOAT projection[BLOCK_SIZE], + CGU_UINT32 numEntries // max 64 +) { + CMP_df what[BLOCK_SIZE]; + + for (CGU_UINT32 i = 0; i < numEntries; i++) { + what[i].index = i; + what[i].data = 
projection[i]; + } + + CGU_UINT32 tmp_index; + CGU_FLOAT tmp_data; + + for (CGU_UINT32 i = 1; i < numEntries; i++) { + for (CGU_UINT32 j = i; j > 0; j--) { + if (what[j - 1].data > what[j].data) { + tmp_index = what[j].index; + tmp_data = what[j].data; + what[j].index = what[j - 1].index; + what[j].data = what[j - 1].data; + what[j - 1].index = tmp_index; + what[j - 1].data = tmp_data; + } + } + } + + for (CGU_UINT32 i = 0; i < numEntries; i++) data_ordered[i] = what[i].data; +}; +#endif + +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) +static CGU_FLOAT CompRGBBlock(CGU_UINT32 *block_32, CGU_UINT32 dwBlockSize, + CGU_UINT8 nRedBits, CGU_UINT8 nGreenBits, + CGU_UINT8 nBlueBits, + CGU_UINT8 nEndpoints[3][NUM_ENDPOINTS], + CGU_UINT8 *pcIndices, CGU_UINT8 dwNumPoints, + CGU_BOOL b3DRefinement, + CGU_UINT8 nRefinementSteps, + CMP_GLOBAL const CMP_BC15Options *BC15options, + CGU_BOOL _bUseAlpha, CGU_UINT8 _nAlphaThreshold) { + ALIGN_16 CGU_FLOAT Rpt[BLOCK_SIZE]; + ALIGN_16 CGU_FLOAT BlkIn[BLOCK_SIZE][NUM_CHANNELS]; +#ifndef ASPM_GPU + memset(Rpt, 0, sizeof(Rpt)); + memset(BlkIn, 0, sizeof(BlkIn)); +#else + cmp_memsetfBCn(&Rpt[0], 0, BLOCK_SIZE); + cmp_memsetfBCn(&BlkIn[0][0], 0, BLOCK_SIZE * NUM_CHANNELS); +#endif + + CGU_UINT32 dwAlphaThreshold = _nAlphaThreshold << 24; + CGU_UINT32 dwColors = 0; + CGU_UINT32 dwBlk[BLOCK_SIZE]; + for (CGU_UINT32 i = 0; i < dwBlockSize; i++) + if (!_bUseAlpha || (block_32[i] & 0xff000000) >= dwAlphaThreshold) + dwBlk[dwColors++] = block_32[i] | 0xff000000; + + // Do we have any colors ? + if (dwColors) { + CGU_BOOL bHasAlpha = (dwColors != dwBlockSize); + if (bHasAlpha && _bUseAlpha && !(dwNumPoints & 0x1)) return CMP_FLOAT_MAX; + + // CGU_UINT32 dwBlk_sorted[BLOCK_SIZE]; + // Here we are computing an unique number of colors. + // For each unique value we compute the number of it appearences. 
+#ifndef ASPM_GPU + qsort((void *)dwBlk, (size_t)dwColors, sizeof(CGU_UINT32), QSortIntCmp); +#else + sortData_UINT32(dwBlk, dwBlk, dwColors); +#endif + + CGU_UINT32 new_p; + CGU_UINT32 dwBlkU[BLOCK_SIZE]; + CGU_UINT32 dwUniqueColors = 0; + new_p = dwBlkU[0] = dwBlk[0]; + Rpt[dwUniqueColors] = 1.f; + for (CGU_UINT32 i = 1; i < dwColors; i++) { + if (new_p != dwBlk[i]) { + dwUniqueColors++; + new_p = dwBlkU[dwUniqueColors] = dwBlk[i]; + Rpt[dwUniqueColors] = 1.f; + } else + Rpt[dwUniqueColors] += 1.f; + } + dwUniqueColors++; + + // switch to float + for (CGU_UINT32 i = 0; i < dwUniqueColors; i++) { + BlkIn[i][RC] = (CGU_FLOAT)((dwBlkU[i] >> 16) & 0xff); // R + BlkIn[i][GC] = (CGU_FLOAT)((dwBlkU[i] >> 8) & 0xff); // G + BlkIn[i][BC] = (CGU_FLOAT)((dwBlkU[i] >> 0) & 0xff); // B + BlkIn[i][AC] = 255.f; // A + } + + CGU_FLOAT rsltC[NUM_CHANNELS][NUM_ENDPOINTS]; + CompressRGBBlockX(rsltC, BlkIn, Rpt, dwUniqueColors, dwNumPoints, + b3DRefinement, nRefinementSteps, BC15options, nRedBits, + nGreenBits, nBlueBits); + + // return to integer realm + for (CGU_INT32 i = 0; i < 3; i++) + for (CGU_INT32 j = 0; j < 2; j++) + nEndpoints[i][j] = (CGU_UINT8)rsltC[i][j]; + + return Clstr(block_32, dwBlockSize, nEndpoints, pcIndices, dwNumPoints, + BC15options, _bUseAlpha, _nAlphaThreshold, nRedBits, + nGreenBits, nBlueBits); + } else { + // All colors transparent + nEndpoints[0][0] = nEndpoints[1][0] = nEndpoints[2][0] = 0; + nEndpoints[0][1] = nEndpoints[1][1] = nEndpoints[2][1] = 0xff; +#ifndef ASPM_GPU + memset(pcIndices, 0xff, dwBlockSize); +#else + cmp_memsetBC1(pcIndices, 0xff, dwBlockSize); +#endif + return 0.0; + } +} +#endif // !BC5 +#endif // !BC4 + +#if !defined(BC4_ENCODE_KERNEL_H) +#if !defined(BC5_ENCODE_KERNEL_H) +static void CompressRGBBlock(const CGU_UINT8 rgbBlock[64], + CMP_GLOBAL CGU_UINT32 compressedBlock[2], + CMP_GLOBAL const CMP_BC15Options *BC15options, + CGU_BOOL bDXT1, CGU_BOOL bDXT1UseAlpha, + CGU_UINT8 nDXT1AlphaThreshold) { + CGU_BOOL m_b3DRefinement = 
FALSE; + CGU_UINT8 m_nRefinementSteps = 1; + + /* + ARGB Channel indexes + */ + if (bDXT1) { + CGU_UINT8 nEndpoints[2][3][2]; + CGU_UINT8 nIndices[2][16]; + + CGU_FLOAT fError3 = CompRGBBlock((CGU_UINT32 *)rgbBlock, BLOCK_SIZE_4X4, RG, GG, BG, nEndpoints[0], + nIndices[0], 3, m_b3DRefinement, m_nRefinementSteps, BC15options, + bDXT1UseAlpha, nDXT1AlphaThreshold); + CGU_FLOAT fError4 = (fError3 == 0.0) ? CMP_FLOAT_MAX : CompRGBBlock((CGU_UINT32 *)rgbBlock, BLOCK_SIZE_4X4, RG, GG, BG, + nEndpoints[1], nIndices[1], 4, m_b3DRefinement, + m_nRefinementSteps, BC15options, bDXT1UseAlpha, + nDXT1AlphaThreshold); + + CGU_INT32 nMethod = (fError3 <= fError4) ? 0 : 1; + CGU_INT32 c0 = ConstructColour((nEndpoints[nMethod][RC][0] >> (8 - RG)), + (nEndpoints[nMethod][GC][0] >> (8 - GG)), + (nEndpoints[nMethod][BC][0] >> (8 - BG))); + CGU_INT32 c1 = ConstructColour((nEndpoints[nMethod][RC][1] >> (8 - RG)), + (nEndpoints[nMethod][GC][1] >> (8 - GG)), + (nEndpoints[nMethod][BC][1] >> (8 - BG))); + CGU_BOOL m1 = (nMethod == 1 && c0 <= c1); + CGU_BOOL m2 = (nMethod == 0 && c0 > c1); + if (m1 || m2) + compressedBlock[0] = c1 | (c0 << 16); + else + compressedBlock[0] = c0 | (c1 << 16); + + compressedBlock[1] = 0; + for (CGU_INT32 i = 0; i < 16; i++) + compressedBlock[1] |= (nIndices[nMethod][i] << (2 * i)); + } else { + CGU_UINT8 nEndpoints[3][2]; + CGU_UINT8 nIndices[BLOCK_SIZE_4X4]; + + CompRGBBlock((CGU_UINT32 *)rgbBlock, BLOCK_SIZE_4X4, RG, GG, BG, nEndpoints, + nIndices, 4, m_b3DRefinement, m_nRefinementSteps, BC15options, + bDXT1UseAlpha, nDXT1AlphaThreshold); + + CGU_INT32 c0 = ConstructColour((nEndpoints[RC][0] >> (8 - RG)), + (nEndpoints[GC][0] >> (8 - GG)), + (nEndpoints[BC][0] >> (8 - BG))); + CGU_INT32 c1 = ConstructColour((nEndpoints[RC][1] >> (8 - RG)), + (nEndpoints[GC][1] >> (8 - GG)), + (nEndpoints[BC][1] >> (8 - BG))); + if (c0 <= c1) + compressedBlock[0] = c1 | (c0 << 16); + else + compressedBlock[0] = c0 | (c1 << 16); + + compressedBlock[1] = 0; + for (CGU_INT32 i = 
0; i < 16; i++) + compressedBlock[1] |= (nIndices[i] << (2 * i)); + } +} +#endif // BC5 + +#endif // BC4 + +#if !defined(BC1_ENCODE_KERNEL_H) +#if !defined(BC2_ENCODE_KERNEL_H) +static CGU_FLOAT RmpSrch1(CGU_FLOAT _Blk[MAX_BLOCK], CGU_FLOAT _Rpt[MAX_BLOCK], + CGU_FLOAT _maxerror, CGU_FLOAT _min_ex, + CGU_FLOAT _max_ex, CGU_INT _NmbrClrs, + CGU_UINT8 nNumPoints) { + CGU_FLOAT error = 0; + const CGU_FLOAT step = (_max_ex - _min_ex) / (CGU_FLOAT)(nNumPoints - 1); + const CGU_FLOAT step_h = step * 0.5f; + const CGU_FLOAT rstep = 1.0f / step; + + for (CGU_INT i = 0; i < _NmbrClrs; i++) { + CGU_FLOAT v; + // Work out which value in the block this select + CGU_FLOAT del; + + if ((del = _Blk[i] - _min_ex) <= 0) + v = _min_ex; + else if (_Blk[i] - _max_ex >= 0) + v = _max_ex; + else + v = (floor((del + step_h) * rstep) * step) + _min_ex; + + // And accumulate the error + CGU_FLOAT del2 = (_Blk[i] - v); + error += del2 * del2 * _Rpt[i]; + + // if we've already lost to the previous step bail out + if (_maxerror < error) { + error = _maxerror; + break; + } + } + return error; +} +#endif // !BC2 + +#if !defined(BC2_ENCODE_KERNEL_H) +static CGU_FLOAT BlockRefine1(CGU_FLOAT _Blk[MAX_BLOCK], + CGU_FLOAT _Rpt[MAX_BLOCK], CGU_FLOAT _MaxError, + CGU_FLOAT *_min_ex, CGU_FLOAT *_max_ex, + CGU_FLOAT _m_step, CGU_FLOAT _min_bnd, + CGU_FLOAT _max_bnd, CGU_INT _NmbrClrs, + CGU_UINT8 dwNumPoints) { + // Start out assuming our endpoints are the min and max values we've + // determined + + // Attempt a (simple) progressive refinement step to reduce noise in the + // output image by trying to find a better overall match for the endpoints. 
+ + CGU_FLOAT maxerror = _MaxError; + CGU_FLOAT min_ex = *_min_ex; + CGU_FLOAT max_ex = *_max_ex; + + int mode, bestmode; + do { + CGU_FLOAT cr_min0 = min_ex; + CGU_FLOAT cr_max0 = max_ex; + for (bestmode = -1, mode = 0; mode < SCH_STPS * SCH_STPS; mode++) { + // check each move (see sStep for direction) + CGU_FLOAT cr_min = min_ex + _m_step * sMvF[mode / SCH_STPS]; + CGU_FLOAT cr_max = max_ex + _m_step * sMvF[mode % SCH_STPS]; + + cr_min = maxf(cr_min, _min_bnd); + cr_max = minf(cr_max, _max_bnd); + + CGU_FLOAT error; + error = RmpSrch1(_Blk, _Rpt, maxerror, cr_min, cr_max, _NmbrClrs, + dwNumPoints); + + if (error < maxerror) { + maxerror = error; + bestmode = mode; + cr_min0 = cr_min; + cr_max0 = cr_max; + } + } + + if (bestmode != -1) { + // make move (see sStep for direction) + min_ex = cr_min0; + max_ex = cr_max0; + } + } while (bestmode != -1); + + *_min_ex = min_ex; + *_max_ex = max_ex; + + return maxerror; +} +#endif //! BC2 + +#if !defined(BC2_ENCODE_KERNEL_H) +static int QSortFCmp(const void *Elem1, const void *Elem2) { + int ret = 0; + + if (*(CGU_FLOAT *)Elem1 - *(CGU_FLOAT *)Elem2 < 0.) + ret = -1; + else if (*(CGU_FLOAT *)Elem1 - *(CGU_FLOAT *)Elem2 > 0.) + ret = 1; + return ret; +} +#endif // !BC2 + +#if !defined(BC2_ENCODE_KERNEL_H) +static CGU_FLOAT CompBlock1(CGU_FLOAT _RmpPnts[NUM_ENDPOINTS], + CGU_FLOAT _Blk[MAX_BLOCK], CGU_INT _Nmbr, + CGU_UINT8 dwNumPoints, CGU_BOOL bFixedRampPoints, + CGU_INT _IntPrc, CGU_INT _FracPrc, + CGU_BOOL _bFixedRamp) { + CGU_FLOAT fMaxError = 0.f; + + CGU_FLOAT Ramp[NUM_ENDPOINTS]; + + CGU_FLOAT IntFctr = (CGU_FLOAT)(1 << _IntPrc); + // CGU_FLOAT FracFctr = (CGU_FLOAT)(1 << _FracPrc); + + ALIGN_16 CGU_FLOAT afUniqueValues[MAX_BLOCK]; + ALIGN_16 CGU_FLOAT afValueRepeats[MAX_BLOCK]; + for (int i = 0; i < MAX_BLOCK; i++) + afUniqueValues[i] = afValueRepeats[i] = 0.f; + + // For each unique value we compute the number of it appearances. 
+ CGU_FLOAT fBlk[MAX_BLOCK]; +#ifdef ASPM_GPU + for (CGU_INT i = 0; i < _Nmbr; i++) { + fBlk[i] = _Blk[i]; + } +#else + memcpy(fBlk, _Blk, _Nmbr * sizeof(CGU_FLOAT)); +#endif + + // sort the input +#ifndef ASPM_GPU + qsort((void *)fBlk, (size_t)_Nmbr, sizeof(CGU_FLOAT), QSortFCmp); +#else + sortData_FLOAT(fBlk, fBlk, _Nmbr); +#endif + + CGU_FLOAT new_p = -2.; + + int N0s = 0, N1s = 0; + CGU_UINT32 dwUniqueValues = 0; + afUniqueValues[0] = 0.f; + + bool requiresCalculation = true; + + if (bFixedRampPoints) { + for (CGU_INT i = 0; i < _Nmbr; i++) { + if (new_p != fBlk[i]) { + new_p = fBlk[i]; + if (new_p <= 1.5 / 255.) + N0s++; + else if (new_p >= 253.5 / 255.) + N1s++; + else { + afUniqueValues[dwUniqueValues] = fBlk[i]; + afValueRepeats[dwUniqueValues] = 1.f; + dwUniqueValues++; + } + } else { + if (dwUniqueValues > 0) { + if (afUniqueValues[dwUniqueValues - 1] == new_p) + afValueRepeats[dwUniqueValues - 1] += 1.f; + } + } + } + + // if number of unique colors is less or eq 2 we've done either, but we know + // that we may have 0s and/or 1s as well. To avoid for the ramp to be + // considered flat we invented couple entries on the way. 
+ if (dwUniqueValues <= 2) { + if (dwUniqueValues == 2) // if 2, take them + { + Ramp[0] = floor(afUniqueValues[0] * (IntFctr - 1) + 0.5f); + Ramp[1] = floor(afUniqueValues[1] * (IntFctr - 1) + 0.5f); + } else if (dwUniqueValues == 1) // if 1, add another one + { + Ramp[0] = floor(afUniqueValues[0] * (IntFctr - 1) + 0.5f); + Ramp[1] = Ramp[0] + 1.f; + } else // if 0, invent them + { + Ramp[0] = 128.f; + Ramp[1] = Ramp[0] + 1.f; + } + + fMaxError = 0.f; + requiresCalculation = false; + } + } else { + for (CGU_INT i = 0; i < _Nmbr; i++) { + if (new_p != fBlk[i]) { + afUniqueValues[dwUniqueValues] = new_p = fBlk[i]; + afValueRepeats[dwUniqueValues] = 1.f; + dwUniqueValues++; + } else + afValueRepeats[dwUniqueValues - 1] += 1.f; + } + + // if number of unique colors is less or eq 2, we've done + if (dwUniqueValues <= 2) { + Ramp[0] = floor(afUniqueValues[0] * (IntFctr - 1) + 0.5f); + if (dwUniqueValues == 1) + Ramp[1] = Ramp[0] + 1.f; + else + Ramp[1] = floor(afUniqueValues[1] * (IntFctr - 1) + 0.5f); + fMaxError = 0.f; + requiresCalculation = false; + } + } + + if (requiresCalculation) { + CGU_FLOAT min_ex = afUniqueValues[0]; + CGU_FLOAT max_ex = afUniqueValues[dwUniqueValues - 1]; + CGU_FLOAT min_bnd = 0, max_bnd = 1.; + CGU_FLOAT min_r = min_ex, max_r = max_ex; + CGU_FLOAT gbl_l = 0, gbl_r = 0; + CGU_FLOAT cntr = (min_r + max_r) / 2; + + CGU_FLOAT gbl_err = MAX_ERROR; + // Trying to avoid unnecessary calculations. Heuristics: after some analisis + // it appears that in integer case, if the input interval not more then 48 + // we won't get much better + + bool wantsSearch = !(_INT_GRID && max_ex - min_ex <= 48.f / IntFctr); + + if (wantsSearch) { + // Search. + // 1. take the vicinities of both low and high bound of the input + // interval. + // 2. setup some search step + // 3. find the new low and high bound which provides an (sub) optimal + // (infinite precision) clusterization. + CGU_FLOAT gbl_llb = + (min_bnd > min_r - GBL_SCH_EXT) ? 
min_bnd : min_r - GBL_SCH_EXT; + CGU_FLOAT gbl_rrb = + (max_bnd < max_r + GBL_SCH_EXT) ? max_bnd : max_r + GBL_SCH_EXT; + CGU_FLOAT gbl_lrb = + (cntr < min_r + GBL_SCH_EXT) ? cntr : min_r + GBL_SCH_EXT; + CGU_FLOAT gbl_rlb = + (cntr > max_r - GBL_SCH_EXT) ? cntr : max_r - GBL_SCH_EXT; + for (CGU_FLOAT step_l = gbl_llb; step_l < gbl_lrb; + step_l += GBL_SCH_STEP) { + for (CGU_FLOAT step_r = gbl_rrb; gbl_rlb <= step_r; + step_r -= GBL_SCH_STEP) { + CGU_FLOAT sch_err; + // an sse version is avaiable + sch_err = RmpSrch1(afUniqueValues, afValueRepeats, gbl_err, step_l, + step_r, dwUniqueValues, dwNumPoints); + if (sch_err < gbl_err) { + gbl_err = sch_err; + gbl_l = step_l; + gbl_r = step_r; + } + } + } + + min_r = gbl_l; + max_r = gbl_r; + } + + // This is a refinement call. The function tries to make several small + // stretches or squashes to minimize quantization error. + CGU_FLOAT m_step = LCL_SCH_STEP / IntFctr; + fMaxError = + BlockRefine1(afUniqueValues, afValueRepeats, gbl_err, &min_r, &max_r, + m_step, min_bnd, max_bnd, dwUniqueValues, dwNumPoints); + + min_ex = min_r; + max_ex = max_r; + + max_ex *= (IntFctr - 1); + min_ex *= (IntFctr - 1); + /* + this one is tricky. for the float or high fractional precision ramp it tries + to avoid for the ramp to be collapsed into one integer number after + rounding. Notice the condition. There is a difference between max_ex and + min_ex but after rounding they may collapse into the same integer. + + So we try to run the same refinement procedure but with starting position on + the integer grid and step equal 1. + */ + if (!_INT_GRID && max_ex - min_ex > 0. 
&& + floor(min_ex + 0.5f) == floor(max_ex + 0.5f)) { + m_step = 1.; + gbl_err = MAX_ERROR; + for (CGU_UINT32 i = 0; i < dwUniqueValues; i++) + afUniqueValues[i] *= (IntFctr - 1); + + max_ex = min_ex = floor(min_ex + 0.5f); + + gbl_err = BlockRefine1(afUniqueValues, afValueRepeats, gbl_err, &min_ex, + &max_ex, m_step, 0.f, 255.f, dwUniqueValues, + dwNumPoints); + + fMaxError = gbl_err; + } + Ramp[1] = floor(max_ex + 0.5f); + Ramp[0] = floor(min_ex + 0.5f); + } + + // Ensure that the two endpoints are not the same + // This is legal but serves no need & can break some optimizations in the + // compressor + if (Ramp[0] == Ramp[1]) { + if (Ramp[1] < 255.f) + Ramp[1]++; + else + Ramp[1]--; + } + _RmpPnts[0] = Ramp[0]; + _RmpPnts[1] = Ramp[1]; + + return fMaxError; +} +#endif // !BC2 + +#if !defined(BC2_ENCODE_KERNEL_H) +static void BldRmp1(CGU_FLOAT _Rmp[MAX_POINTS], + CGU_FLOAT _InpRmp[NUM_ENDPOINTS], int nNumPoints) { + // for 3 point ramp; not to select the 4th point in min + for (int e = nNumPoints; e < MAX_POINTS; e++) _Rmp[e] = 100000.f; + + _Rmp[0] = _InpRmp[0]; + _Rmp[1] = _InpRmp[1]; + for (int e = 1; e < nNumPoints - 1; e++) + _Rmp[e + 1] = (_Rmp[0] * (nNumPoints - 1 - e) + _Rmp[1] * e) / + (CGU_FLOAT)(nNumPoints - 1); +} +#endif //! 
BC2 + +#if !defined(BC2_ENCODE_KERNEL_H) +static void GetRmp1(CGU_FLOAT _rampDat[MAX_POINTS], + CGU_FLOAT _ramp[NUM_ENDPOINTS], int nNumPoints, + CGU_BOOL bFixedRampPoints, CGU_INT _intPrec, + CGU_INT _fracPrec, CGU_BOOL _bFixedRamp) { + if (_ramp[0] == _ramp[1]) return; + + CGU_BOOL r0 = _ramp[0] <= _ramp[1]; + CGU_BOOL r1 = _ramp[0] > _ramp[1]; + if ((!bFixedRampPoints && r0) || (bFixedRampPoints && r1)) { + CGU_FLOAT t = _ramp[0]; + _ramp[0] = _ramp[1]; + _ramp[1] = t; + } + + _rampDat[0] = _ramp[0]; + _rampDat[1] = _ramp[1]; + + CGU_FLOAT IntFctr = (CGU_FLOAT)(1 << _intPrec); + CGU_FLOAT FracFctr = (CGU_FLOAT)(1 << _fracPrec); + + CGU_FLOAT ramp[NUM_ENDPOINTS]; + ramp[0] = _ramp[0] * FracFctr; + ramp[1] = _ramp[1] * FracFctr; + + BldRmp1(_rampDat, ramp, nNumPoints); + if (bFixedRampPoints) { + _rampDat[nNumPoints] = 0.; + _rampDat[nNumPoints + 1] = FracFctr * IntFctr - 1.f; + } + + if (_bFixedRamp) { + for (CGU_INT i = 0; i < nNumPoints; i++) { + _rampDat[i] = floor(_rampDat[i] + 0.5f); + _rampDat[i] /= FracFctr; + } + } +} +#endif + +#if !defined(BC2_ENCODE_KERNEL_H) +static CGU_FLOAT Clstr1(CGU_UINT8 *pcIndices, CGU_FLOAT _blockIn[MAX_BLOCK], + CGU_FLOAT _ramp[NUM_ENDPOINTS], CGU_INT _NmbrClrs, + CGU_INT nNumPoints, CGU_BOOL bFixedRampPoints, + CGU_INT _intPrec, CGU_INT _fracPrec, + CGU_BOOL _bFixedRamp) { + CGU_FLOAT Err = 0.f; + CGU_FLOAT alpha[MAX_POINTS]; + + for (CGU_INT i = 0; i < _NmbrClrs; i++) pcIndices[i] = 0; + + if (_ramp[0] == _ramp[1]) return Err; + + if (!_bFixedRamp) { + _intPrec = 8; + _fracPrec = 0; + } + + GetRmp1(alpha, _ramp, nNumPoints, bFixedRampPoints, _intPrec, _fracPrec, + _bFixedRamp); + + if (bFixedRampPoints) nNumPoints += 2; + + const CGU_FLOAT OverIntFctr = 1.f / ((CGU_FLOAT)(1 << _intPrec) - 1.f); + for (int i = 0; i < nNumPoints; i++) alpha[i] *= OverIntFctr; + + // For each colour in the original block, calculate its weighted + // distance from each point in the original and assign it + // to the closest cluster + for (int i 
= 0; i < _NmbrClrs; i++) { + CGU_FLOAT shortest = 10000000.f; + + // Get the original alpha + CGU_FLOAT acur = _blockIn[i]; + + for (CGU_UINT8 j = 0; j < nNumPoints; j++) { + CGU_FLOAT adist = (acur - alpha[j]); + adist *= adist; + + if (adist < shortest) { + shortest = adist; + pcIndices[i] = j; + } + } + + Err += shortest; + } + + return Err; +} +#endif // !BC2 + +#if !defined(BC2_ENCODE_KERNEL_H) +static CGU_FLOAT CompBlock1XF(CGU_FLOAT *_Blk, CGU_UINT32 dwBlockSize, + CGU_UINT8 nEndpoints[2], CGU_UINT8 *pcIndices, + CGU_UINT8 dwNumPoints, CGU_BOOL bFixedRampPoints, + CGU_INT _intPrec, CGU_INT _fracPrec, + CGU_BOOL _bFixedRamp) { + // just to make them initialized + if (!_bFixedRamp) { + _intPrec = 8; + _fracPrec = 0; + } + + // this one makes the bulk of the work + CGU_FLOAT Ramp[NUM_ENDPOINTS]; + CompBlock1(Ramp, _Blk, dwBlockSize, dwNumPoints, bFixedRampPoints, _intPrec, + _fracPrec, _bFixedRamp); + + // final clusterization applied + CGU_FLOAT fError = Clstr1(pcIndices, _Blk, Ramp, dwBlockSize, dwNumPoints, + bFixedRampPoints, _intPrec, _fracPrec, _bFixedRamp); + nEndpoints[0] = (CGU_UINT8)Ramp[0]; + nEndpoints[1] = (CGU_UINT8)Ramp[1]; + + return fError; +} +#endif //! BC2 +#endif //! BC1 + +#if !defined(BC1_ENCODE_KERNEL_H) +#if !defined(BC2_ENCODE_KERNEL_H) +static CGU_FLOAT CompBlock1X(const CGU_UINT8 *_Blk, CGU_UINT32 dwBlockSize, + CGU_UINT8 nEndpoints[2], CGU_UINT8 *pcIndices, + CGU_UINT8 dwNumPoints, CGU_BOOL bFixedRampPoints, + CGU_INT _intPrec, CGU_INT _fracPrec, + CGU_BOOL _bFixedRamp) { + // convert the input and call the float equivalent. 
+ CGU_FLOAT fBlk[MAX_BLOCK]; + for (CGU_UINT32 i = 0; i < dwBlockSize; i++) + fBlk[i] = (CGU_FLOAT)_Blk[i] / 255.f; + + return CompBlock1XF(fBlk, dwBlockSize, nEndpoints, pcIndices, dwNumPoints, + bFixedRampPoints, _intPrec, _fracPrec, _bFixedRamp); +} +#endif + +#if !defined(BC2_ENCODE_KERNEL_H) +static void EncodeAlphaBlock(CMP_GLOBAL CGU_UINT32 compressedBlock[2], + CGU_UINT8 nEndpoints[2], + CGU_UINT8 nIndices[BLOCK_SIZE_4X4]) { + compressedBlock[0] = + ((CGU_UINT32)nEndpoints[0]) | (((CGU_UINT32)nEndpoints[1]) << 8); + compressedBlock[1] = 0; + + for (CGU_INT i = 0; i < BLOCK_SIZE_4X4; i++) { + if (i < 5) + compressedBlock[0] |= (nIndices[i] & 0x7) << (16 + (i * 3)); + else if (i > 5) + compressedBlock[1] |= (nIndices[i] & 0x7) << (2 + (i - 6) * 3); + else { + compressedBlock[0] |= (nIndices[i] & 0x1) << 31; + compressedBlock[1] |= (nIndices[i] & 0x6) >> 1; + } + } +} +#endif + +#endif + +#if !defined(BC1_ENCODE_KERNEL_H) +#if !defined(BC2_ENCODE_KERNEL_H) +static CGU_INT32 CompressAlphaBlock(const CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4], + CMP_GLOBAL CGU_UINT32 compressedBlock[2]) { + CGU_UINT8 nEndpoints[2][2]; + CGU_UINT8 nIndices[2][BLOCK_SIZE_4X4]; + CGU_FLOAT fError8 = CompBlock1X(alphaBlock, BLOCK_SIZE_4X4, nEndpoints[0], + nIndices[0], 8, false, 8, 0, true); + CGU_FLOAT fError6 = + (fError8 == 0.f) ? CMP_FLOAT_MAX + : CompBlock1X(alphaBlock, BLOCK_SIZE_4X4, nEndpoints[1], + nIndices[1], 6, true, 8, 0, true); + if (fError8 <= fError6) + EncodeAlphaBlock(compressedBlock, nEndpoints[0], nIndices[0]); + else + EncodeAlphaBlock(compressedBlock, nEndpoints[1], nIndices[1]); + return CGU_CORE_OK; +} +#endif + +#if !defined(BC2_ENCODE_KERNEL_H) +static void GetCompressedAlphaRamp(CGU_UINT8 alpha[8], + const CGU_UINT32 compressedBlock[2]) { + alpha[0] = (CGU_UINT8)(compressedBlock[0] & 0xff); + alpha[1] = (CGU_UINT8)((compressedBlock[0] >> 8) & 0xff); + + if (alpha[0] > alpha[1]) { + // 8-alpha block: derive the other six alphas. 
+ // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated. +#ifdef ASPM_GPU + alpha[2] = + (CGU_UINT8)((6 * alpha[0] + 1 * alpha[1] + 3) / 7); // bit code 010 + alpha[3] = + (CGU_UINT8)((5 * alpha[0] + 2 * alpha[1] + 3) / 7); // bit code 011 + alpha[4] = + (CGU_UINT8)((4 * alpha[0] + 3 * alpha[1] + 3) / 7); // bit code 100 + alpha[5] = + (CGU_UINT8)((3 * alpha[0] + 4 * alpha[1] + 3) / 7); // bit code 101 + alpha[6] = + (CGU_UINT8)((2 * alpha[0] + 5 * alpha[1] + 3) / 7); // bit code 110 + alpha[7] = + (CGU_UINT8)((1 * alpha[0] + 6 * alpha[1] + 3) / 7); // bit code 111 +#else + alpha[2] = static_cast((6 * alpha[0] + 1 * alpha[1] + 3) / + 7); // bit code 010 + alpha[3] = static_cast((5 * alpha[0] + 2 * alpha[1] + 3) / + 7); // bit code 011 + alpha[4] = static_cast((4 * alpha[0] + 3 * alpha[1] + 3) / + 7); // bit code 100 + alpha[5] = static_cast((3 * alpha[0] + 4 * alpha[1] + 3) / + 7); // bit code 101 + alpha[6] = static_cast((2 * alpha[0] + 5 * alpha[1] + 3) / + 7); // bit code 110 + alpha[7] = static_cast((1 * alpha[0] + 6 * alpha[1] + 3) / + 7); // bit code 111 +#endif + } else { + // 6-alpha block. + // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated. 
+#ifdef ASPM_GPU + alpha[2] = + (CGU_UINT8)((4 * alpha[0] + 1 * alpha[1] + 2) / 5); // Bit code 010 + alpha[3] = + (CGU_UINT8)((3 * alpha[0] + 2 * alpha[1] + 2) / 5); // Bit code 011 + alpha[4] = + (CGU_UINT8)((2 * alpha[0] + 3 * alpha[1] + 2) / 5); // Bit code 100 + alpha[5] = + (CGU_UINT8)((1 * alpha[0] + 4 * alpha[1] + 2) / 5); // Bit code 101 +#else + alpha[2] = static_cast((4 * alpha[0] + 1 * alpha[1] + 2) / + 5); // Bit code 010 + alpha[3] = static_cast((3 * alpha[0] + 2 * alpha[1] + 2) / + 5); // Bit code 011 + alpha[4] = static_cast((2 * alpha[0] + 3 * alpha[1] + 2) / + 5); // Bit code 100 + alpha[5] = static_cast((1 * alpha[0] + 4 * alpha[1] + 2) / + 5); // Bit code 101 +#endif + alpha[6] = 0; // Bit code 110 + alpha[7] = 255; // Bit code 111 + } +} +#endif // !BC2 + +#if !defined(BC2_ENCODE_KERNEL_H) +static void DecompressAlphaBlock(CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4], + const CGU_UINT32 compressedBlock[2]) { + CGU_UINT8 alpha[8]; + GetCompressedAlphaRamp(alpha, compressedBlock); + + for (int i = 0; i < BLOCK_SIZE_4X4; i++) { + CGU_UINT32 index; + if (i < 5) + index = (compressedBlock[0] & (0x7 << (16 + (i * 3)))) >> (16 + (i * 3)); + else if (i > 5) + index = (compressedBlock[1] & (0x7 << (2 + (i - 6) * 3))) >> + (2 + (i - 6) * 3); + else { + index = (compressedBlock[0] & 0x80000000) >> 31; + index |= (compressedBlock[1] & 0x3) << 1; + } + + alphaBlock[i] = alpha[index]; + } +} +#endif // !BC2 +#endif // !BC1 + +#endif diff --git a/extern/CMP_Core/shaders/Common_Def.h b/extern/CMP_Core/shaders/Common_Def.h new file mode 100644 index 0000000..ed9e94a --- /dev/null +++ b/extern/CMP_Core/shaders/Common_Def.h @@ -0,0 +1,300 @@ +#ifndef _COMMON_DEFINITIONS_H +#define _COMMON_DEFINITIONS_H + +//=============================================================================== +// Copyright (c) 2007-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2004-2006 ATI Technologies Inc. 
+//=============================================================================== +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// +// File Name: Common_Def.h +// Description: common definitions used for CPU/HPC/GPU +// +////////////////////////////////////////////////////////////////////////////// + + +// Features +#ifdef _WIN32 +//#define USE_ASPM_CODE +#endif + +// Proxy ISPC compiler (Warning! Not all ASPM features will be available : expect build errors for specialized ASPM code! +#ifdef ISPC +#define ASPM +#endif + +// Using OpenCL Compiler +#ifdef __OPENCL_VERSION__ +#define ASPM_GPU +#endif + + +#ifdef _LINUX +#undef ASPM_GPU +#include +#include +#include +#include "cmp_math_vec4.h" +#endif + +#ifndef CMP_MAX +#define CMP_MAX(x, y) (((x) > (y)) ? (x) : (y)) +#endif + +#ifndef CMP_MIN +#define CMP_MIN(x, y) (((x) < (y)) ? 
(x) : (y)) +#endif + +#define CMP_SET_BC13_DECODER_RGBA // Sets mapping BC1, BC2 & BC3 to decode Red,Green,Blue and Alpha + // RGBA to channels [0,1,2,3] else BGRA maps to [0,1,2,3] + // BC4 alpha always maps as AAAA to channels [0,1,2,3] + // BC5 decoded (Red&Green) maps R,G,B=0,A=255 to [0,1,2,3] else maps [B=0,G,R,A=255] to [0,1,2,3] + +//#define USE_BLOCK_LINEAR + +#define CMP_FLOAT_MAX 3.402823466e+38F // max value used to detect an Error in processing +#define CMP_FLOAT_MAX_EXP 38 +#define USE_PROCESS_SEPERATE_ALPHA // Enable this to use higher quality code using CompressDualIndexBlock +#define COMPRESSED_BLOCK_SIZE 16 // Size of a compressed block in bytes +#define MAX_DIMENSION_BIG 4 // Max number of channels (RGBA) +#define MAX_SUBSETS 3 // Maximum number of possible subsets +#define MAX_SUBSET_SIZE 16 // Largest possible size for an individual subset +#define BLOCK_SIZE_4X4X4 64 +#define BLOCK_SIZE_4X4 16 +#define BlockX 4 +#define BlockY 4 +//#define USE_BLOCK_LINEAR // Source Data is organized in linear form for each block : Experimental Code not fully developed +//#define USE_DOUBLE // Default is to use float, enable to use double data types only for float definitions + +typedef enum { + CGU_CORE_OK = 0, // No errors, call was successfull + CGU_CORE_ERR_UNKOWN, // An unknown error occurred + CGU_CORE_ERR_NEWMEM, // New Memory Allocation Failed + CGU_CORE_ERR_INVALIDPTR, // The pointer value used is invalid or null + CGU_CORE_ERR_RANGERED, // values for Red Channel is out of range (too high or too low) + CGU_CORE_ERR_RANGEGREEN, // values for Green Channel is out of range (too high or too low) + CGU_CORE_ERR_RANGEBLUE, // values for Blue Channel is out of range (too high or too low) +} CGU_ERROR_CODES; + + +//--------------------------------------------- +// Predefinitions for GPU and CPU compiled code +//--------------------------------------------- + +#ifdef ASPM_GPU // GPU Based code + // ==== Vectors ==== + typedef float2 CGU_Vec2f; + typedef float2 
CGV_Vec2f; + typedef float3 CMP_Vec3f; + typedef float3 CGU_Vec3f; + typedef float3 CGV_Vec3f; + typedef uchar3 CGU_Vec3uc; + typedef uchar3 CGV_Vec3uc; + typedef uchar4 CMP_Vec4uc; + typedef uchar4 CGU_Vec4uc; + typedef uchar4 CGV_Vec4uc; + + #define USE_BC7_SP_ERR_IDX + #define ASPM_PRINT(args) printf args + #define BC7_ENCODECLASS + + #define CMP_EXPORT + #define INLINE + #define uniform + #define varying + #define CMP_GLOBAL __global + #define CMP_KERNEL __kernel + #define CMP_CONSTANT __constant + #define CMP_STATIC + + + typedef unsigned int CGU_DWORD; //32bits + typedef int CGU_INT; //32bits + typedef int CGU_BOOL; + typedef unsigned short CGU_SHORT; //16bits + typedef float CGU_FLOAT; + typedef unsigned int uint32; // need to remove this def + + typedef int CGV_INT; + typedef unsigned int CGU_UINT; + typedef int CGUV_INT; + typedef int CGV_BOOL; + + typedef char CGU_INT8; + typedef unsigned char CGU_UINT8; + typedef short CGU_INT16; + typedef unsigned short CGU_UINT16; + typedef int CGU_INT32; + typedef unsigned int CGU_UINT32; + typedef unsigned long CGU_UINT64; + + typedef char CGV_INT8; + typedef unsigned char CGV_UINT8; + typedef short CGV_INT16; + typedef unsigned short CGV_UINT16; + typedef int CGV_INT32; + typedef unsigned int CGV_UINT32; + typedef unsigned long CGV_UINT64; + + typedef float CGV_FLOAT; + + #define TRUE 1 + #define FALSE 0 + #define CMP_CDECL + +#else + // CPU & ASPM definitions + + #ifdef ASPM // SPMD ,SIMD CPU code + // using hybrid (CPU/GPU) aspm compiler + #define ASPM_PRINT(args) print args + #define CMP_USE_FOREACH_ASPM + #define __ASPM__ + #define BC7_ENCODECLASS + + #define USE_BC7_SP_ERR_IDX + //#define USE_BC7_RAMP + + #define CMP_EXPORT export + #define TRUE true + #define FALSE false + typedef uniform bool CGU_BOOL; + typedef bool CGV_BOOL; + + typedef unsigned int8 uint8; + typedef unsigned int16 uint16; + typedef unsigned int32 uint32; + typedef unsigned int64 uint64; + typedef uniform float CGU_FLOAT; + typedef varying 
float CGV_FLOAT; + typedef uniform uint8 CGU_UINT8; + typedef varying uint8 CGV_UINT8; + + + typedef CGV_UINT8<4> CGV_Vec4uc; + typedef CGU_UINT8<4> CGU_Vec4uc; + + typedef CGU_FLOAT<3> CGU_Vec3f; + typedef CGV_FLOAT<3> CGV_Vec3f; + + typedef CGU_FLOAT<2> CGU_Vec2f; + typedef CGV_FLOAT<2> CGV_Vec2f; + + #define CMP_CDECL + + #else // standard CPU code + #include + #include + #include "cmp_math_vec4.h" + + // using CPU compiler + #define ASPM_PRINT(args) printf args + #define USE_BC7_RAMP + #define USE_BC7_SP_ERR_IDX + + #define CMP_EXPORT + #define BC7_ENCODECLASS BC7_EncodeClass:: + #define TRUE 1 + #define FALSE 0 + #define uniform + #define varying + + typedef char int8; + typedef short int16; + typedef int int32; + typedef long int64; + typedef unsigned char uint8; + typedef unsigned short uint16; + typedef unsigned int uint32; + typedef unsigned long uint64; + + typedef int8 CGV_BOOL; + typedef int8 CGU_BOOL; + typedef int16 CGU_WORD; + typedef uint8 CGU_SHORT; + typedef int64 CGU_LONG; + typedef uint64 CGU_ULONG; + + typedef uniform float CGU_FLOAT; + typedef varying float CGV_FLOAT; + typedef uniform uint8 CGU_UINT8; + typedef varying uint8 CGV_UINT8; + #if defined(WIN32) || defined(_WIN64) + #define CMP_CDECL __cdecl + #else + #define CMP_CDECL + #endif + #endif + + // Common CPU & ASPM definitions + #define CMP_ASSERT(arg) + + #define CMP_GLOBAL + + #define CMP_KERNEL + #define __local const + #define __constant const + #define CMP_CONSTANT const + #define INLINE inline + #define CMP_STATIC static + + + typedef uniform int32 CGU_DWORD; + typedef uniform uint8 CGU_UBYTE; + typedef uniform int CGU_INT; + typedef uniform int8 CGU_INT8; + + typedef uniform int16 CGU_INT16; + typedef uniform uint16 CGU_UINT16; + typedef uniform int32 CGU_INT32; + typedef uniform uint32 CGU_UINT32; + typedef uniform uint64 CGU_UINT64; + + typedef int CGV_INT; + typedef int8 CGV_INT8; + typedef int16 CGV_INT16; + typedef int32 CGV_INT32; + typedef uint16 CGV_UINT16; + typedef 
uint32 CGV_UINT32; + typedef uint64 CGV_UINT64; +#endif // ASPM_GPU + + +typedef struct +{ + CGU_UINT32 m_src_width; + CGU_UINT32 m_src_height; + CGU_UINT32 m_width_in_blocks; + CGU_UINT32 m_height_in_blocks; + CGU_FLOAT m_fquality; +} Source_Info; + +// Ref Compute_CPU_HPC +struct texture_surface +{ + CGU_UINT8* ptr; + CGU_INT width, + height, + stride; + CGU_INT channels; +}; + +#endif diff --git a/extern/CMP_Core/shaders/CopyFiles.bat b/extern/CMP_Core/shaders/CopyFiles.bat new file mode 100644 index 0000000..fc125e9 --- /dev/null +++ b/extern/CMP_Core/shaders/CopyFiles.bat @@ -0,0 +1,50 @@ +REM ==================================== +REM Hybrid Codecs: Full support in v4.0 +REM ==================================== + +REM gets the output dir +set BUILD_OUTDIR=%1 + +REM get the batch files dir +SET mypath=%~dp0 +echo %mypath:~0,-1% + +IF NOT EXIST "%outpath%"\Plugins mkdir %BUILD_OUTDIR%Plugins +IF NOT EXIST "%outpath%"\Plugins\Compute mkdir %BUILD_OUTDIR%Plugins\Compute + +REM Build Vulkan Shader Binary +REM "%VULKAN_SDK%"\bin\glslangvalidator -V %mypath:~0,-1%\BC1.comp -o %BUILD_OUTDIR%\Plugins\Compute\BC1.spv +REM IF %ERRORLEVEL% GTR 0 exit 123 + +REM Enabled in v4.0 +REM +REM del %BUILD_OUTDIR%Plugins\Compute\BC1_Encode_Kernel.cpp.cmp +REM del %BUILD_OUTDIR%Plugins\Compute\BC2_Encode_Kernel.cpp.cmp +REM del %BUILD_OUTDIR%Plugins\Compute\BC3_Encode_Kernel.cpp.cmp +REM del %BUILD_OUTDIR%Plugins\Compute\BC4_Encode_Kernel.cpp.cmp +REM del %BUILD_OUTDIR%Plugins\Compute\BC5_Encode_Kernel.cpp.cmp +REM del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_Kernel.cpp.cmp +REM del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.cpp.cmp + +XCopy /r /d /y "%mypath:~0,-1%\Common_Def.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BCn_Common_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y 
"%mypath:~0,-1%\BC2_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ + +echo "Dependencies copied done" + + + + diff --git a/extern/CMP_Core/source/CMP_Core.h b/extern/CMP_Core/source/CMP_Core.h new file mode 100644 index 0000000..d54dc27 --- /dev/null +++ b/extern/CMP_Core/source/CMP_Core.h @@ -0,0 +1,153 @@ +//===================================================================== +// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +/// \file CMP_Core.h +// +//===================================================================== + +#ifndef CMP_CORE_H +#define CMP_CORE_H + +#include +#ifdef _WIN32 +#define CMP_CDECL __cdecl +#else +#define CMP_CDECL +#endif + +//==================================================================================== +// API Definitions for Core API +//------------------------------------------------------------------------------------ +// All API return 0 on success else error codes > 0 +// See Common_Def.h CGU_CORE_ values for the error codes +//===================================================================================== + +//====================================================================================================== +// Block level setting option: Create and Destroy Reference Pointers +//====================================================================================================== +// Context create and destroy to use for BCn codec settings, where n is the set [1,2,3,4,5,6,7] +// All codecs will use default max quality settings, users can create multiple contexts to +// set quality levels, masks , channel mapping, etc... 
+ +int CMP_CDECL CreateOptionsBC1(void **optionsBC1); +int CMP_CDECL CreateOptionsBC2(void **optionsBC2); +int CMP_CDECL CreateOptionsBC3(void **optionsBC3); +int CMP_CDECL CreateOptionsBC4(void **optionsBC4); +int CMP_CDECL CreateOptionsBC5(void **optionsBC5); +int CMP_CDECL CreateOptionsBC6(void **optionsBC6); +int CMP_CDECL CreateOptionsBC7(void **optionsBC7); + +int CMP_CDECL DestroyOptionsBC1(void *optionsBC1); +int CMP_CDECL DestroyOptionsBC2(void *optionsBC2); +int CMP_CDECL DestroyOptionsBC3(void *optionsBC3); +int CMP_CDECL DestroyOptionsBC4(void *optionsBC4); +int CMP_CDECL DestroyOptionsBC5(void *optionsBC5); +int CMP_CDECL DestroyOptionsBC6(void *optionsBC6); +int CMP_CDECL DestroyOptionsBC7(void *optionsBC7); + + +//====================================================================================================== +// Block level settings using the options Reference Pointers +//====================================================================================================== + +// Setting channel Weights : Applies to BC1, BC2 and BC3 valid ranges are [0..1.0f] Default is {1.0f, 1.0f , 1.0f} +// Use channel weightings. With swizzled formats the weighting applies to the data within the specified channel not the channel itself. +int CMP_CDECL SetChannelWeightsBC1(void *options, float WeightRed, float WeightGreen, float WeightBlue); +int CMP_CDECL SetChannelWeightsBC2(void *options, float WeightRed, float WeightGreen, float WeightBlue); +int CMP_CDECL SetChannelWeightsBC3(void *options, float WeightRed, float WeightGreen, float WeightBlue); + + +// True sets mapping CMP_Core BC1, BC2 & BC3 to decode Red,Green,Blue and Alpha as +// RGBA to channels [0,1,2,3] else BGRA maps to [0,1,2,3] +// Default is set to true. 
+int CMP_CDECL SetDecodeChannelMapping(void *options, bool mapRGBA); + +int CMP_CDECL SetQualityBC1(void *options, float fquality); +int CMP_CDECL SetQualityBC2(void *options, float fquality); +int CMP_CDECL SetQualityBC3(void *options, float fquality); +int CMP_CDECL SetQualityBC4(void *options, float fquality); +int CMP_CDECL SetQualityBC5(void *options, float fquality); +int CMP_CDECL SetQualityBC6(void *options, float fquality); +int CMP_CDECL SetQualityBC7(void *options, float fquality); + + +int CMP_CDECL SetAlphaThresholdBC1(void *options, unsigned char alphaThreshold); + +int CMP_CDECL SetMaskBC6(void *options, unsigned int mask); +int CMP_CDECL SetMaskBC7(void *options, unsigned char mask); + +int CMP_CDECL SetAlphaOptionsBC7(void *options, bool imageNeedsAlpha, bool colourRestrict, bool alphaRestrict); +int CMP_CDECL SetErrorThresholdBC7(void *options, float minThreshold, float maxThreshold); + +//====================================================================================================== +// (4x4) Block level 4 channel source CompressBlock and DecompressBlock API for BCn Codecs +//====================================================================================================== +// The options parameter for these API can be set to null in the calls if defaults settings is sufficient +// Example: CompressBlockBC1(srcBlock,16,cmpBlock,NULL); For "C" call +// CompressBlockBC1(srcBlock,16,cmpBlock); For "C++" calls +// +// To use this parameter first create the options context using the CreateOptions call +// then use the Set Options to set various codec settings and pass them to the appropriate +// Compress or Decompress API. 
+// The source (srcBlock) channel format is expected to be RGBA:8888 by default for LDR Codecs +// for BC6H the format is RGBA Half float (16 bits per channel) +//------------------------------------------------------------------------------------------------------ +#ifdef __cplusplus +#define CMP_DEFAULTNULL =NULL +#else +#define CMP_DEFAULTNULL +#endif + +//========================================================================================================= +// 4 channel Sources, default format RGBA:8888 is processed as a 4x4 block starting at srcBlock location +// where each row of the block is calculated from srcStride +//========================================================================================================= +int CMP_CDECL CompressBlockBC1(const unsigned char *srcBlock, unsigned int srcStrideInBytes, unsigned char cmpBlock[8 ], const void *options CMP_DEFAULTNULL); +int CMP_CDECL CompressBlockBC2(const unsigned char *srcBlock, unsigned int srcStrideInBytes, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL); +int CMP_CDECL CompressBlockBC3(const unsigned char *srcBlock, unsigned int srcStrideInBytes, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL); +int CMP_CDECL CompressBlockBC7(const unsigned char *srcBlock, unsigned int srcStrideInBytes, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL); + +int CMP_CDECL DecompressBlockBC1(const unsigned char cmpBlock[8 ], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL); +int CMP_CDECL DecompressBlockBC2(const unsigned char cmpBlock[16], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL); +int CMP_CDECL DecompressBlockBC3(const unsigned char cmpBlock[16], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL); +int CMP_CDECL DecompressBlockBC7(const unsigned char cmpBlock[16], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL); + +//================================================ +// 1 channel Source 
4x4 8 bits per block +//================================================ +int CMP_CDECL CompressBlockBC4(const unsigned char *srcBlock, unsigned int srcStrideInBytes, unsigned char cmpBlock[8], const void *options CMP_DEFAULTNULL); +int CMP_CDECL DecompressBlockBC4(const unsigned char cmpBlock[8], unsigned char srcBlock[16], const void *options CMP_DEFAULTNULL); + +//================================================ +// 2 channel Source 2x(4x4 8 bits) +//================================================ +int CMP_CDECL CompressBlockBC5(const unsigned char *srcBlock1, unsigned int srcStrideInBytes1, + const unsigned char *srcBlock2, unsigned int srcStrideInBytes2, + unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL); +int CMP_CDECL DecompressBlockBC5(const unsigned char cmpBlock[16], unsigned char srcBlock1[16], unsigned char srcBlock2[16], const void *options CMP_DEFAULTNULL); + +//======================================================================================== +// For 3 channel Source RGB_16, Note srcStride is in unsigned short steps (2 bytes each) +//======================================================================================== +int CMP_CDECL CompressBlockBC6(const unsigned short *srcBlock, unsigned int srcStrideInShorts, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL); +int CMP_CDECL DecompressBlockBC6(const unsigned char cmpBlock[16], unsigned short srcBlock[48], const void *options CMP_DEFAULTNULL); + +#endif // CMP_CORE diff --git a/extern/CMP_Core/source/cmp_math_vec4.h b/extern/CMP_Core/source/cmp_math_vec4.h new file mode 100644 index 0000000..d92080e --- /dev/null +++ b/extern/CMP_Core/source/cmp_math_vec4.h @@ -0,0 +1,417 @@ +//===================================================================== +// Copyright 2019 (c), Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +//===================================================================== +#ifndef CMP_MATH_VEC4_H +#define CMP_MATH_VEC4_H + +//==================================================== +// Vector Class definitions for CPU & Intrinsics +//==================================================== + +#if defined (_LINUX) || defined (_WIN32) + +//============================================= VEC2 ================================================== +template +class Vec2 +{ +public: + + T x; + T y; + + // ***************************************** + // Constructors + // ***************************************** + + /// Default constructor + Vec2() : x((T)0), y((T)0) {}; + + /// Value constructor + Vec2(const T& vx, const T& vy) : x(vx), y(vy) {}; + + /// Copy constructor + Vec2(const Vec2& val) : x(val.x), y(val.y) {}; + + /// Single value constructor. 
//============================================= VEC2 ==================================================
// 2-component vector template for the CPU code paths.
// Layout is exactly {x, y}; the T* conversion operators below rely on the
// two members being contiguous, so no virtuals or extra members may be added.
// NOTE(review): the pasted patch stripped every angle-bracketed token
// (template parameter lists and typedef arguments); they are restored here
// following the Vec3/Vec4 siblings.
template <class T>
class Vec2
{
public:

    T x;
    T y;

    // *****************************************
    //     Constructors
    // *****************************************

    /// Default constructor: zero-initializes both components.
    Vec2() : x((T)0), y((T)0) {};

    /// Value constructor.
    Vec2(const T& vx, const T& vy) : x(vx), y(vy) {};

    /// Copy constructor.
    Vec2(const Vec2& val) : x(val.x), y(val.y) {};

    /// Single value constructor. Sets all components to the given value.
    Vec2(const T& v) : x(v), y(v) {};

    /// Array constructor. Assumes a 2-component array.
    /// (Added for consistency: Vec3 and Vec4 both provide one.)
    Vec2(const T* v) : x(v[0]), y(v[1]) {};

    // *****************************************
    //     Conversions/Assignment/Indexing
    // *****************************************

    /// Cast to const T*.
    operator const T* () const { return (const T*)this; };

    /// Cast to T*.
    operator T* () { return (T*)this; };

    /// Indexing (0 = x, 1 = y; not range-checked).
    const T& operator[](int i) const { return ((const T*)this)[i]; };
    T& operator[](int i) { return ((T*)this)[i]; };

    /// Assignment.
    const Vec2& operator=(const Vec2& rhs) { x = rhs.x; y = rhs.y; return *this; };

    // *****************************************
    //     Comparison
    // *****************************************

    /// Equality comparison.
    bool operator==(const Vec2& rhs) const { return (x == rhs.x && y == rhs.y); };

    /// Inequality comparison.
    bool operator!=(const Vec2& rhs) const { return (x != rhs.x || y != rhs.y); };

    // *****************************************
    //     Arithmetic
    // *****************************************

    /// Addition.
    const Vec2 operator+(const Vec2& rhs) const { return Vec2(x + rhs.x, y + rhs.y); };

    /// Subtraction.
    const Vec2 operator-(const Vec2& rhs) const { return Vec2(x - rhs.x, y - rhs.y); };

    /// Multiply by scalar.
    const Vec2 operator*(const T& v) const { return Vec2(x * v, y * v); };

    /// Divide by scalar (no divide-by-zero check, matching Vec3/Vec4).
    const Vec2 operator/(const T& v) const { return Vec2(x / v, y / v); };

    /// Component-wise divide. (Added for consistency: Vec3/Vec4 have it.)
    const Vec2 operator/(const Vec2& rhs) const { return Vec2(x / rhs.x, y / rhs.y); };

    /// Addition in-place.
    Vec2& operator+= (const Vec2& rhs) { x += rhs.x; y += rhs.y; return *this; };

    /// Subtract in-place.
    Vec2& operator-= (const Vec2& rhs) { x -= rhs.x; y -= rhs.y; return *this; };

    /// Scalar multiply in-place.
    Vec2& operator*= (const T& v) { x *= v; y *= v; return *this; };

    /// Scalar divide in-place.
    Vec2& operator/= (const T& v) { x /= v; y /= v; return *this; };
};

// Typedef arguments restored from the project naming convention
// (f = float, d = double, i = int), matching the Vec3/Vec4 typedefs.
typedef Vec2<float>  CMP_Vec2f;
typedef Vec2<float>  CGU_Vec2f;
typedef Vec2<float>  CGV_Vec2f;
typedef Vec2<double> CMP_Vec2d;
typedef Vec2<int>    CMP_Vec2i;
//============================================= VEC3 ==================================================
// 3-component vector template. Layout is exactly {x, y, z}; the T*
// conversion operators rely on the members being contiguous.
// NOTE(review): template parameter lists and typedef arguments were stripped
// by the paste and are restored here.
template <class T>
class Vec3
{
public:

    T x;
    T y;
    T z;

    // *****************************************
    //     Constructors
    // *****************************************

    /// Default constructor: zero-initializes all components.
    Vec3() : x((T)0), y((T)0), z((T)0) {};

    /// Value constructor.
    Vec3(const T& vx, const T& vy, const T& vz) : x(vx), y(vy), z(vz) {};

    /// Copy constructor.
    Vec3(const Vec3& val) : x(val.x), y(val.y), z(val.z) {};

    /// Single value constructor. Sets all components to the given value.
    Vec3(const T& v) : x(v), y(v), z(v) {};

    /// Array constructor. Assumes a 3-component array.
    Vec3(const T* v) : x(v[0]), y(v[1]), z(v[2]) {};

    // *****************************************
    //     Conversions/Assignment/Indexing
    // *****************************************

    /// Cast to const T*.
    operator const T* () const { return (const T*)this; };

    /// Cast to T*.
    operator T* () { return (T*)this; };

    /// Assignment.
    const Vec3& operator=(const Vec3& rhs) { x = rhs.x; y = rhs.y; z = rhs.z; return *this; };

    // *****************************************
    //     Comparison
    // *****************************************

    /// Equality comparison.
    bool operator==(const Vec3& rhs) const { return (x == rhs.x && y == rhs.y && z == rhs.z); };

    /// Inequality comparison.
    bool operator!=(const Vec3& rhs) const { return (x != rhs.x || y != rhs.y || z != rhs.z); };

    // *****************************************
    //     Arithmetic
    // *****************************************

    /// Addition.
    const Vec3 operator+(const Vec3& rhs) const { return Vec3(x + rhs.x, y + rhs.y, z + rhs.z); };

    /// Subtraction.
    const Vec3 operator-(const Vec3& rhs) const { return Vec3(x - rhs.x, y - rhs.y, z - rhs.z); };

    /// Multiply by scalar.
    const Vec3 operator*(const T& v) const { return Vec3(x * v, y * v, z * v); };

    /// Divide by scalar (no divide-by-zero check).
    const Vec3 operator/(const T& v) const { return Vec3(x / v, y / v, z / v); };

    /// Component-wise divide.
    const Vec3 operator/(const Vec3& rhs) const { return Vec3(x / rhs.x, y / rhs.y, z / rhs.z); };

    /// Addition in-place.
    Vec3& operator+= (const Vec3& rhs) { x += rhs.x; y += rhs.y; z += rhs.z; return *this; };

    /// Subtract in-place.
    Vec3& operator-= (const Vec3& rhs) { x -= rhs.x; y -= rhs.y; z -= rhs.z; return *this; };

    /// Scalar multiply in-place.
    Vec3& operator*= (const T& v) { x *= v; y *= v; z *= v; return *this; };

    /// Scalar divide in-place.
    Vec3& operator/= (const T& v) { x /= v; y /= v; z /= v; return *this; };
};

typedef Vec3<float>         CGU_Vec3f;
typedef Vec3<float>         CGV_Vec3f;
typedef Vec3<unsigned char> CGU_Vec3uc;
typedef Vec3<unsigned char> CGV_Vec3uc;

typedef Vec3<float>         CMP_Vec3f;
typedef Vec3<double>        CMP_Vec3d;
typedef Vec3<int>           CMP_Vec3i;
typedef Vec3<unsigned char> CMP_Vec3uc;

//============================================= VEC4 ==================================================
// 4-component vector template. Layout is exactly {x, y, z, w}.
template <class T>
class Vec4
{
public:

    T x;
    T y;
    T z;
    T w;

    // *****************************************
    //     Constructors
    // *****************************************

    /// Default constructor: zero-initializes all components.
    Vec4() : x((T)0), y((T)0), z((T)0), w((T)0) {};

    /// Value constructor.
    Vec4(const T& vx, const T& vy, const T& vz, const T& vw) : x(vx), y(vy), z(vz), w(vw) {};

    /// Copy constructor.
    Vec4(const Vec4& val) : x(val.x), y(val.y), z(val.z), w(val.w) {};

    /// Single value constructor. Sets all components to the given value.
    Vec4(const T& v) : x(v), y(v), z(v), w(v) {};

    /// Array constructor. Assumes a 4-component array.
    Vec4(const T* v) : x(v[0]), y(v[1]), z(v[2]), w(v[3]) {};

    // *****************************************
    //     Conversions/Assignment/Indexing
    // *****************************************

    /// Cast to const T*.
    operator const T* () const { return (const T*)this; };

    /// Cast to T*.
    operator T* () { return (T*)this; };

    /// Assignment.
    const Vec4& operator=(const Vec4& rhs) { x = rhs.x; y = rhs.y; z = rhs.z; w = rhs.w; return *this; };

    // *****************************************
    //     Comparison
    // *****************************************

    /// Equality comparison.
    bool operator==(const Vec4& rhs) const { return (x == rhs.x && y == rhs.y && z == rhs.z && w == rhs.w); };

    /// Inequality comparison.
    bool operator!=(const Vec4& rhs) const { return (x != rhs.x || y != rhs.y || z != rhs.z || w != rhs.w); };

    // *****************************************
    //     Arithmetic
    // *****************************************

    /// Addition.
    const Vec4 operator+(const Vec4& rhs) const { return Vec4(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w); };

    /// Subtraction.
    const Vec4 operator-(const Vec4& rhs) const { return Vec4(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w); };

    /// Multiply by scalar.
    const Vec4 operator*(const T& v) const { return Vec4(x * v, y * v, z * v, w * v); };

    /// Divide by scalar (no divide-by-zero check).
    const Vec4 operator/(const T& v) const { return Vec4(x / v, y / v, z / v, w / v); };

    /// Component-wise divide.
    const Vec4 operator/(const Vec4& rhs) const { return Vec4(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w); };

    /// Addition in-place.
    Vec4& operator+= (const Vec4& rhs) { x += rhs.x; y += rhs.y; z += rhs.z; w += rhs.w; return *this; };

    /// Subtract in-place.
    Vec4& operator-= (const Vec4& rhs) { x -= rhs.x; y -= rhs.y; z -= rhs.z; w -= rhs.w; return *this; };

    /// Scalar multiply in-place.
    Vec4& operator*= (const T& v) { x *= v; y *= v; z *= v; w *= v; return *this; };

    /// Scalar divide in-place.
    Vec4& operator/= (const T& v) { x /= v; y /= v; z /= v; w /= v; return *this; };
};

typedef Vec4<float>          CMP_Vec4f;
typedef Vec4<double>         CMP_Vec4d;
typedef Vec4<int>            CMP_Vec4i;
typedef Vec4<unsigned short> CMP_Vec4ui; // unsigned 16 bit x,y,z,w — TODO confirm: the pasted typedef lost its argument; "16 bit" per original comment
typedef Vec4<unsigned char>  CMP_Vec4uc; // unsigned 8 bit x,y,z,w
typedef Vec4<unsigned char>  CGU_Vec4uc; // unsigned 8 bit x,y,z,w
typedef Vec4<unsigned char>  CGV_Vec4uc; // unsigned 8 bit x,y,z,w
#include <xmmintrin.h>  // SSE intrinsics (_mm_*) used below.
// NOTE(review): the original #include targets were stripped by the paste;
// <xmmintrin.h> is the documented home of every _mm_* intrinsic used here.

// CMP_SSEVec4f: four packed floats held in one SSE register (__m128),
// with element-wise arithmetic, comparison (0 / 0xffffffff per lane) and
// bitwise operators. The default constructor leaves the register
// uninitialized, matching the original.
//
// Fix: the original keyed the two platform branches on the project macro
// _LINUX, so any non-MSVC build that forgot to define _LINUX fell into the
// MSVC-only branch (intrin.h, __declspec, .m128_f32) and failed to compile.
// Keying on the compiler (_MSC_VER) is always correct and preserves both
// original code paths.
#if defined(_MSC_VER)
#include <intrin.h>
class __declspec(align(16)) CMP_SSEVec4f
#else
class CMP_SSEVec4f
#endif
{
public:

    union
    {
        __m128 vec128;   // float vector, 128 bits total (16 bytes) = array of 4 floats
#if !defined(_MSC_VER)
        float f32[4];    // gcc/clang: __m128 has no named members, so alias it
#endif
    };

    // constructors
    inline CMP_SSEVec4f() {};
    inline CMP_SSEVec4f(float x, float y, float z, float w) : vec128(_mm_setr_ps(x, y, z, w)) {};
    inline CMP_SSEVec4f(__m128 vec) : vec128(vec) {}
    inline CMP_SSEVec4f(const float* data) : vec128(_mm_load_ps(data)) {};    // data must be 16-byte aligned
    inline CMP_SSEVec4f(float scalar) : vec128(_mm_load1_ps(&scalar)) {};     // broadcast scalar to all lanes

    // copy and assignment
    inline CMP_SSEVec4f(const CMP_SSEVec4f& init) : vec128(init.vec128) {};
    inline const CMP_SSEVec4f& operator=(const CMP_SSEVec4f& lhs) { vec128 = lhs.vec128; return *this; };

    // conversion to __m128 type for direct use in _mm intrinsics
    inline operator __m128() { return vec128; };
    inline operator const __m128() const { return vec128; };

    // indexing (0..3; not range-checked)
#if defined(_MSC_VER)
    inline const float& operator[](int i) const { return vec128.m128_f32[i]; };
    inline float& operator[](int i) { return vec128.m128_f32[i]; };
#else
    inline const float& operator[](int i) const { return f32[i]; };
    inline float& operator[](int i) { return f32[i]; };
#endif

    // addition
    inline CMP_SSEVec4f operator+(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_add_ps(vec128, rhs.vec128)); };
    inline CMP_SSEVec4f& operator+=(const CMP_SSEVec4f& rhs) { vec128 = _mm_add_ps(vec128, rhs.vec128); return *this; };

    // multiplication
    inline CMP_SSEVec4f operator*(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_mul_ps(vec128, rhs.vec128)); };
    inline CMP_SSEVec4f& operator*=(const CMP_SSEVec4f& rhs) { vec128 = _mm_mul_ps(vec128, rhs.vec128); return *this; };

    // scalar multiplication (disabled in the original; kept for reference —
    // the float constructor above already makes `v * 2.0f` work via conversion)
    //inline CMP_SSEVec4f operator*( float rhs ) const { return CMP_SSEVec4f( _mm_mul_ps(vec128, _mm_load1_ps(&rhs)) ); };
    //inline CMP_SSEVec4f& operator*=( float rhs ) { vec128 = _mm_mul_ps(vec128, _mm_load1_ps(&rhs)); return *this; };

    // subtraction
    inline CMP_SSEVec4f operator-(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_sub_ps(vec128, rhs.vec128)); };
    inline CMP_SSEVec4f& operator-= (const CMP_SSEVec4f& rhs) { vec128 = _mm_sub_ps(vec128, rhs.vec128); return *this; };

    // division
    inline CMP_SSEVec4f operator/(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_div_ps(vec128, rhs.vec128)); };
    inline CMP_SSEVec4f& operator/= (const CMP_SSEVec4f& rhs) { vec128 = _mm_div_ps(vec128, rhs.vec128); return *this; };

    // scalar division
    inline CMP_SSEVec4f operator/(float rhs) const { return CMP_SSEVec4f(_mm_div_ps(vec128, _mm_load1_ps(&rhs))); };
    inline CMP_SSEVec4f& operator/=(float rhs) { vec128 = _mm_div_ps(vec128, _mm_load1_ps(&rhs)); return *this; };

    // comparison
    // these return 0 or 0xffffffff in each component
    inline CMP_SSEVec4f operator< (const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmplt_ps(vec128, rhs.vec128)); };
    inline CMP_SSEVec4f operator> (const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmpgt_ps(vec128, rhs.vec128)); };
    inline CMP_SSEVec4f operator<=(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmple_ps(vec128, rhs.vec128)); };
    inline CMP_SSEVec4f operator>=(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmpge_ps(vec128, rhs.vec128)); };
    inline CMP_SSEVec4f operator==(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmpeq_ps(vec128, rhs.vec128)); };

    // bitwise operators
    inline CMP_SSEVec4f operator|(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_or_ps(vec128, rhs.vec128)); };
    inline CMP_SSEVec4f operator&(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_and_ps(vec128, rhs.vec128)); };
    inline CMP_SSEVec4f operator^(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_xor_ps(vec128, rhs.vec128)); };
    inline const CMP_SSEVec4f& operator|=(const CMP_SSEVec4f& rhs) { vec128 = _mm_or_ps(vec128, rhs.vec128); return *this; };
    inline const CMP_SSEVec4f& operator&=(const CMP_SSEVec4f& rhs) { vec128 = _mm_and_ps(vec128, rhs.vec128); return *this; };

    // SSE has no bitwise-not instruction, so fake it by XOR-ing with
    // all-ones. To get 0xffffffff in every lane we compare zero with
    // itself (0 == 0 is true in every lane).
    inline CMP_SSEVec4f operator~() const
    {
        __m128 zero = _mm_setzero_ps();
        __m128 is_true = _mm_cmpeq_ps(zero, zero);
        return _mm_xor_ps(is_true, vec128);
    };

};
#ifndef BLOCKCONSTANTS_H
#define BLOCKCONSTANTS_H

// Pre-compressed 4x4 BC1/BC2/BC3 test vectors for the CMP_Core unit tests.
// Each BCn_<color>_<alpha> array is one raw compressed block (8 bytes for
// BC1, 16 for BC2/BC3); the matching *_Block wrapper pairs it with the RGBA
// color the block is expected to decode to (Block::color starts null and is
// filled in at runtime by AssignExpectedColorsToBlocks in
// CompressonatorTests.cpp).
// NOTE(review): the original #include lines lost their targets in the
// paste; <unordered_map> and <string> are what the `blocks` map requires.
#include <unordered_map>
#include <string>

struct Block { const unsigned char* data; const unsigned char* color; };

// --- BC1: 8 bytes = two RGB565 endpoints + 2-bit indices ------------------
static const unsigned char BC1_Red_Ignore_Alpha       [] = {0x00, 0xf8, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC1_Blue_Half_Alpha        [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_White_Half_Alpha       [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Black_Half_Alpha       [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Red_Blue_Half_Alpha    [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Red_Green_Half_Alpha   [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Green_Blue_Half_Alpha  [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Red_Full_Alpha         [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Green_Full_Alpha       [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Blue_Full_Alpha        [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_White_Full_Alpha       [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Green_Ignore_Alpha     [] = {0xe0, 0x07, 0xe0, 0x07, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC1_Black_Full_Alpha       [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Red_Blue_Full_Alpha    [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Red_Green_Full_Alpha   [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Green_Blue_Full_Alpha  [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Blue_Ignore_Alpha      [] = {0x1f, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC1_White_Ignore_Alpha     [] = {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC1_Black_Ignore_Alpha     [] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC1_Red_Blue_Ignore_Alpha  [] = {0x1f, 0xf8, 0x1f, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC1_Red_Green_Ignore_Alpha [] = {0xe0, 0xff, 0xe0, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC1_Green_Blue_Ignore_Alpha[] = {0xff, 0x07, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC1_Red_Half_Alpha         [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Green_Half_Alpha       [] = {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

// --- BC2: 8 bytes explicit 4-bit alpha + 8-byte BC1 color part ------------
static const unsigned char BC2_Red_Ignore_Alpha       [] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0xf8, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Blue_Half_Alpha        [] = {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x1f, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_White_Half_Alpha       [] = {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Black_Half_Alpha       [] = {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Red_Blue_Half_Alpha    [] = {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x1f, 0xf8, 0x1f, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Red_Green_Half_Alpha   [] = {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xe0, 0xff, 0xe0, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Green_Blue_Half_Alpha  [] = {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xff, 0x07, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Red_Full_Alpha         [] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Green_Full_Alpha       [] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x07, 0xe0, 0x07, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Blue_Full_Alpha        [] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_White_Full_Alpha       [] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Green_Ignore_Alpha     [] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0x07, 0xe0, 0x07, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Black_Full_Alpha       [] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Red_Blue_Full_Alpha    [] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0xf8, 0x1f, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Red_Green_Full_Alpha   [] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0xff, 0xe0, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Green_Blue_Full_Alpha  [] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x07, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Blue_Ignore_Alpha      [] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_White_Ignore_Alpha     [] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Black_Ignore_Alpha     [] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Red_Blue_Ignore_Alpha  [] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0xf8, 0x1f, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Red_Green_Ignore_Alpha [] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xe0, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Green_Blue_Ignore_Alpha[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Red_Half_Alpha         [] = {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x00, 0xf8, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC2_Green_Half_Alpha       [] = {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xe0, 0x07, 0xe0, 0x07, 0x00, 0x00, 0x00, 0x00};

// --- BC3: 8 bytes interpolated alpha + 8-byte BC1 color part --------------
static const unsigned char BC3_Red_Ignore_Alpha       [] = {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Blue_Half_Alpha        [] = {0x7b, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_White_Half_Alpha       [] = {0x7b, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Black_Half_Alpha       [] = {0x7b, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Red_Blue_Half_Alpha    [] = {0x7b, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0xf8, 0x1f, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Red_Green_Half_Alpha   [] = {0x7b, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0xff, 0xe0, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Green_Blue_Half_Alpha  [] = {0x7b, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x07, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Red_Full_Alpha         [] = {0xff, 0x00, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x00, 0xf8, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Green_Full_Alpha       [] = {0xff, 0x00, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xe0, 0x07, 0xe0, 0x07, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Blue_Full_Alpha        [] = {0xff, 0x00, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x1f, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_White_Full_Alpha       [] = {0xff, 0x00, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Green_Ignore_Alpha     [] = {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x07, 0xe0, 0x07, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Black_Full_Alpha       [] = {0xff, 0x00, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Red_Blue_Full_Alpha    [] = {0xff, 0x00, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x1f, 0xf8, 0x1f, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Red_Green_Full_Alpha   [] = {0xff, 0x00, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xe0, 0xff, 0xe0, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Green_Blue_Full_Alpha  [] = {0xff, 0x00, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x07, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Blue_Ignore_Alpha      [] = {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_White_Ignore_Alpha     [] = {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Black_Ignore_Alpha     [] = {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Red_Blue_Ignore_Alpha  [] = {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0xf8, 0x1f, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Red_Green_Ignore_Alpha [] = {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0xff, 0xe0, 0xff, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Green_Blue_Ignore_Alpha[] = {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x07, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Red_Half_Alpha         [] = {0x7b, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00};
static const unsigned char BC3_Green_Half_Alpha       [] = {0x7b, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x07, 0xe0, 0x07, 0x00, 0x00, 0x00, 0x00};

// Fix: these wrappers were non-static globals in a header; including the
// header from more than one translation unit would break the one-definition
// rule and fail to link. `static` gives each TU its own copy.
static Block BC1_Red_Ignore_Alpha_Block        = {BC1_Red_Ignore_Alpha, nullptr};
static Block BC1_Blue_Half_Alpha_Block         = {BC1_Blue_Half_Alpha, nullptr};
static Block BC1_White_Half_Alpha_Block        = {BC1_White_Half_Alpha, nullptr};
static Block BC1_Black_Half_Alpha_Block        = {BC1_Black_Half_Alpha, nullptr};
static Block BC1_Red_Blue_Half_Alpha_Block     = {BC1_Red_Blue_Half_Alpha, nullptr};
static Block BC1_Red_Green_Half_Alpha_Block    = {BC1_Red_Green_Half_Alpha, nullptr};
static Block BC1_Green_Blue_Half_Alpha_Block   = {BC1_Green_Blue_Half_Alpha, nullptr};
static Block BC1_Red_Full_Alpha_Block          = {BC1_Red_Full_Alpha, nullptr};
static Block BC1_Green_Full_Alpha_Block        = {BC1_Green_Full_Alpha, nullptr};
static Block BC1_Blue_Full_Alpha_Block         = {BC1_Blue_Full_Alpha, nullptr};
static Block BC1_White_Full_Alpha_Block        = {BC1_White_Full_Alpha, nullptr};
static Block BC1_Green_Ignore_Alpha_Block      = {BC1_Green_Ignore_Alpha, nullptr};
static Block BC1_Black_Full_Alpha_Block        = {BC1_Black_Full_Alpha, nullptr};
static Block BC1_Red_Blue_Full_Alpha_Block     = {BC1_Red_Blue_Full_Alpha, nullptr};
static Block BC1_Red_Green_Full_Alpha_Block    = {BC1_Red_Green_Full_Alpha, nullptr};
static Block BC1_Green_Blue_Full_Alpha_Block   = {BC1_Green_Blue_Full_Alpha, nullptr};
static Block BC1_Blue_Ignore_Alpha_Block       = {BC1_Blue_Ignore_Alpha, nullptr};
static Block BC1_White_Ignore_Alpha_Block      = {BC1_White_Ignore_Alpha, nullptr};
static Block BC1_Black_Ignore_Alpha_Block      = {BC1_Black_Ignore_Alpha, nullptr};
static Block BC1_Red_Blue_Ignore_Alpha_Block   = {BC1_Red_Blue_Ignore_Alpha, nullptr};
static Block BC1_Red_Green_Ignore_Alpha_Block  = {BC1_Red_Green_Ignore_Alpha, nullptr};
static Block BC1_Green_Blue_Ignore_Alpha_Block = {BC1_Green_Blue_Ignore_Alpha, nullptr};
static Block BC1_Red_Half_Alpha_Block          = {BC1_Red_Half_Alpha, nullptr};
static Block BC1_Green_Half_Alpha_Block        = {BC1_Green_Half_Alpha, nullptr};
static Block BC2_Red_Ignore_Alpha_Block        = {BC2_Red_Ignore_Alpha, nullptr};
static Block BC2_Blue_Half_Alpha_Block         = {BC2_Blue_Half_Alpha, nullptr};
static Block BC2_White_Half_Alpha_Block        = {BC2_White_Half_Alpha, nullptr};
static Block BC2_Black_Half_Alpha_Block        = {BC2_Black_Half_Alpha, nullptr};
static Block BC2_Red_Blue_Half_Alpha_Block     = {BC2_Red_Blue_Half_Alpha, nullptr};
static Block BC2_Red_Green_Half_Alpha_Block    = {BC2_Red_Green_Half_Alpha, nullptr};
static Block BC2_Green_Blue_Half_Alpha_Block   = {BC2_Green_Blue_Half_Alpha, nullptr};
static Block BC2_Red_Full_Alpha_Block          = {BC2_Red_Full_Alpha, nullptr};
static Block BC2_Green_Full_Alpha_Block        = {BC2_Green_Full_Alpha, nullptr};
static Block BC2_Blue_Full_Alpha_Block         = {BC2_Blue_Full_Alpha, nullptr};
static Block BC2_White_Full_Alpha_Block        = {BC2_White_Full_Alpha, nullptr};
static Block BC2_Green_Ignore_Alpha_Block      = {BC2_Green_Ignore_Alpha, nullptr};
static Block BC2_Black_Full_Alpha_Block        = {BC2_Black_Full_Alpha, nullptr};
static Block BC2_Red_Blue_Full_Alpha_Block     = {BC2_Red_Blue_Full_Alpha, nullptr};
static Block BC2_Red_Green_Full_Alpha_Block    = {BC2_Red_Green_Full_Alpha, nullptr};
static Block BC2_Green_Blue_Full_Alpha_Block   = {BC2_Green_Blue_Full_Alpha, nullptr};
static Block BC2_Blue_Ignore_Alpha_Block       = {BC2_Blue_Ignore_Alpha, nullptr};
static Block BC2_White_Ignore_Alpha_Block      = {BC2_White_Ignore_Alpha, nullptr};
static Block BC2_Black_Ignore_Alpha_Block      = {BC2_Black_Ignore_Alpha, nullptr};
static Block BC2_Red_Blue_Ignore_Alpha_Block   = {BC2_Red_Blue_Ignore_Alpha, nullptr};
static Block BC2_Red_Green_Ignore_Alpha_Block  = {BC2_Red_Green_Ignore_Alpha, nullptr};
static Block BC2_Green_Blue_Ignore_Alpha_Block = {BC2_Green_Blue_Ignore_Alpha, nullptr};
static Block BC2_Red_Half_Alpha_Block          = {BC2_Red_Half_Alpha, nullptr};
static Block BC2_Green_Half_Alpha_Block        = {BC2_Green_Half_Alpha, nullptr};
static Block BC3_Red_Ignore_Alpha_Block        = {BC3_Red_Ignore_Alpha, nullptr};
static Block BC3_Blue_Half_Alpha_Block         = {BC3_Blue_Half_Alpha, nullptr};
static Block BC3_White_Half_Alpha_Block        = {BC3_White_Half_Alpha, nullptr};
static Block BC3_Black_Half_Alpha_Block        = {BC3_Black_Half_Alpha, nullptr};
static Block BC3_Red_Blue_Half_Alpha_Block     = {BC3_Red_Blue_Half_Alpha, nullptr};
static Block BC3_Red_Green_Half_Alpha_Block    = {BC3_Red_Green_Half_Alpha, nullptr};
static Block BC3_Green_Blue_Half_Alpha_Block   = {BC3_Green_Blue_Half_Alpha, nullptr};
static Block BC3_Red_Full_Alpha_Block          = {BC3_Red_Full_Alpha, nullptr};
static Block BC3_Green_Full_Alpha_Block        = {BC3_Green_Full_Alpha, nullptr};
static Block BC3_Blue_Full_Alpha_Block         = {BC3_Blue_Full_Alpha, nullptr};
static Block BC3_White_Full_Alpha_Block        = {BC3_White_Full_Alpha, nullptr};
static Block BC3_Green_Ignore_Alpha_Block      = {BC3_Green_Ignore_Alpha, nullptr};
static Block BC3_Black_Full_Alpha_Block        = {BC3_Black_Full_Alpha, nullptr};
static Block BC3_Red_Blue_Full_Alpha_Block     = {BC3_Red_Blue_Full_Alpha, nullptr};
static Block BC3_Red_Green_Full_Alpha_Block    = {BC3_Red_Green_Full_Alpha, nullptr};
static Block BC3_Green_Blue_Full_Alpha_Block   = {BC3_Green_Blue_Full_Alpha, nullptr};
static Block BC3_Blue_Ignore_Alpha_Block       = {BC3_Blue_Ignore_Alpha, nullptr};
static Block BC3_White_Ignore_Alpha_Block      = {BC3_White_Ignore_Alpha, nullptr};
static Block BC3_Black_Ignore_Alpha_Block      = {BC3_Black_Ignore_Alpha, nullptr};
static Block BC3_Red_Blue_Ignore_Alpha_Block   = {BC3_Red_Blue_Ignore_Alpha, nullptr};
static Block BC3_Red_Green_Ignore_Alpha_Block  = {BC3_Red_Green_Ignore_Alpha, nullptr};
static Block BC3_Green_Blue_Ignore_Alpha_Block = {BC3_Green_Blue_Ignore_Alpha, nullptr};
static Block BC3_Red_Half_Alpha_Block          = {BC3_Red_Half_Alpha, nullptr};
static Block BC3_Green_Half_Alpha_Block        = {BC3_Green_Half_Alpha, nullptr};

// Name -> block table (72 entries: 3 formats x 3 alpha levels x 8 colors).
// The map stores copies of the wrappers, so AssignExpectedColorsToBlocks
// mutates the map entries, not the globals above.
static std::unordered_map<std::string, Block> blocks {
    { "BC1_Red_Ignore_Alpha",        BC1_Red_Ignore_Alpha_Block},
    { "BC1_Blue_Half_Alpha",         BC1_Blue_Half_Alpha_Block},
    { "BC1_White_Half_Alpha",        BC1_White_Half_Alpha_Block},
    { "BC1_Black_Half_Alpha",        BC1_Black_Half_Alpha_Block},
    { "BC1_Red_Blue_Half_Alpha",     BC1_Red_Blue_Half_Alpha_Block},
    { "BC1_Red_Green_Half_Alpha",    BC1_Red_Green_Half_Alpha_Block},
    { "BC1_Green_Blue_Half_Alpha",   BC1_Green_Blue_Half_Alpha_Block},
    { "BC1_Red_Full_Alpha",          BC1_Red_Full_Alpha_Block},
    { "BC1_Green_Full_Alpha",        BC1_Green_Full_Alpha_Block},
    { "BC1_Blue_Full_Alpha",         BC1_Blue_Full_Alpha_Block},
    { "BC1_White_Full_Alpha",        BC1_White_Full_Alpha_Block},
    { "BC1_Green_Ignore_Alpha",      BC1_Green_Ignore_Alpha_Block},
    { "BC1_Black_Full_Alpha",        BC1_Black_Full_Alpha_Block},
    { "BC1_Red_Blue_Full_Alpha",     BC1_Red_Blue_Full_Alpha_Block},
    { "BC1_Red_Green_Full_Alpha",    BC1_Red_Green_Full_Alpha_Block},
    { "BC1_Green_Blue_Full_Alpha",   BC1_Green_Blue_Full_Alpha_Block},
    { "BC1_Blue_Ignore_Alpha",       BC1_Blue_Ignore_Alpha_Block},
    { "BC1_White_Ignore_Alpha",      BC1_White_Ignore_Alpha_Block},
    { "BC1_Black_Ignore_Alpha",      BC1_Black_Ignore_Alpha_Block},
    { "BC1_Red_Blue_Ignore_Alpha",   BC1_Red_Blue_Ignore_Alpha_Block},
    { "BC1_Red_Green_Ignore_Alpha",  BC1_Red_Green_Ignore_Alpha_Block},
    { "BC1_Green_Blue_Ignore_Alpha", BC1_Green_Blue_Ignore_Alpha_Block},
    { "BC1_Red_Half_Alpha",          BC1_Red_Half_Alpha_Block},
    { "BC1_Green_Half_Alpha",        BC1_Green_Half_Alpha_Block},
    { "BC2_Red_Ignore_Alpha",        BC2_Red_Ignore_Alpha_Block},
    { "BC2_Blue_Half_Alpha",         BC2_Blue_Half_Alpha_Block},
    { "BC2_White_Half_Alpha",        BC2_White_Half_Alpha_Block},
    { "BC2_Black_Half_Alpha",        BC2_Black_Half_Alpha_Block},
    { "BC2_Red_Blue_Half_Alpha",     BC2_Red_Blue_Half_Alpha_Block},
    { "BC2_Red_Green_Half_Alpha",    BC2_Red_Green_Half_Alpha_Block},
    { "BC2_Green_Blue_Half_Alpha",   BC2_Green_Blue_Half_Alpha_Block},
    { "BC2_Red_Full_Alpha",          BC2_Red_Full_Alpha_Block},
    { "BC2_Green_Full_Alpha",        BC2_Green_Full_Alpha_Block},
    { "BC2_Blue_Full_Alpha",         BC2_Blue_Full_Alpha_Block},
    { "BC2_White_Full_Alpha",        BC2_White_Full_Alpha_Block},
    { "BC2_Green_Ignore_Alpha",      BC2_Green_Ignore_Alpha_Block},
    { "BC2_Black_Full_Alpha",        BC2_Black_Full_Alpha_Block},
    { "BC2_Red_Blue_Full_Alpha",     BC2_Red_Blue_Full_Alpha_Block},
    { "BC2_Red_Green_Full_Alpha",    BC2_Red_Green_Full_Alpha_Block},
    { "BC2_Green_Blue_Full_Alpha",   BC2_Green_Blue_Full_Alpha_Block},
    { "BC2_Blue_Ignore_Alpha",       BC2_Blue_Ignore_Alpha_Block},
    { "BC2_White_Ignore_Alpha",      BC2_White_Ignore_Alpha_Block},
    { "BC2_Black_Ignore_Alpha",      BC2_Black_Ignore_Alpha_Block},
    { "BC2_Red_Blue_Ignore_Alpha",   BC2_Red_Blue_Ignore_Alpha_Block},
    { "BC2_Red_Green_Ignore_Alpha",  BC2_Red_Green_Ignore_Alpha_Block},
    { "BC2_Green_Blue_Ignore_Alpha", BC2_Green_Blue_Ignore_Alpha_Block},
    { "BC2_Red_Half_Alpha",          BC2_Red_Half_Alpha_Block},
    { "BC2_Green_Half_Alpha",        BC2_Green_Half_Alpha_Block},
    { "BC3_Red_Ignore_Alpha",        BC3_Red_Ignore_Alpha_Block},
    { "BC3_Blue_Half_Alpha",         BC3_Blue_Half_Alpha_Block},
    { "BC3_White_Half_Alpha",        BC3_White_Half_Alpha_Block},
    { "BC3_Black_Half_Alpha",        BC3_Black_Half_Alpha_Block},
    { "BC3_Red_Blue_Half_Alpha",     BC3_Red_Blue_Half_Alpha_Block},
    { "BC3_Red_Green_Half_Alpha",    BC3_Red_Green_Half_Alpha_Block},
    { "BC3_Green_Blue_Half_Alpha",   BC3_Green_Blue_Half_Alpha_Block},
    { "BC3_Red_Full_Alpha",          BC3_Red_Full_Alpha_Block},
    { "BC3_Green_Full_Alpha",        BC3_Green_Full_Alpha_Block},
    { "BC3_Blue_Full_Alpha",         BC3_Blue_Full_Alpha_Block},
    { "BC3_White_Full_Alpha",        BC3_White_Full_Alpha_Block},
    { "BC3_Green_Ignore_Alpha",      BC3_Green_Ignore_Alpha_Block},
    { "BC3_Black_Full_Alpha",        BC3_Black_Full_Alpha_Block},
    { "BC3_Red_Blue_Full_Alpha",     BC3_Red_Blue_Full_Alpha_Block},
    { "BC3_Red_Green_Full_Alpha",    BC3_Red_Green_Full_Alpha_Block},
    { "BC3_Green_Blue_Full_Alpha",   BC3_Green_Blue_Full_Alpha_Block},
    { "BC3_Blue_Ignore_Alpha",       BC3_Blue_Ignore_Alpha_Block},
    { "BC3_White_Ignore_Alpha",      BC3_White_Ignore_Alpha_Block},
    { "BC3_Black_Ignore_Alpha",      BC3_Black_Ignore_Alpha_Block},
    { "BC3_Red_Blue_Ignore_Alpha",   BC3_Red_Blue_Ignore_Alpha_Block},
    { "BC3_Red_Green_Ignore_Alpha",  BC3_Red_Green_Ignore_Alpha_Block},
    { "BC3_Green_Blue_Ignore_Alpha", BC3_Green_Blue_Ignore_Alpha_Block},
    { "BC3_Red_Half_Alpha",          BC3_Red_Half_Alpha_Block},
    { "BC3_Green_Half_Alpha",        BC3_Green_Half_Alpha_Block}
};

#endif // BLOCKCONSTANTS_H
+static const int BC3_BLOCK_SIZE = 16; +static const int DECOMPRESSED_BLOCK_SIZE = 64; +static const int STRIDE_DECOMPRESSED = 16; + +static const std::map> colorValues{ + { "Red_Ignore_Alpha", { 0xff, 0x0, 0x0, 0xff }}, + { "Green_Ignore_Alpha" , { 0x0, 0xff, 0x0, 0xff }}, + { "Blue_Ignore_Alpha" , { 0x0, 0x0, 0xff, 0xff }}, + { "White_Ignore_Alpha" , { 0xff, 0xff, 0xff, 0xff }}, + { "Black_Ignore_Alpha" , { 0x0, 0x0, 0x0, 0xff }}, + { "Red_Blue_Ignore_Alpha" , { 0xff, 0x0, 0xff, 0xff }}, + { "Red_Green_Ignore_Alpha" , { 0xff, 0xff, 0x0, 0xff }}, + { "Green_Blue_Ignore_Alpha", { 0x0, 0xff, 0xff, 0xff }}, + + { "Red_Half_Alpha" , { 0xff, 0x0, 0x0, 0x7b }}, + { "Green_Half_Alpha" , { 0x0, 0xff, 0x0, 0x7b }}, + { "Blue_Half_Alpha" , { 0x0, 0x0, 0xff, 0x7b }}, + { "White_Half_Alpha" , { 0xff, 0xff, 0xff, 0x7b }}, + { "Black_Half_Alpha" , { 0x0, 0x0, 0x0, 0x7b }}, + { "Red_Blue_Half_Alpha" , { 0xff, 0x0, 0xff, 0x7b }}, + { "Red_Green_Half_Alpha", { 0xff, 0xff, 0x0, 0x7b }}, + { "Green_Blue_Half_Alpha" , { 0x0, 0xff, 0xff, 0x7b }}, + + { "Red_Full_Alpha" , { 0xff, 0x0, 0x0, 0x0 }}, + { "Green_Full_Alpha" , { 0x0, 0xff, 0x0, 0x0 }}, + { "Blue_Full_Alpha" , { 0x0, 0x0, 0xff, 0x0 }}, + { "White_Full_Alpha" , { 0xff, 0xff, 0xff, 0x0 }}, + { "Black_Full_Alpha" , { 0x0, 0x0, 0x0, 0x0 }}, + { "Red_Blue_Full_Alpha" , { 0xff, 0x0, 0xff, 0x0 }}, + { "Red_Green_Full_Alpha", { 0xff, 0xff, 0x0, 0x0 }}, + { "Green_Blue_Full_Alpha" , { 0x0, 0xff, 0xff, 0x0 }} +}; + +//block storage format: [R, G, B, W, Black, RB, RG, GB]. 
Alpha: 100%, 50%, 0% +enum ColorEnum { + Red, Green, Blue, White, Black, Red_Blue, Red_Green, Green_Blue +}; +enum AlphaEnum { + Ignore_Alpha, Half_Alpha, Full_Alpha +}; +enum CompEnum { + BC1, BC2, BC3 +}; + +std::string BlockKeyName(CompEnum compression, ColorEnum color, AlphaEnum alpha) +{ + std::string result = ""; + switch (compression) { + case BC1: result += "BC1"; break; + case BC2: result += "BC2"; break; + case BC3: result += "BC3"; break; + } + switch (color) { + case Red: result += "_Red_"; break; + case Green: result += "_Green_"; break; + case Blue: result += "_Blue_"; break; + case White: result += "_White_"; break; + case Black: result += "_Black_"; break; + case Red_Blue: result += "_Red_Blue_"; break; + case Red_Green: result += "_Red_Green_"; break; + case Green_Blue: result += "_Green_Blue_"; break; + } + switch (alpha) { + case Ignore_Alpha: result += "Ignore_Alpha"; break; + case Half_Alpha: result += "Half_Alpha"; break; + case Full_Alpha: result += "Full_Alpha"; break; + } + return result; +} + +void AssignExpectedColorsToBlocks() +{ + ColorEnum color = Red; + CompEnum comp = BC1; + AlphaEnum alpha = Ignore_Alpha; + for (int i = 0; i < blocks.size(); ++i) { + if (i % 24 == 0 && i > 0) { + comp = static_cast(comp + 1); + } + if (i % 8 == 0 && i > 0) { + alpha = static_cast((alpha + 1) % 3); + } + const std::string keyBlocks = BlockKeyName(comp, color, alpha); + std::string keyColor = keyBlocks; + // string keyColor is in format BCn_color_alpha. To use it as key to access colorValues, delete the BCn_ part. + keyColor.erase(0, 4); + ((blocks.find(keyBlocks))->second).color = ((colorValues.find(keyColor))->second).data(); + color = static_cast((color + 1) % 8); + } +} + +bool ColorMatches(unsigned char* buffer, const unsigned char* expectedColor, bool ignoreAlpha) +{ + unsigned char expectedColorBuffer[64]; + // handle formats that do not support alpha. + if (ignoreAlpha) { + // if alpha is ignored, BC should set all values to 0. 
Except the alpha value which can be 0 or 0xff only. + if (buffer[3] != 0 && buffer[3] != 255) { + return false; + } + unsigned char expColorWithoutAlpha[4] = { 0 }; + // Only when the alpha value is 0xff colors are stored. Otherwise the RGB colors were set to 0 by during compression. + if (expectedColor[3] == 0xff) { + memcpy(expColorWithoutAlpha, expectedColor, 4); + } + // Set alpha value to the alpha value in the first pixel of the decompressed buffer. + // The buffer contains only one color, so all pixels should have the same values. + expColorWithoutAlpha[3] = buffer[3]; + + for (int idx = 0; idx < DECOMPRESSED_BLOCK_SIZE / 4; ++idx) { + memcpy(expectedColorBuffer + (idx * 4), expColorWithoutAlpha, 4); + } + return memcmp(&expectedColorBuffer, buffer, DECOMPRESSED_BLOCK_SIZE) == 0; + } + + for (int idx = 0; idx < DECOMPRESSED_BLOCK_SIZE / 4; ++idx) { + memcpy(expectedColorBuffer + (idx * 4), expectedColor, 4); + } + return memcmp(&expectedColorBuffer, buffer, DECOMPRESSED_BLOCK_SIZE) == 0; +} + +//*************************************************************************************** + +TEST_CASE("BC1_Red_Ignore_Alpha", "[BC1_Red_Ignore_Alpha]") +{ + const auto block = blocks.find("BC1_Red_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Blue_Half_Alpha", "[BC1_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC1_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + 
CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_White_Half_Alpha", "[BC1_White_Half_Alpha]") +{ + const auto block = blocks.find("BC1_White_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Black_Half_Alpha", "[BC1_Black_Half_Alpha]") +{ + const auto block = blocks.find("BC1_Black_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Red_Blue_Half_Alpha", "[BC1_Red_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC1_Red_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, 
decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Red_Green_Half_Alpha", "[BC1_Red_Green_Half_Alpha]") +{ + const auto block = blocks.find("BC1_Red_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Green_Blue_Half_Alpha", "[BC1_Green_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC1_Green_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Red_Full_Alpha", "[BC1_Red_Full_Alpha]") +{ + const auto block = blocks.find("BC1_Red_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Green_Full_Alpha", "[BC1_Green_Full_Alpha]") +{ + const auto block = 
blocks.find("BC1_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Blue_Full_Alpha", "[BC1_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC1_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_White_Full_Alpha", "[BC1_White_Full_Alpha]") +{ + const auto block = blocks.find("BC1_White_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Green_Ignore_Alpha", "[BC1_Green_Ignore_Alpha]") +{ + const auto block = blocks.find("BC1_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + 
CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Black_Full_Alpha", "[BC1_Black_Full_Alpha]") +{ + const auto block = blocks.find("BC1_Black_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Red_Blue_Full_Alpha", "[BC1_Red_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC1_Red_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Red_Green_Full_Alpha", "[BC1_Red_Green_Full_Alpha]") +{ + const auto block = blocks.find("BC1_Red_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + 
DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Green_Blue_Full_Alpha", "[BC1_Green_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC1_Green_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Blue_Ignore_Alpha", "[BC1_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC1_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_White_Ignore_Alpha", "[BC1_White_Ignore_Alpha]") +{ + const auto block = blocks.find("BC1_White_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Black_Ignore_Alpha", "[BC1_Black_Ignore_Alpha]") +{ + const auto 
block = blocks.find("BC1_Black_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Red_Blue_Ignore_Alpha", "[BC1_Red_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC1_Red_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Red_Green_Ignore_Alpha", "[BC1_Red_Green_Ignore_Alpha]") +{ + const auto block = blocks.find("BC1_Red_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Green_Blue_Ignore_Alpha", "[BC1_Green_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC1_Green_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + 
DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Red_Half_Alpha", "[BC1_Red_Half_Alpha]") +{ + const auto block = blocks.find("BC1_Red_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC1_Green_Half_Alpha", "[BC1_Green_Half_Alpha]") +{ + const auto block = blocks.find("BC1_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC1(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,true)); + unsigned char compBlock[8]; + unsigned char decompCompBlock [64]; + CompressBlockBC1(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC1(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,true)); +} +TEST_CASE("BC2_Red_Ignore_Alpha", "[BC2_Red_Ignore_Alpha]") +{ + const auto block = blocks.find("BC2_Red_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, 
nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Blue_Half_Alpha", "[BC2_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC2_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_White_Half_Alpha", "[BC2_White_Half_Alpha]") +{ + const auto block = blocks.find("BC2_White_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Black_Half_Alpha", "[BC2_Black_Half_Alpha]") +{ + const auto block = blocks.find("BC2_Black_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Red_Blue_Half_Alpha", "[BC2_Red_Blue_Half_Alpha]") +{ + const auto 
block = blocks.find("BC2_Red_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Red_Green_Half_Alpha", "[BC2_Red_Green_Half_Alpha]") +{ + const auto block = blocks.find("BC2_Red_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Green_Blue_Half_Alpha", "[BC2_Green_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC2_Green_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Red_Full_Alpha", "[BC2_Red_Full_Alpha]") +{ + const auto block = blocks.find("BC2_Red_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + 
DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Green_Full_Alpha", "[BC2_Green_Full_Alpha]") +{ + const auto block = blocks.find("BC2_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Blue_Full_Alpha", "[BC2_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC2_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_White_Full_Alpha", "[BC2_White_Full_Alpha]") +{ + const auto block = blocks.find("BC2_White_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, 
compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Green_Ignore_Alpha", "[BC2_Green_Ignore_Alpha]") +{ + const auto block = blocks.find("BC2_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Black_Full_Alpha", "[BC2_Black_Full_Alpha]") +{ + const auto block = blocks.find("BC2_Black_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Red_Blue_Full_Alpha", "[BC2_Red_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC2_Red_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Red_Green_Full_Alpha", 
"[BC2_Red_Green_Full_Alpha]") +{ + const auto block = blocks.find("BC2_Red_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Green_Blue_Full_Alpha", "[BC2_Green_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC2_Green_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Blue_Ignore_Alpha", "[BC2_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC2_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_White_Ignore_Alpha", "[BC2_White_Ignore_Alpha]") +{ + const auto block = blocks.find("BC2_White_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned 
char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Black_Ignore_Alpha", "[BC2_Black_Ignore_Alpha]") +{ + const auto block = blocks.find("BC2_Black_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Red_Blue_Ignore_Alpha", "[BC2_Red_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC2_Red_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Red_Green_Ignore_Alpha", "[BC2_Red_Green_Ignore_Alpha]") +{ + const auto block = blocks.find("BC2_Red_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned 
char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Green_Blue_Ignore_Alpha", "[BC2_Green_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC2_Green_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Red_Half_Alpha", "[BC2_Red_Half_Alpha]") +{ + const auto block = blocks.find("BC2_Red_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC2_Green_Half_Alpha", "[BC2_Green_Half_Alpha]") +{ + const auto block = blocks.find("BC2_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC2(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC2(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC2(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, 
blockColor,false)); +} +TEST_CASE("BC3_Red_Ignore_Alpha", "[BC3_Red_Ignore_Alpha]") +{ + const auto block = blocks.find("BC3_Red_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Blue_Half_Alpha", "[BC3_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC3_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_White_Half_Alpha", "[BC3_White_Half_Alpha]") +{ + const auto block = blocks.find("BC3_White_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Black_Half_Alpha", "[BC3_Black_Half_Alpha]") +{ + const auto block = blocks.find("BC3_Black_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = 
block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Red_Blue_Half_Alpha", "[BC3_Red_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC3_Red_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Red_Green_Half_Alpha", "[BC3_Red_Green_Half_Alpha]") +{ + const auto block = blocks.find("BC3_Red_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Green_Blue_Half_Alpha", "[BC3_Green_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC3_Green_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char 
compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Red_Full_Alpha", "[BC3_Red_Full_Alpha]") +{ + const auto block = blocks.find("BC3_Red_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Green_Full_Alpha", "[BC3_Green_Full_Alpha]") +{ + const auto block = blocks.find("BC3_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Blue_Full_Alpha", "[BC3_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC3_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, 
blockColor,false)); +} +TEST_CASE("BC3_White_Full_Alpha", "[BC3_White_Full_Alpha]") +{ + const auto block = blocks.find("BC3_White_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Green_Ignore_Alpha", "[BC3_Green_Ignore_Alpha]") +{ + const auto block = blocks.find("BC3_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Black_Full_Alpha", "[BC3_Black_Full_Alpha]") +{ + const auto block = blocks.find("BC3_Black_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Red_Blue_Full_Alpha", "[BC3_Red_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC3_Red_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const 
auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Red_Green_Full_Alpha", "[BC3_Red_Green_Full_Alpha]") +{ + const auto block = blocks.find("BC3_Red_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Green_Blue_Full_Alpha", "[BC3_Green_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC3_Green_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Blue_Ignore_Alpha", "[BC3_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC3_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned 
char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_White_Ignore_Alpha", "[BC3_White_Ignore_Alpha]") +{ + const auto block = blocks.find("BC3_White_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Black_Ignore_Alpha", "[BC3_Black_Ignore_Alpha]") +{ + const auto block = blocks.find("BC3_Black_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Red_Blue_Ignore_Alpha", "[BC3_Red_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC3_Red_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + 
CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Red_Green_Ignore_Alpha", "[BC3_Red_Green_Ignore_Alpha]") +{ + const auto block = blocks.find("BC3_Red_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Green_Blue_Ignore_Alpha", "[BC3_Green_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC3_Green_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Red_Half_Alpha", "[BC3_Red_Half_Alpha]") +{ + const auto block = blocks.find("BC3_Red_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} +TEST_CASE("BC3_Green_Half_Alpha", "[BC3_Green_Half_Alpha]") +{ + const auto block = 
blocks.find("BC3_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock [64]; + DecompressBlockBC3(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor,false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock [64]; + CompressBlockBC3(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC3(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor,false)); +} + +//*************************************************************************************** \ No newline at end of file diff --git a/extern/CMP_Core/test/CompressonatorTests.h b/extern/CMP_Core/test/CompressonatorTests.h new file mode 100644 index 0000000..f070a4f --- /dev/null +++ b/extern/CMP_Core/test/CompressonatorTests.h @@ -0,0 +1,6 @@ +#ifndef COMPRESSONATOR_TESTS_H +#define COMPRESSONATOR_TESTS_H + +void AssignExpectedColorsToBlocks(); + +#endif \ No newline at end of file diff --git a/extern/CMP_Core/test/TestsMain.cpp b/extern/CMP_Core/test/TestsMain.cpp new file mode 100644 index 0000000..99f12a5 --- /dev/null +++ b/extern/CMP_Core/test/TestsMain.cpp @@ -0,0 +1,10 @@ +#define CATCH_CONFIG_RUNNER +#include "../../../Common/Lib/Ext/Catch2/catch.hpp" +#include "CompressonatorTests.h" + +int main(int argc, char* argv[]) { + AssignExpectedColorsToBlocks(); + int result = Catch::Session().run(argc, argv); + + return result; +} \ No newline at end of file diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt index 7e41986..4e812f4 100644 --- a/extern/CMakeLists.txt +++ b/extern/CMakeLists.txt @@ -9,5 +9,9 @@ ADD_SUBDIRECTORY(EtcLib) ADD_SUBDIRECTORY(rg_etc1_v104) #ADD_SUBDIRECTORY(etcpack) -ADD_SUBDIRECTORY(butteraugli) +#ADD_SUBDIRECTORY(butteraugli) + +ADD_SUBDIRECTORY(libsquish-1.15) + +ADD_SUBDIRECTORY(CMP_Core) diff --git a/extern/libsquish-1.15/CMakeLists.txt b/extern/libsquish-1.15/CMakeLists.txt new file mode 100644 index 0000000..a36e574 --- 
/dev/null +++ b/extern/libsquish-1.15/CMakeLists.txt @@ -0,0 +1,117 @@ +# cmake build file for squish +# by Stefan Roettger (snroettg@gmail.com) +# updated by Simon Brown (si@sjbrown.co.uk) + +# features: +# uses -fopenmp when available +# use BUILD_SQUISH_WITH_OPENMP to override +# Xcode: builds universal binaries, uses SSE2 on i386 and Altivec on ppc +# Unix and VS: SSE2 support is enabled by default +# use BUILD_SQUISH_WITH_SSE2 and BUILD_SQUISH_WITH_ALTIVEC to override + +PROJECT(squish) + +CMAKE_MINIMUM_REQUIRED(VERSION 2.8.3) + +OPTION(BUILD_SQUISH_WITH_OPENMP "Build with OpenMP." ON) + +OPTION(BUILD_SQUISH_WITH_SSE2 "Build with SSE2." ON) +OPTION(BUILD_SQUISH_WITH_ALTIVEC "Build with Altivec." OFF) + +OPTION(BUILD_SHARED_LIBS "Build shared libraries." OFF) + +OPTION(BUILD_SQUISH_EXTRA "Build extra source code." OFF) + +IF (BUILD_SQUISH_WITH_OPENMP) + FIND_PACKAGE(OpenMP) + IF (OPENMP_FOUND) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + ADD_DEFINITIONS(-DSQUISH_USE_OPENMP) + ENDIF() +ENDIF() + +IF (CMAKE_GENERATOR STREQUAL "Xcode") + SET(CMAKE_OSX_ARCHITECTURES "i386;ppc") +ELSE (CMAKE_GENERATOR STREQUAL "Xcode") + IF (BUILD_SQUISH_WITH_SSE2 AND NOT WIN32) + ADD_DEFINITIONS(-DSQUISH_USE_SSE=2 -msse2) + ENDIF (BUILD_SQUISH_WITH_SSE2 AND NOT WIN32) + IF (BUILD_SQUISH_WITH_ALTIVEC AND NOT WIN32) + ADD_DEFINITIONS(-DSQUISH_USE_ALTIVEC=1 -maltivec) + ENDIF (BUILD_SQUISH_WITH_ALTIVEC AND NOT WIN32) +ENDIF (CMAKE_GENERATOR STREQUAL "Xcode") + +SET(SQUISH_HDRS + squish.h + ) + +SET(SQUISH_SRCS + alpha.cpp + alpha.h + clusterfit.cpp + clusterfit.h + colourblock.cpp + colourblock.h + colourfit.cpp + colourfit.h + colourset.cpp + colourset.h + maths.cpp + maths.h + rangefit.cpp + rangefit.h + simd.h + simd_float.h + simd_sse.h + simd_ve.h + singlecolourfit.cpp + singlecolourfit.h + singlecolourlookup.inl + squish.cpp + ) + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +ADD_LIBRARY(squish 
${SQUISH_SRCS} ${SQUISH_HDRS}) + +SET_TARGET_PROPERTIES( + squish PROPERTIES + PUBLIC_HEADER "${SQUISH_HDRS}" + VERSION 0.0 + SOVERSION 0.0 + DEBUG_POSTFIX "d" + XCODE_ATTRIBUTE_GCC_PREPROCESSOR_DEFINITIONS "$(SQUISH_CPP_$(CURRENT_ARCH))" + XCODE_ATTRIBUTE_OTHER_CFLAGS "$(SQUISH_CFLAGS_$(CURRENT_ARCH))" + XCODE_ATTRIBUTE_SQUISH_CPP_i386 "SQUISH_USE_SSE=2" + XCODE_ATTRIBUTE_SQUISH_CFLAGS_i386 "" + XCODE_ATTRIBUTE_SQUISH_CPP_ppc "SQUISH_USE_ALTIVEC=1" + XCODE_ATTRIBUTE_SQUISH_CFLAGS_ppc "-maltivec" + ) + +IF (BUILD_SQUISH_EXTRA) + SET(SQUISHTEST_SRCS extra/squishtest.cpp) + + ADD_EXECUTABLE(squishtest ${SQUISHTEST_SRCS}) + SET_TARGET_PROPERTIES(squishtest PROPERTIES DEBUG_POSTFIX "d") + TARGET_LINK_LIBRARIES(squishtest squish) + + SET(SQUISHPNG_SRCS extra/squishpng.cpp) + + FIND_PACKAGE(PNG) + + IF (PNG_FOUND) + SET(CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES) + INCLUDE_DIRECTORIES(${PNG_INCLUDE_DIR}) + ADD_EXECUTABLE(squishpng ${SQUISHPNG_SRCS}) + SET_TARGET_PROPERTIES(squishpng PROPERTIES DEBUG_POSTFIX "d") + TARGET_LINK_LIBRARIES(squishpng squish ${PNG_LIBRARIES}) + ENDIF (PNG_FOUND) +ENDIF (BUILD_SQUISH_EXTRA) + +INSTALL( + TARGETS squish + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + PUBLIC_HEADER DESTINATION include + ) diff --git a/extern/libsquish-1.15/CMakeModules/FindlibSquish.cmake b/extern/libsquish-1.15/CMakeModules/FindlibSquish.cmake new file mode 100644 index 0000000..a8d7cfe --- /dev/null +++ b/extern/libsquish-1.15/CMakeModules/FindlibSquish.cmake @@ -0,0 +1,14 @@ +# Defines +# LIBSQUISH_FOUND +# LIBSQUISH_INCLUDE_DIR +# LIBSQUISH_LIBRARIES + +FIND_PATH(LIBSQUISH_INCLUDE_DIR squish.h PATHS . squish .. ../squish DOC "Directory containing libSquish headers") +FIND_LIBRARY(LIBSQUISH_LIBRARY NAMES squish libsquish PATHS . squish .. 
../squish PATH_SUFFIXES lib lib64 release minsizerel relwithdebinfo DOC "Path to libSquish library") + +SET(LIBSQUISH_LIBRARIES ${LIBSQUISH_LIBRARY}) + +IF (LIBSQUISH_LIBRARY AND LIBSQUISH_INCLUDE_DIR) + SET(LIBSQUISH_FOUND TRUE) + MESSAGE(STATUS "Found libSquish: ${LIBSQUISH_LIBRARY}") +ENDIF (LIBSQUISH_LIBRARY AND LIBSQUISH_INCLUDE_DIR) diff --git a/extern/libsquish-1.15/ChangeLog.txt b/extern/libsquish-1.15/ChangeLog.txt new file mode 100644 index 0000000..f6c8c6d --- /dev/null +++ b/extern/libsquish-1.15/ChangeLog.txt @@ -0,0 +1,66 @@ +1.15 +* parallel compression using openmp with cmake (Marian Krivos / Stefan Roettger) +* parallel decompression using openmp with cmake (Stefan Roettger) + +1.14 +* backport BGRA support +* backport BC4 and BC5 support +* backport BlockMSE support + +1.11-1.13 +* added support for CMake and QMake (Stefan Roettger) +* misc. minor changes on the build system (Stefan Roettger) +* added svg icon (Stefan Roettger) + +1.10 +* Iterative cluster fit is now considered to be a new compression mode +* The core cluster fit is now 4x faster using contributions by Ignacio +Castano from NVIDIA +* The single colour lookup table has been halved by exploiting symmetry + +1.9 +* Added contributed SSE1 truncate implementation +* Changed use of SQUISH_USE_SSE to be 1 for SSE and 2 for SSE2 instructions +* Cluster fit is now iterative to further reduce image error + +1.8 +* Switched from using floor to trunc for much better SSE performance (again) +* Xcode build now expects libpng in /usr/local for extra/squishpng + +1.7 +* Fixed floating-point equality issue in clusterfit sort (x86 affected only) +* Implemented proper SSE(2) floor function for 50% speedup on SSE builds +* The range fit implementation now uses the correct colour metric + +1.6 +* Fixed bug in CompressImage where masked pixels were not skipped over +* DXT3 and DXT5 alpha compression now properly use the mask to ignore pixels +* Fixed major DXT1 bug that can generate unexpected 
transparent pixels + +1.5 +* Added CompressMasked function to handle incomplete DXT blocks more cleanly +* Added kWeightColourByAlpha flag for better quality images when alpha blending + +1.4 +* Fixed stack overflow in rangefit + +1.3 +* Worked around SSE floor implementation bug, proper fix needed! +* This release has visual studio and makefile builds that work + +1.2 +* Added provably optimal single colour compressor +* Added extra/squishgen.cpp that generates single colour lookup tables + +1.1 +* Fixed a DXT1 colour output bug +* Changed argument order for Decompress function to match Compress +* Added GetStorageRequirements function +* Added CompressImage function +* Added DecompressImage function +* Moved squishtool.cpp to extra/squishpng.cpp +* Added extra/squishtest.cpp + +1.0 +* Initial release + diff --git a/extern/libsquish-1.15/Doxyfile b/extern/libsquish-1.15/Doxyfile new file mode 100644 index 0000000..3c54d29 --- /dev/null +++ b/extern/libsquish-1.15/Doxyfile @@ -0,0 +1,214 @@ +# Doxyfile 1.4.6 + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- +PROJECT_NAME = squish +PROJECT_NUMBER = 1.14 +OUTPUT_DIRECTORY = docs +CREATE_SUBDIRS = NO +OUTPUT_LANGUAGE = English +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = YES +STRIP_FROM_PATH = +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 4 +ALIASES = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +BUILTIN_STL_SUPPORT = NO +DISTRIBUTE_GROUP_DOC = NO +SUBGROUPING = YES +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- 
+EXTRACT_ALL = YES +EXTRACT_PRIVATE = NO +EXTRACT_STATIC = NO +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = NO +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = NO +HIDE_SCOPE_NAMES = NO +SHOW_INCLUDE_FILES = YES +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_BY_SCOPE_NAME = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = YES +FILE_VERSION_FILTER = +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- +QUIET = YES +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = NO +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- +INPUT = squish.h +FILE_PATTERNS = +RECURSIVE = NO +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- +SOURCE_BROWSER = NO +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = YES +REFERENCES_RELATION = YES +USE_HTAGS = NO +VERBATIM_HEADERS = YES +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index 
+#--------------------------------------------------------------------------- +ALPHABETICAL_INDEX = NO +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +BINARY_TOC = NO +TOC_EXPAND = NO +DISABLE_INDEX = NO +ENUM_VALUES_PER_LINE = 4 +GENERATE_TREEVIEW = NO +TREEVIEW_WIDTH = 250 +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- +GENERATE_LATEX = NO +LATEX_OUTPUT = latex +LATEX_CMD_NAME = latex +MAKEINDEX_CMD_NAME = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = a4wide +EXTRA_PACKAGES = +LATEX_HEADER = +PDF_HYPERLINKS = NO +USE_PDFLATEX = NO +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = NO +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- +GENERATE_MAN = NO +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_LINKS = NO +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- +GENERATE_XML = NO +XML_OUTPUT = xml 
+XML_PROGRAMLISTING = YES +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- +GENERATE_AUTOGEN_DEF = NO +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- +GENERATE_PERLMOD = NO +PERLMOD_LATEX = NO +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +PERL_PATH = /usr/bin/perl +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- +CLASS_DIAGRAMS = YES +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = NO +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = YES +UML_LOOK = NO +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DOT_IMAGE_FORMAT = png +DOTFILE_DIRS = +MAX_DOT_GRAPH_DEPTH = 0 +DOT_TRANSPARENT = NO +DOT_MULTI_TARGETS = NO +GENERATE_LEGEND = YES +DOT_CLEANUP = YES 
+#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine +#--------------------------------------------------------------------------- +SEARCHENGINE = NO diff --git a/extern/libsquish-1.15/LICENSE.txt b/extern/libsquish-1.15/LICENSE.txt new file mode 100644 index 0000000..ed1c78d --- /dev/null +++ b/extern/libsquish-1.15/LICENSE.txt @@ -0,0 +1,20 @@ + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/extern/libsquish-1.15/Makefile b/extern/libsquish-1.15/Makefile new file mode 100644 index 0000000..fd7d6c8 --- /dev/null +++ b/extern/libsquish-1.15/Makefile @@ -0,0 +1,65 @@ +include config + +VER = 1.15 +SOVER = 0 + +SRC = alpha.cpp clusterfit.cpp colourblock.cpp colourfit.cpp colourset.cpp maths.cpp rangefit.cpp singlecolourfit.cpp squish.cpp + +HDR = alpha.h clusterfit.h colourblock.h colourfit.h colourset.h maths.h rangefit.h singlecolourfit.h squish.h +HDR += config.h simd.h simd_float.h simd_sse.h simd_ve.h singlecolourlookup.inl + +OBJ = $(SRC:%.cpp=%.o) + +SOLIB = libsquish.so.$(SOVER) +LIB = $(SOLIB).0 +CPPFLAGS += -fPIC +LIBA = libsquish.a + +.PHONY: all install uninstall docs tgz clean + +all: $(LIB) $(LIBA) docs libsquish.pc + +install: $(LIB) $(LIBA) libsquish.pc + $(INSTALL_DIRECTORY) $(INSTALL_DIR)/include $(INSTALL_DIR)/$(LIB_PATH) + $(INSTALL_FILE) squish.h $(INSTALL_DIR)/include + $(INSTALL_FILE) $(LIBA) $(INSTALL_DIR)/$(LIB_PATH) +ifneq ($(USE_SHARED),0) + $(INSTALL_FILE) $(LIB) $(INSTALL_DIR)/$(LIB_PATH) + ln -s $(LIB) $(INSTALL_DIR)/$(LIB_PATH)/$(SOLIB) + ln -s $(LIB) $(INSTALL_DIR)/$(LIB_PATH)/libsquish.so + $(INSTALL_DIRECTORY) $(INSTALL_DIR)/$(LIB_PATH)/pkgconfig + $(INSTALL_FILE) libsquish.pc $(INSTALL_DIR)/$(LIB_PATH)/pkgconfig +endif + +uninstall: + $(RM) $(INSTALL_DIR)/include/squish.h + $(RM) $(INSTALL_DIR)/$(LIB_PATH)/$(LIBA) + -$(RM) $(INSTALL_DIR)/$(LIB_PATH)/$(LIB) + -$(RM) $(INSTALL_DIR)/$(LIB_PATH)/$(SOLIB) + -$(RM) $(INSTALL_DIR)/$(LIB_PATH)/libsquish.so + -$(RM) $(INSTALL_DIR)/$(LIB_PATH)/pkgconfig/libsquish.pc + +$(LIB): $(OBJ) +ifneq ($(USE_SHARED),0) + $(CXX) $(LDFLAGS) -shared -Wl,-soname,$(SOLIB) -o $@ $(OBJ) +endif + +$(LIBA): $(OBJ) + $(AR) cr $@ $? 
+ @ranlib $@ + +docs: $(SRC) $(HDR) + @if [ -x "`command -v doxygen`" ]; then doxygen; fi + +libsquish.pc: libsquish.pc.in + @sed 's|@PREFIX@|$(PREFIX)|;s|@LIB_PATH@|$(LIB_PATH)|' $@.in > $@ + +tgz: clean + tar zcf libsquish-$(VER).tgz $(SRC) $(HDR) Makefile config CMakeLists.txt CMakeModules libSquish.* README.txt LICENSE.txt ChangeLog.txt Doxyfile libsquish.pc.in extra --exclude \*.svn\* + +%.o: %.cpp + $(CXX) $(CPPFLAGS) -I. $(CXXFLAGS) -o $@ -c $< + +clean: + $(RM) $(OBJ) $(LIB) $(LIBA) libsquish.pc + @-$(RM) -rf docs diff --git a/extern/libsquish-1.15/README.txt b/extern/libsquish-1.15/README.txt new file mode 100644 index 0000000..60380ee --- /dev/null +++ b/extern/libsquish-1.15/README.txt @@ -0,0 +1,18 @@ +LICENSE +------- + +The squish library is distributed under the terms and conditions of the MIT +license. This license is specified at the top of each source file and must be +preserved in its entirety. + +BUILDING AND INSTALLING THE LIBRARY +----------------------------------- + +The preferred way to install the library on Unix/Mac (and Windows) is via cmake: + cmake . 
&& make && sudo make install + +REPORTING BUGS OR FEATURE REQUESTS +---------------------------------- + +Feedback can be sent to Simon Brown (the developer) at si@sjbrown.co.uk +Feedback can also be sent to Stefan Roettger (the maintainer) at snroettg@gmail.com diff --git a/extern/libsquish-1.15/alpha.cpp b/extern/libsquish-1.15/alpha.cpp new file mode 100644 index 0000000..7039c1a --- /dev/null +++ b/extern/libsquish-1.15/alpha.cpp @@ -0,0 +1,350 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + -------------------------------------------------------------------------- */ + +#include "alpha.h" + +#include +#include + +namespace squish { + +static int FloatToInt( float a, int limit ) +{ + // use ANSI round-to-zero behaviour to get round-to-nearest + int i = ( int )( a + 0.5f ); + + // clamp to the limit + if( i < 0 ) + i = 0; + else if( i > limit ) + i = limit; + + // done + return i; +} + +void CompressAlphaDxt3( u8 const* rgba, int mask, void* block ) +{ + u8* bytes = reinterpret_cast< u8* >( block ); + + // quantise and pack the alpha values pairwise + for( int i = 0; i < 8; ++i ) + { + // quantise down to 4 bits + float alpha1 = ( float )rgba[8*i + 3] * ( 15.0f/255.0f ); + float alpha2 = ( float )rgba[8*i + 7] * ( 15.0f/255.0f ); + int quant1 = FloatToInt( alpha1, 15 ); + int quant2 = FloatToInt( alpha2, 15 ); + + // set alpha to zero where masked + int bit1 = 1 << ( 2*i ); + int bit2 = 1 << ( 2*i + 1 ); + if( ( mask & bit1 ) == 0 ) + quant1 = 0; + if( ( mask & bit2 ) == 0 ) + quant2 = 0; + + // pack into the byte + bytes[i] = ( u8 )( quant1 | ( quant2 << 4 ) ); + } +} + +void DecompressAlphaDxt3( u8* rgba, void const* block ) +{ + u8 const* bytes = reinterpret_cast< u8 const* >( block ); + + // unpack the alpha values pairwise + for( int i = 0; i < 8; ++i ) + { + // quantise down to 4 bits + u8 quant = bytes[i]; + + // unpack the values + u8 lo = quant & 0x0f; + u8 hi = quant & 0xf0; + + // convert back up to bytes + rgba[8*i + 3] = lo | ( lo << 4 ); + rgba[8*i + 7] = hi | ( hi >> 4 ); + } +} + +static void FixRange( int& min, int& max, int steps ) +{ + if( max - min < steps ) + max = std::min( min + steps, 255 ); + if( max - min < steps ) + min = std::max( 0, max - steps ); +} + +static int FitCodes( u8 const* rgba, int mask, u8 const* codes, u8* indices ) +{ + // fit each alpha value to the codebook + int err = 0; + for( int i = 0; i < 16; ++i ) + { + // check this pixel is valid + int bit = 1 << i; + if( ( mask & bit ) == 0 ) + { + // use the 
first code + indices[i] = 0; + continue; + } + + // find the least error and corresponding index + int value = rgba[4*i + 3]; + int least = INT_MAX; + int index = 0; + for( int j = 0; j < 8; ++j ) + { + // get the squared error from this code + int dist = ( int )value - ( int )codes[j]; + dist *= dist; + + // compare with the best so far + if( dist < least ) + { + least = dist; + index = j; + } + } + + // save this index and accumulate the error + indices[i] = ( u8 )index; + err += least; + } + + // return the total error + return err; +} + +static void WriteAlphaBlock( int alpha0, int alpha1, u8 const* indices, void* block ) +{ + u8* bytes = reinterpret_cast< u8* >( block ); + + // write the first two bytes + bytes[0] = ( u8 )alpha0; + bytes[1] = ( u8 )alpha1; + + // pack the indices with 3 bits each + u8* dest = bytes + 2; + u8 const* src = indices; + for( int i = 0; i < 2; ++i ) + { + // pack 8 3-bit values + int value = 0; + for( int j = 0; j < 8; ++j ) + { + int index = *src++; + value |= ( index << 3*j ); + } + + // store in 3 bytes + for( int j = 0; j < 3; ++j ) + { + int byte = ( value >> 8*j ) & 0xff; + *dest++ = ( u8 )byte; + } + } +} + +static void WriteAlphaBlock5( int alpha0, int alpha1, u8 const* indices, void* block ) +{ + // check the relative values of the endpoints + if( alpha0 > alpha1 ) + { + // swap the indices + u8 swapped[16]; + for( int i = 0; i < 16; ++i ) + { + u8 index = indices[i]; + if( index == 0 ) + swapped[i] = 1; + else if( index == 1 ) + swapped[i] = 0; + else if( index <= 5 ) + swapped[i] = 7 - index; + else + swapped[i] = index; + } + + // write the block + WriteAlphaBlock( alpha1, alpha0, swapped, block ); + } + else + { + // write the block + WriteAlphaBlock( alpha0, alpha1, indices, block ); + } +} + +static void WriteAlphaBlock7( int alpha0, int alpha1, u8 const* indices, void* block ) +{ + // check the relative values of the endpoints + if( alpha0 < alpha1 ) + { + // swap the indices + u8 swapped[16]; + for( int i = 0; i < 
16; ++i ) + { + u8 index = indices[i]; + if( index == 0 ) + swapped[i] = 1; + else if( index == 1 ) + swapped[i] = 0; + else + swapped[i] = 9 - index; + } + + // write the block + WriteAlphaBlock( alpha1, alpha0, swapped, block ); + } + else + { + // write the block + WriteAlphaBlock( alpha0, alpha1, indices, block ); + } +} + +void CompressAlphaDxt5( u8 const* rgba, int mask, void* block ) +{ + // get the range for 5-alpha and 7-alpha interpolation + int min5 = 255; + int max5 = 0; + int min7 = 255; + int max7 = 0; + for( int i = 0; i < 16; ++i ) + { + // check this pixel is valid + int bit = 1 << i; + if( ( mask & bit ) == 0 ) + continue; + + // incorporate into the min/max + int value = rgba[4*i + 3]; + if( value < min7 ) + min7 = value; + if( value > max7 ) + max7 = value; + if( value != 0 && value < min5 ) + min5 = value; + if( value != 255 && value > max5 ) + max5 = value; + } + + // handle the case that no valid range was found + if( min5 > max5 ) + min5 = max5; + if( min7 > max7 ) + min7 = max7; + + // fix the range to be the minimum in each case + FixRange( min5, max5, 5 ); + FixRange( min7, max7, 7 ); + + // set up the 5-alpha code book + u8 codes5[8]; + codes5[0] = ( u8 )min5; + codes5[1] = ( u8 )max5; + for( int i = 1; i < 5; ++i ) + codes5[1 + i] = ( u8 )( ( ( 5 - i )*min5 + i*max5 )/5 ); + codes5[6] = 0; + codes5[7] = 255; + + // set up the 7-alpha code book + u8 codes7[8]; + codes7[0] = ( u8 )min7; + codes7[1] = ( u8 )max7; + for( int i = 1; i < 7; ++i ) + codes7[1 + i] = ( u8 )( ( ( 7 - i )*min7 + i*max7 )/7 ); + + // fit the data to both code books + u8 indices5[16]; + u8 indices7[16]; + int err5 = FitCodes( rgba, mask, codes5, indices5 ); + int err7 = FitCodes( rgba, mask, codes7, indices7 ); + + // save the block with least error + if( err5 <= err7 ) + WriteAlphaBlock5( min5, max5, indices5, block ); + else + WriteAlphaBlock7( min7, max7, indices7, block ); +} + +void DecompressAlphaDxt5( u8* rgba, void const* block ) +{ + // get the two alpha 
values + u8 const* bytes = reinterpret_cast< u8 const* >( block ); + int alpha0 = bytes[0]; + int alpha1 = bytes[1]; + + // compare the values to build the codebook + u8 codes[8]; + codes[0] = ( u8 )alpha0; + codes[1] = ( u8 )alpha1; + if( alpha0 <= alpha1 ) + { + // use 5-alpha codebook + for( int i = 1; i < 5; ++i ) + codes[1 + i] = ( u8 )( ( ( 5 - i )*alpha0 + i*alpha1 )/5 ); + codes[6] = 0; + codes[7] = 255; + } + else + { + // use 7-alpha codebook + for( int i = 1; i < 7; ++i ) + codes[1 + i] = ( u8 )( ( ( 7 - i )*alpha0 + i*alpha1 )/7 ); + } + + // decode the indices + u8 indices[16]; + u8 const* src = bytes + 2; + u8* dest = indices; + for( int i = 0; i < 2; ++i ) + { + // grab 3 bytes + int value = 0; + for( int j = 0; j < 3; ++j ) + { + int byte = *src++; + value |= ( byte << 8*j ); + } + + // unpack 8 3-bit values from it + for( int j = 0; j < 8; ++j ) + { + int index = ( value >> 3*j ) & 0x7; + *dest++ = ( u8 )index; + } + } + + // write out the indexed codebook values + for( int i = 0; i < 16; ++i ) + rgba[4*i + 3] = codes[indices[i]]; +} + +} // namespace squish diff --git a/extern/libsquish-1.15/alpha.h b/extern/libsquish-1.15/alpha.h new file mode 100644 index 0000000..a1fffd4 --- /dev/null +++ b/extern/libsquish-1.15/alpha.h @@ -0,0 +1,41 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#ifndef SQUISH_ALPHA_H +#define SQUISH_ALPHA_H + +#include "squish.h" + +namespace squish { + +void CompressAlphaDxt3( u8 const* rgba, int mask, void* block ); +void CompressAlphaDxt5( u8 const* rgba, int mask, void* block ); + +void DecompressAlphaDxt3( u8* rgba, void const* block ); +void DecompressAlphaDxt5( u8* rgba, void const* block ); + +} // namespace squish + +#endif // ndef SQUISH_ALPHA_H diff --git a/extern/libsquish-1.15/clusterfit.cpp b/extern/libsquish-1.15/clusterfit.cpp new file mode 100644 index 0000000..1610ecb --- /dev/null +++ b/extern/libsquish-1.15/clusterfit.cpp @@ -0,0 +1,392 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + Copyright (c) 2007 Ignacio Castano icastano@nvidia.com + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#include "clusterfit.h" +#include "colourset.h" +#include "colourblock.h" +#include + +namespace squish { + +ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric ) + : ColourFit( colours, flags ) +{ + // set the iteration count + m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1; + + // initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f) + if( metric ) + m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f ); + else + m_metric = VEC4_CONST( 1.0f ); + + // initialise the best error + m_besterror = VEC4_CONST( FLT_MAX ); + + // cache some values + int const count = m_colours->GetCount(); + Vec3 const* values = m_colours->GetPoints(); + + // get the covariance matrix + Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() ); + + // compute the principle component + m_principle = ComputePrincipleComponent( covariance ); +} + +bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration ) +{ + // cache some values + int const count = m_colours->GetCount(); + Vec3 const* values = m_colours->GetPoints(); + + // build the list of dot products + float dps[16]; + u8* order = ( u8* )m_order + 16*iteration; + for( int i = 0; i < count; ++i ) + { + dps[i] = Dot( values[i], axis ); + order[i] = ( u8 )i; + } + + // stable sort using them + for( int i = 0; i < count; ++i ) + { + for( int j = i; j > 0 && dps[j] < dps[j - 
1]; --j ) + { + std::swap( dps[j], dps[j - 1] ); + std::swap( order[j], order[j - 1] ); + } + } + + // check this ordering is unique + for( int it = 0; it < iteration; ++it ) + { + u8 const* prev = ( u8* )m_order + 16*it; + bool same = true; + for( int i = 0; i < count; ++i ) + { + if( order[i] != prev[i] ) + { + same = false; + break; + } + } + if( same ) + return false; + } + + // copy the ordering and weight all the points + Vec3 const* unweighted = m_colours->GetPoints(); + float const* weights = m_colours->GetWeights(); + m_xsum_wsum = VEC4_CONST( 0.0f ); + for( int i = 0; i < count; ++i ) + { + int j = order[i]; + Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f ); + Vec4 w( weights[j] ); + Vec4 x = p*w; + m_points_weights[i] = x; + m_xsum_wsum += x; + } + return true; +} + +void ClusterFit::Compress3( void* block ) +{ + // declare variables + int const count = m_colours->GetCount(); + Vec4 const two = VEC4_CONST( 2.0 ); + Vec4 const one = VEC4_CONST( 1.0f ); + Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f ); + Vec4 const zero = VEC4_CONST( 0.0f ); + Vec4 const half = VEC4_CONST( 0.5f ); + Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); + Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); + + // prepare an ordering using the principle axis + ConstructOrdering( m_principle, 0 ); + + // check all possible clusters and iterate on the total order + Vec4 beststart = VEC4_CONST( 0.0f ); + Vec4 bestend = VEC4_CONST( 0.0f ); + Vec4 besterror = m_besterror; + u8 bestindices[16]; + int bestiteration = 0; + int besti = 0, bestj = 0; + + // loop over iterations (we avoid the case that all points in first or last cluster) + for( int iterationIndex = 0;; ) + { + // first cluster [0,i) is at the start + Vec4 part0 = VEC4_CONST( 0.0f ); + for( int i = 0; i < count; ++i ) + { + // second cluster [i,j) is half along + Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f ); + int jmin = ( i == 0 ) ? 
1 : i; + for( int j = jmin;; ) + { + // last cluster [j,count) is at the end + Vec4 part2 = m_xsum_wsum - part1 - part0; + + // compute least squares terms directly + Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 ); + Vec4 alpha2_sum = alphax_sum.SplatW(); + + Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 ); + Vec4 beta2_sum = betax_sum.SplatW(); + + Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW(); + + // compute the least-squares optimal points + Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) ); + Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor; + Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor; + + // clamp to the grid + a = Min( one, Max( zero, a ) ); + b = Min( one, Max( zero, b ) ); + a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp; + b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp; + + // compute the error (we skip the constant xxsum) + Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); + Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); + Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); + Vec4 e4 = MultiplyAdd( two, e3, e1 ); + + // apply the metric to the error term + Vec4 e5 = e4*m_metric; + Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); + + // keep the solution if it wins + if( CompareAnyLessThan( error, besterror ) ) + { + beststart = a; + bestend = b; + besti = i; + bestj = j; + besterror = error; + bestiteration = iterationIndex; + } + + // advance + if( j == count ) + break; + part1 += m_points_weights[j]; + ++j; + } + + // advance + part0 += m_points_weights[i]; + } + + // stop if we didn't improve in this iteration + if( bestiteration != iterationIndex ) + break; + + // advance if possible + ++iterationIndex; + if( iterationIndex == m_iterationCount ) + break; + + // stop if a new iteration is an ordering that has already been tried + 
Vec3 axis = ( bestend - beststart ).GetVec3(); + if( !ConstructOrdering( axis, iterationIndex ) ) + break; + } + + // save the block if necessary + if( CompareAnyLessThan( besterror, m_besterror ) ) + { + // remap the indices + u8 const* order = ( u8* )m_order + 16*bestiteration; + + u8 unordered[16]; + for( int m = 0; m < besti; ++m ) + unordered[order[m]] = 0; + for( int m = besti; m < bestj; ++m ) + unordered[order[m]] = 2; + for( int m = bestj; m < count; ++m ) + unordered[order[m]] = 1; + + m_colours->RemapIndices( unordered, bestindices ); + + // save the block + WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); + + // save the error + m_besterror = besterror; + } +} + +void ClusterFit::Compress4( void* block ) +{ + // declare variables + int const count = m_colours->GetCount(); + Vec4 const two = VEC4_CONST( 2.0f ); + Vec4 const one = VEC4_CONST( 1.0f ); + Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f ); + Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f ); + Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f ); + Vec4 const zero = VEC4_CONST( 0.0f ); + Vec4 const half = VEC4_CONST( 0.5f ); + Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); + Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); + + // prepare an ordering using the principle axis + ConstructOrdering( m_principle, 0 ); + + // check all possible clusters and iterate on the total order + Vec4 beststart = VEC4_CONST( 0.0f ); + Vec4 bestend = VEC4_CONST( 0.0f ); + Vec4 besterror = m_besterror; + u8 bestindices[16]; + int bestiteration = 0; + int besti = 0, bestj = 0, bestk = 0; + + // loop over iterations (we avoid the case that all points in first or last cluster) + for( int iterationIndex = 0;; ) + { + // first cluster [0,i) is at the start + Vec4 part0 = VEC4_CONST( 0.0f ); + for( int i = 0; i < count; ++i ) + { + // second cluster [i,j) is one third along + Vec4 part1 = VEC4_CONST( 0.0f ); + for( int j = 
i;; ) + { + // third cluster [j,k) is two thirds along + Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f ); + int kmin = ( j == 0 ) ? 1 : j; + for( int k = kmin;; ) + { + // last cluster [k,count) is at the end + Vec4 part3 = m_xsum_wsum - part2 - part1 - part0; + + // compute least squares terms directly + Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) ); + Vec4 const alpha2_sum = alphax_sum.SplatW(); + + Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) ); + Vec4 const beta2_sum = betax_sum.SplatW(); + + Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW(); + + // compute the least-squares optimal points + Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) ); + Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor; + Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor; + + // clamp to the grid + a = Min( one, Max( zero, a ) ); + b = Min( one, Max( zero, b ) ); + a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp; + b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp; + + // compute the error (we skip the constant xxsum) + Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); + Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); + Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); + Vec4 e4 = MultiplyAdd( two, e3, e1 ); + + // apply the metric to the error term + Vec4 e5 = e4*m_metric; + Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); + + // keep the solution if it wins + if( CompareAnyLessThan( error, besterror ) ) + { + beststart = a; + bestend = b; + besterror = error; + besti = i; + bestj = j; + bestk = k; + bestiteration = iterationIndex; + } + + // advance + if( k == count ) + break; + part2 += m_points_weights[k]; + ++k; + } + + // 
advance + if( j == count ) + break; + part1 += m_points_weights[j]; + ++j; + } + + // advance + part0 += m_points_weights[i]; + } + + // stop if we didn't improve in this iteration + if( bestiteration != iterationIndex ) + break; + + // advance if possible + ++iterationIndex; + if( iterationIndex == m_iterationCount ) + break; + + // stop if a new iteration is an ordering that has already been tried + Vec3 axis = ( bestend - beststart ).GetVec3(); + if( !ConstructOrdering( axis, iterationIndex ) ) + break; + } + + // save the block if necessary + if( CompareAnyLessThan( besterror, m_besterror ) ) + { + // remap the indices + u8 const* order = ( u8* )m_order + 16*bestiteration; + + u8 unordered[16]; + for( int m = 0; m < besti; ++m ) + unordered[order[m]] = 0; + for( int m = besti; m < bestj; ++m ) + unordered[order[m]] = 2; + for( int m = bestj; m < bestk; ++m ) + unordered[order[m]] = 3; + for( int m = bestk; m < count; ++m ) + unordered[order[m]] = 1; + + m_colours->RemapIndices( unordered, bestindices ); + + // save the block + WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); + + // save the error + m_besterror = besterror; + } +} + +} // namespace squish diff --git a/extern/libsquish-1.15/clusterfit.h b/extern/libsquish-1.15/clusterfit.h new file mode 100644 index 0000000..999396b --- /dev/null +++ b/extern/libsquish-1.15/clusterfit.h @@ -0,0 +1,61 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + Copyright (c) 2007 Ignacio Castano icastano@nvidia.com + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is 
furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#ifndef SQUISH_CLUSTERFIT_H +#define SQUISH_CLUSTERFIT_H + +#include "squish.h" +#include "maths.h" +#include "simd.h" +#include "colourfit.h" + +namespace squish { + +class ClusterFit : public ColourFit +{ +public: + ClusterFit( ColourSet const* colours, int flags, float* metric ); + +private: + bool ConstructOrdering( Vec3 const& axis, int iteration ); + + virtual void Compress3( void* block ); + virtual void Compress4( void* block ); + + enum { kMaxIterations = 8 }; + + int m_iterationCount; + Vec3 m_principle; + u8 m_order[16*kMaxIterations]; + Vec4 m_points_weights[16]; + Vec4 m_xsum_wsum; + Vec4 m_metric; + Vec4 m_besterror; +}; + +} // namespace squish + +#endif // ndef SQUISH_CLUSTERFIT_H diff --git a/extern/libsquish-1.15/colourblock.cpp b/extern/libsquish-1.15/colourblock.cpp new file mode 100644 index 0000000..af8b980 --- /dev/null +++ b/extern/libsquish-1.15/colourblock.cpp @@ -0,0 +1,214 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without 
restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#include "colourblock.h" + +namespace squish { + +static int FloatToInt( float a, int limit ) +{ + // use ANSI round-to-zero behaviour to get round-to-nearest + int i = ( int )( a + 0.5f ); + + // clamp to the limit + if( i < 0 ) + i = 0; + else if( i > limit ) + i = limit; + + // done + return i; +} + +static int FloatTo565( Vec3::Arg colour ) +{ + // get the components in the correct range + int r = FloatToInt( 31.0f*colour.X(), 31 ); + int g = FloatToInt( 63.0f*colour.Y(), 63 ); + int b = FloatToInt( 31.0f*colour.Z(), 31 ); + + // pack into a single value + return ( r << 11 ) | ( g << 5 ) | b; +} + +static void WriteColourBlock( int a, int b, u8* indices, void* block ) +{ + // get the block as bytes + u8* bytes = ( u8* )block; + + // write the endpoints + bytes[0] = ( u8 )( a & 0xff ); + bytes[1] = ( u8 )( a >> 8 ); + bytes[2] = ( u8 )( b & 0xff ); + bytes[3] = ( u8 )( b >> 8 ); + + // write the indices + for( int i = 0; i < 4; ++i ) + { + u8 const* ind = indices + 4*i; + bytes[4 + i] = ind[0] | ( ind[1] << 2 ) | ( ind[2] 
<< 4 ) | ( ind[3] << 6 ); + } +} + +void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block ) +{ + // get the packed values + int a = FloatTo565( start ); + int b = FloatTo565( end ); + + // remap the indices + u8 remapped[16]; + if( a <= b ) + { + // use the indices directly + for( int i = 0; i < 16; ++i ) + remapped[i] = indices[i]; + } + else + { + // swap a and b + std::swap( a, b ); + for( int i = 0; i < 16; ++i ) + { + if( indices[i] == 0 ) + remapped[i] = 1; + else if( indices[i] == 1 ) + remapped[i] = 0; + else + remapped[i] = indices[i]; + } + } + + // write the block + WriteColourBlock( a, b, remapped, block ); +} + +void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block ) +{ + // get the packed values + int a = FloatTo565( start ); + int b = FloatTo565( end ); + + // remap the indices + u8 remapped[16]; + if( a < b ) + { + // swap a and b + std::swap( a, b ); + for( int i = 0; i < 16; ++i ) + remapped[i] = ( indices[i] ^ 0x1 ) & 0x3; + } + else if( a == b ) + { + // use index 0 + for( int i = 0; i < 16; ++i ) + remapped[i] = 0; + } + else + { + // use the indices directly + for( int i = 0; i < 16; ++i ) + remapped[i] = indices[i]; + } + + // write the block + WriteColourBlock( a, b, remapped, block ); +} + +static int Unpack565( u8 const* packed, u8* colour ) +{ + // build the packed value + int value = ( int )packed[0] | ( ( int )packed[1] << 8 ); + + // get the components in the stored range + u8 red = ( u8 )( ( value >> 11 ) & 0x1f ); + u8 green = ( u8 )( ( value >> 5 ) & 0x3f ); + u8 blue = ( u8 )( value & 0x1f ); + + // scale up to 8 bits + colour[0] = ( red << 3 ) | ( red >> 2 ); + colour[1] = ( green << 2 ) | ( green >> 4 ); + colour[2] = ( blue << 3 ) | ( blue >> 2 ); + colour[3] = 255; + + // return the value + return value; +} + +void DecompressColour( u8* rgba, void const* block, bool isDxt1 ) +{ + // get the block bytes + u8 const* bytes = reinterpret_cast< u8 const* >( block ); + 
+ // unpack the endpoints + u8 codes[16]; + int a = Unpack565( bytes, codes ); + int b = Unpack565( bytes + 2, codes + 4 ); + + // generate the midpoints + for( int i = 0; i < 3; ++i ) + { + int c = codes[i]; + int d = codes[4 + i]; + + if( isDxt1 && a <= b ) + { + codes[8 + i] = ( u8 )( ( c + d )/2 ); + codes[12 + i] = 0; + } + else + { + codes[8 + i] = ( u8 )( ( 2*c + d )/3 ); + codes[12 + i] = ( u8 )( ( c + 2*d )/3 ); + } + } + + // fill in alpha for the intermediate values + codes[8 + 3] = 255; + codes[12 + 3] = ( isDxt1 && a <= b ) ? 0 : 255; + + // unpack the indices + u8 indices[16]; + for( int i = 0; i < 4; ++i ) + { + u8* ind = indices + 4*i; + u8 packed = bytes[4 + i]; + + ind[0] = packed & 0x3; + ind[1] = ( packed >> 2 ) & 0x3; + ind[2] = ( packed >> 4 ) & 0x3; + ind[3] = ( packed >> 6 ) & 0x3; + } + + // store out the colours + for( int i = 0; i < 16; ++i ) + { + u8 offset = 4*indices[i]; + for( int j = 0; j < 4; ++j ) + rgba[4*i + j] = codes[offset + j]; + } +} + +} // namespace squish diff --git a/extern/libsquish-1.15/colourblock.h b/extern/libsquish-1.15/colourblock.h new file mode 100644 index 0000000..fee2cd7 --- /dev/null +++ b/extern/libsquish-1.15/colourblock.h @@ -0,0 +1,41 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#ifndef SQUISH_COLOURBLOCK_H +#define SQUISH_COLOURBLOCK_H + +#include "squish.h" +#include "maths.h" + +namespace squish { + +void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block ); +void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block ); + +void DecompressColour( u8* rgba, void const* block, bool isDxt1 ); + +} // namespace squish + +#endif // ndef SQUISH_COLOURBLOCK_H diff --git a/extern/libsquish-1.15/colourfit.cpp b/extern/libsquish-1.15/colourfit.cpp new file mode 100644 index 0000000..e45b656 --- /dev/null +++ b/extern/libsquish-1.15/colourfit.cpp @@ -0,0 +1,54 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#include "colourfit.h" +#include "colourset.h" + +namespace squish { + +ColourFit::ColourFit( ColourSet const* colours, int flags ) + : m_colours( colours ), + m_flags( flags ) +{ +} + +ColourFit::~ColourFit() +{ +} + +void ColourFit::Compress( void* block ) +{ + bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 ); + if( isDxt1 ) + { + Compress3( block ); + if( !m_colours->IsTransparent() ) + Compress4( block ); + } + else + Compress4( block ); +} + +} // namespace squish diff --git a/extern/libsquish-1.15/colourfit.h b/extern/libsquish-1.15/colourfit.h new file mode 100644 index 0000000..e73dceb --- /dev/null +++ b/extern/libsquish-1.15/colourfit.h @@ -0,0 +1,56 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#ifndef SQUISH_COLOURFIT_H +#define SQUISH_COLOURFIT_H + +#include "squish.h" +#include "maths.h" + +#include <climits> + +namespace squish { + +class ColourSet; + +class ColourFit +{ +public: + ColourFit( ColourSet const* colours, int flags ); + virtual ~ColourFit(); + + void Compress( void* block ); + +protected: + virtual void Compress3( void* block ) = 0; + virtual void Compress4( void* block ) = 0; + + ColourSet const* m_colours; + int m_flags; +}; + +} // namespace squish + +#endif // ndef SQUISH_COLOURFIT_H diff --git a/extern/libsquish-1.15/colourset.cpp b/extern/libsquish-1.15/colourset.cpp new file mode 100644 index 0000000..e900556 --- /dev/null +++ b/extern/libsquish-1.15/colourset.cpp @@ -0,0 +1,121 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#include "colourset.h" + +namespace squish { + +ColourSet::ColourSet( u8 const* rgba, int mask, int flags ) + : m_count( 0 ), + m_transparent( false ) +{ + // check the compression mode for dxt1 + bool isDxt1 = ( ( flags & kDxt1 ) != 0 ); + bool weightByAlpha = ( ( flags & kWeightColourByAlpha ) != 0 ); + + // create the minimal set + for( int i = 0; i < 16; ++i ) + { + // check this pixel is enabled + int bit = 1 << i; + if( ( mask & bit ) == 0 ) + { + m_remap[i] = -1; + continue; + } + + // check for transparent pixels when using dxt1 + if( isDxt1 && rgba[4*i + 3] < 128 ) + { + m_remap[i] = -1; + m_transparent = true; + continue; + } + + // loop over previous points for a match + for( int j = 0;; ++j ) + { + // allocate a new point + if( j == i ) + { + // normalise coordinates to [0,1] + float x = ( float )rgba[4*i] / 255.0f; + float y = ( float )rgba[4*i + 1] / 255.0f; + float z = ( float )rgba[4*i + 2] / 255.0f; + + // ensure there is always non-zero weight even for zero alpha + float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f; + + // add the point + m_points[m_count] = Vec3( x, y, z ); + m_weights[m_count] = ( weightByAlpha ? 
w : 1.0f ); + m_remap[i] = m_count; + + // advance + ++m_count; + break; + } + + // check for a match + int oldbit = 1 << j; + bool match = ( ( mask & oldbit ) != 0 ) + && ( rgba[4*i] == rgba[4*j] ) + && ( rgba[4*i + 1] == rgba[4*j + 1] ) + && ( rgba[4*i + 2] == rgba[4*j + 2] ) + && ( rgba[4*j + 3] >= 128 || !isDxt1 ); + if( match ) + { + // get the index of the match + int index = m_remap[j]; + + // ensure there is always non-zero weight even for zero alpha + float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f; + + // map to this point and increase the weight + m_weights[index] += ( weightByAlpha ? w : 1.0f ); + m_remap[i] = index; + break; + } + } + } + + // square root the weights + for( int i = 0; i < m_count; ++i ) + m_weights[i] = std::sqrt( m_weights[i] ); +} + +void ColourSet::RemapIndices( u8 const* source, u8* target ) const +{ + for( int i = 0; i < 16; ++i ) + { + int j = m_remap[i]; + if( j == -1 ) + target[i] = 3; + else + target[i] = source[j]; + } +} + +} // namespace squish diff --git a/extern/libsquish-1.15/colourset.h b/extern/libsquish-1.15/colourset.h new file mode 100644 index 0000000..e13bb6f --- /dev/null +++ b/extern/libsquish-1.15/colourset.h @@ -0,0 +1,58 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#ifndef SQUISH_COLOURSET_H +#define SQUISH_COLOURSET_H + +#include "squish.h" +#include "maths.h" + +namespace squish { + +/*! @brief Represents a set of block colours +*/ +class ColourSet +{ +public: + ColourSet( u8 const* rgba, int mask, int flags ); + + int GetCount() const { return m_count; } + Vec3 const* GetPoints() const { return m_points; } + float const* GetWeights() const { return m_weights; } + bool IsTransparent() const { return m_transparent; } + + void RemapIndices( u8 const* source, u8* target ) const; + +private: + int m_count; + Vec3 m_points[16]; + float m_weights[16]; + int m_remap[16]; + bool m_transparent; +}; + +} // namespace squish + +#endif // ndef SQUISH_COLOURSET_H diff --git a/extern/libsquish-1.15/config b/extern/libsquish-1.15/config new file mode 100644 index 0000000..da6de8d --- /dev/null +++ b/extern/libsquish-1.15/config @@ -0,0 +1,38 @@ +# config file for GNUmake + +# define to 1 to use OpenMP parallelization +USE_OPENMP ?= 0 + +# define to 1 to install shared library +USE_SHARED ?= 0 + +# define to 1 to use Altivec instructions +USE_ALTIVEC ?= 0 + +# define to 1 to use SSE2 instructions +USE_SSE ?= 0 + +# default flags +CXXFLAGS ?= -O2 -Wall +ifeq ($(USE_OPENMP),1) + CPPFLAGS += -DSQUISH_USE_OPENMP + CXXFLAGS += -fopenmp +endif +ifeq ($(USE_ALTIVEC),1) + CPPFLAGS += -DSQUISH_USE_ALTIVEC=1 + CXXFLAGS += -maltivec +endif +ifeq ($(USE_SSE),1) + CPPFLAGS += -DSQUISH_USE_SSE=2 + 
CXXFLAGS += -msse +endif + +# install options +INSTALL = install +INSTALL_FILE = $(INSTALL) -p -m 644 +INSTALL_PROGRAM = $(INSTALL) -p -m 755 +INSTALL_DIRECTORY = $(INSTALL) -d -m 755 + +# where should we install to +INSTALL_DIR ?= /usr/local +LIB_PATH ?= lib diff --git a/extern/libsquish-1.15/config.h b/extern/libsquish-1.15/config.h new file mode 100644 index 0000000..9f1f4b1 --- /dev/null +++ b/extern/libsquish-1.15/config.h @@ -0,0 +1,49 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#ifndef SQUISH_CONFIG_H +#define SQUISH_CONFIG_H + +// Set to 1 when building squish to use Altivec instructions. +#ifndef SQUISH_USE_ALTIVEC +#define SQUISH_USE_ALTIVEC 0 +#endif + +// Set to 1 or 2 when building squish to use SSE or SSE2 instructions. 
+#ifndef SQUISH_USE_SSE +#define SQUISH_USE_SSE 2 +#endif + +// Internally set SQUISH_USE_SIMD when either Altivec or SSE is available. +#if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE +#error "Cannot enable both Altivec and SSE!" +#endif +#if SQUISH_USE_ALTIVEC || SQUISH_USE_SSE +#define SQUISH_USE_SIMD 1 +#else +#define SQUISH_USE_SIMD 0 +#endif + +#endif // ndef SQUISH_CONFIG_H diff --git a/extern/libsquish-1.15/extra/squishgen.cpp b/extern/libsquish-1.15/extra/squishgen.cpp new file mode 100644 index 0000000..1fcbd2a --- /dev/null +++ b/extern/libsquish-1.15/extra/squishgen.cpp @@ -0,0 +1,151 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + -------------------------------------------------------------------------- */ + +#include <iostream> + +struct SourceBlock +{ + int start; + int end; + int error; +}; + +struct TargetValue +{ + SourceBlock sources[2]; +}; + +static void GenerateData( std::string const& name, int bits, int colours ) +{ + TargetValue values[256]; + + // initialise the data + for( int target = 0; target < 256; ++target ) + for( int index = 0; index < colours; ++index ) + values[target].sources[index].error = 255; + + // loop over all possible source points + int count = ( 1 << bits ); + for( int value1 = 0; value1 < count; ++value1 ) + { + for( int value2 = 0; value2 < count; ++value2 ) + { + // compute the 8-bit endpoints + int a = ( value1 << ( 8 - bits ) ) | ( value1 >> ( 2*bits - 8 ) ); + int b = ( value2 << ( 8 - bits ) ) | ( value2 >> ( 2*bits - 8 ) ); + + // fill in the codebook with the these and intermediates + int codes[2]; + codes[0] = a; + if( colours == 3 ) + codes[1] = ( a + b )/2; + else + codes[1] = ( 2*a + b )/3; + + // mark each target point with the endpoints and index needed for it + for( int index = 0; index < 2; ++index ) + { + int target = codes[index]; + + SourceBlock& block = values[target].sources[index]; + if( block.error != 0 ) + { + block.start = value1; + block.end = value2; + block.error = 0; + } + } + } + } + + // iteratively fill in the missing values + for( ;; ) + { + bool stable = true; + for( int index = 0; index < 2; ++index ) + { + for( int target = 0; target < 256; ++target ) + { + if( target != 255 ) + { + SourceBlock& current = values[target].sources[index]; + SourceBlock& next = values[target + 1].sources[index]; + if( current.error > next.error + 1 ) + { + current.start = next.start; + current.end = next.end; + current.error = next.error + 1; + stable = false; + } + } + if( target != 0 ) + { + SourceBlock& current = values[target].sources[index]; + SourceBlock& previous = values[target - 1].sources[index]; + if( current.error > previous.error + 1 ) 
+ { + current.start = previous.start; + current.end = previous.end; + current.error = previous.error + 1; + stable = false; + } + } + } + } + if( stable ) + break; + } + + // debug + std::cout << "\nstatic SingleColourLookup const " << name << "[] = \n{\n"; + for( int i = 0;; ) + { + std::cout << "\t{ { "; + for( int j = 0;; ) + { + SourceBlock const& block = values[i].sources[j]; + if( j < colours ) + std::cout << "{ " << block.start << ", " << block.end << ", " << block.error << " }"; + else + std::cout << "{ 0, 0, 0 }"; + if( ++j == 2 ) + break; + std::cout << ", "; + } + std::cout << " } }"; + if( ++i == 256 ) + break; + std::cout << ",\n"; + } + std::cout << "\n};\n"; +} + +int main() +{ + GenerateData( "lookup_5_3", 5, 3 ); + GenerateData( "lookup_6_3", 6, 3 ); + GenerateData( "lookup_5_4", 5, 4 ); + GenerateData( "lookup_6_4", 6, 4 ); +} diff --git a/extern/libsquish-1.15/extra/squishpng.cpp b/extern/libsquish-1.15/extra/squishpng.cpp new file mode 100644 index 0000000..5d45b0c --- /dev/null +++ b/extern/libsquish-1.15/extra/squishpng.cpp @@ -0,0 +1,546 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +/*! @file + + @brief Test program that compresses images loaded using the PNG format. + + This program requires libpng for PNG input and output, and is designed to + test the RMS error for DXT compression for a set of test images. + + This program uses the high-level image compression and decompression + functions that process an entire image at a time. +*/ + +#include <iostream> +#include <string> +#include <sstream> +#include <cmath> +#include <cstdio> +#include <squish.h> +#include <png.h> + +#ifdef _MSC_VER +#pragma warning( disable: 4511 4512 ) +#endif // def _MSC_VER + +using namespace squish; + +//! Simple exception class. +class Error : public std::exception +{ +public: + Error( std::string const& excuse ) : m_excuse( excuse ) {} + ~Error() throw() {} + + virtual char const* what() const throw() { return m_excuse.c_str(); } + +private: + std::string m_excuse; +}; + +//! Base class to make derived classes non-copyable +class NonCopyable +{ +public: + NonCopyable() {} + +private: + NonCopyable( NonCopyable const& ); + NonCopyable& operator=( NonCopyable const& ); +}; + +//! Memory object. +class Mem : NonCopyable +{ +public: + Mem() : m_p( 0 ) {} + explicit Mem( int size ) : m_p( new u8[size] ) {} + ~Mem() { delete[] m_p; } + + void Reset( int size ) + { + u8 *p = new u8[size]; + delete m_p; + m_p = p; + } + + u8* Get() const { return m_p; } + +private: + u8* m_p; +}; + +//! File object. 
+class File : NonCopyable +{ +public: + explicit File( FILE* fp ) : m_fp( fp ) {} + ~File() { if( m_fp ) fclose( m_fp ); } + + bool IsValid() const { return m_fp != 0; } + FILE* Get() const { return m_fp; } + +private: + FILE* m_fp; +}; + +//! PNG read object. +class PngReadStruct : NonCopyable +{ +public: + PngReadStruct() + : m_png( 0 ), + m_info( 0 ), + m_end( 0 ) + { + m_png = png_create_read_struct( PNG_LIBPNG_VER_STRING, 0, 0, 0 ); + if( !m_png ) + throw Error( "failed to create png read struct" ); + + m_info = png_create_info_struct( m_png ); + m_end = png_create_info_struct( m_png ); + if( !m_info || !m_end ) + { + png_infopp info = m_info ? &m_info : 0; + png_infopp end = m_end ? &m_end : 0; + png_destroy_read_struct( &m_png, info, end ); + throw Error( "failed to create png info structs" ); + } + } + + ~PngReadStruct() + { + png_destroy_read_struct( &m_png, &m_info, &m_end ); + } + + png_structp GetPng() const { return m_png; } + png_infop GetInfo() const { return m_info; } + +private: + png_structp m_png; + png_infop m_info, m_end; +}; + +//! PNG write object. +class PngWriteStruct : NonCopyable +{ +public: + PngWriteStruct() + : m_png( 0 ), + m_info( 0 ) + { + m_png = png_create_write_struct( PNG_LIBPNG_VER_STRING, 0, 0, 0 ); + if( !m_png ) + throw Error( "failed to create png read struct" ); + + m_info = png_create_info_struct( m_png ); + if( !m_info ) + { + png_infopp info = m_info ? &m_info : 0; + png_destroy_write_struct( &m_png, info ); + throw Error( "failed to create png info structs" ); + } + } + + ~PngWriteStruct() + { + png_destroy_write_struct( &m_png, &m_info ); + } + + png_structp GetPng() const { return m_png; } + png_infop GetInfo() const { return m_info; } + +private: + png_structp m_png; + png_infop m_info; +}; + +//! PNG rows object. 
+class PngRows : NonCopyable +{ +public: + PngRows( int pitch, int height ) : m_height( height ) + { + m_rows = new png_bytep[m_height]; + for( int i = 0; i < m_height; ++i ) + m_rows[i] = new png_byte[pitch]; + } + + ~PngRows() + { + for( int i = 0; i < m_height; ++i ) + delete[] m_rows[i]; + delete[] m_rows; + } + + png_bytep* Get() const { return m_rows; } + + png_bytep operator[](int y) const { return m_rows[y]; } + +private: + png_bytep* m_rows; + int m_height; +}; + +//! Represents a DXT compressed image in memory. +struct DxtData +{ + int width; + int height; + int format; //!< Either kDxt1, kDxt3 or kDxt5. + Mem data; + bool isColour; + bool isAlpha; +}; + +//! Represents an uncompressed RGBA image in memory. +class Image +{ +public: + Image(); + + void LoadPng( std::string const& fileName ); + void SavePng( std::string const& fileName ) const; + + void Decompress( DxtData const& dxt ); + void Compress( DxtData& dxt, int flags ) const; + + double GetRmsError( Image const& image ) const; + +private: + int m_width; + int m_height; + bool m_isColour; //!< Either colour or luminance. + bool m_isAlpha; //!< Either alpha or not. 
+ Mem m_pixels; +}; + +Image::Image() + : m_width( 0 ), + m_height( 0 ), + m_isColour( false ), + m_isAlpha( false ) +{ +} + +void Image::LoadPng( std::string const& fileName ) +{ + // open the source file + File file( fopen( fileName.c_str(), "rb" ) ); + if( !file.IsValid() ) + { + std::ostringstream oss; + oss << "failed to open \"" << fileName << "\" for reading"; + throw Error( oss.str() ); + } + + // check the signature bytes + png_byte header[8]; + size_t check = fread( header, 1, 8, file.Get() ); + if( check != 8 ) + throw Error( "file read error" ); + if( png_sig_cmp( header, 0, 8 ) ) + { + std::ostringstream oss; + oss << "\"" << fileName << "\" does not look like a png file"; + throw Error( oss.str() ); + } + + // read the image into memory + PngReadStruct png; + png_init_io( png.GetPng(), file.Get() ); + png_set_sig_bytes( png.GetPng(), 8 ); + png_read_png( png.GetPng(), png.GetInfo(), PNG_TRANSFORM_EXPAND, 0 ); + + // get the image info + png_uint_32 width; + png_uint_32 height; + int bitDepth; + int colourType; + png_get_IHDR( png.GetPng(), png.GetInfo(), &width, &height, &bitDepth, &colourType, 0, 0, 0 ); + + // check the image is 8 bit + if( bitDepth != 8 ) + { + std::ostringstream oss; + oss << "cannot process " << bitDepth << "-bit image (bit depth must be 8)"; + throw Error( oss.str() ); + } + + // copy the data into a contiguous array + m_width = width; + m_height = height; + m_isColour = ( ( colourType & PNG_COLOR_MASK_COLOR ) != 0 ); + m_isAlpha = ( ( colourType & PNG_COLOR_MASK_ALPHA ) != 0 ); + m_pixels.Reset(4*width*height); + + // get the image rows + png_bytep const *rows = png_get_rows( png.GetPng(), png.GetInfo() ); + if( !rows ) + throw Error( "failed to get image rows" ); + + // copy the pixels into the storage + u8 *dest = m_pixels.Get(); + for( int y = 0; y < m_height; ++y ) + { + u8 const *src = rows[y]; + for( int x = 0; x < m_width; ++x ) + { + if( m_isColour ) + { + dest[0] = src[0]; + dest[1] = src[1]; + dest[2] = src[2]; + src 
+= 3; + } + else + { + u8 lum = *src++; + dest[0] = lum; + dest[1] = lum; + dest[2] = lum; + } + + if( m_isAlpha ) + dest[3] = *src++; + else + dest[3] = 255; + + dest += 4; + } + } +} + +void Image::SavePng( std::string const& fileName ) const +{ + // create the target rows + int const pixelSize = ( m_isColour ? 3 : 1 ) + ( m_isAlpha ? 1 : 0 ); + PngRows rows( m_width*pixelSize, m_height ); + + // fill the rows with pixel data + u8 const *src = m_pixels.Get(); + for( int y = 0; y < m_height; ++y ) + { + u8 *dest = rows[y]; + for( int x = 0; x < m_width; ++x ) + { + if( m_isColour ) + { + dest[0] = src[0]; + dest[1] = src[1]; + dest[2] = src[2]; + dest += 3; + } + else + *dest++ = src[1]; + + if( m_isAlpha ) + *dest++ = src[3]; + + src += 4; + } + } + + // set up the image + PngWriteStruct png; + png_set_IHDR( + png.GetPng(), png.GetInfo(), m_width, m_height, + 8, ( m_isColour ? PNG_COLOR_MASK_COLOR : 0) | ( m_isAlpha ? PNG_COLOR_MASK_ALPHA : 0 ), + PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT + ); + + // open the target file + File file( fopen( fileName.c_str(), "wb" ) ); + if( !file.IsValid() ) + { + std::ostringstream oss; + oss << "failed to open \"" << fileName << "\" for writing"; + throw Error( oss.str() ); + } + + // write the image + png_set_rows( png.GetPng(), png.GetInfo(), rows.Get() ); + png_init_io( png.GetPng(), file.Get() ); + png_write_png( png.GetPng(), png.GetInfo(), PNG_TRANSFORM_IDENTITY, 0 ); +} + +void Image::Decompress( DxtData const& dxt ) +{ + // allocate storage + m_width = dxt.width; + m_height = dxt.height; + m_isColour = dxt.isColour; + m_isAlpha = dxt.isAlpha; + m_pixels.Reset( 4*m_width*m_height ); + + // use the whole image decompression function to do the work + DecompressImage( m_pixels.Get(), m_width, m_height, dxt.data.Get(), dxt.format ); +} + +void Image::Compress( DxtData& dxt, int flags ) const +{ + // work out how much memory we need + int storageSize = GetStorageRequirements( m_width, m_height, 
flags ); + + // set the structure fields and allocate it + dxt.width = m_width; + dxt.height = m_height; + dxt.format = flags & ( kDxt1 | kDxt3 | kDxt5 ); + dxt.isColour = m_isColour; + dxt.isAlpha = m_isAlpha; + dxt.data.Reset( storageSize ); + + // use the whole image compression function to do the work + CompressImage( m_pixels.Get(), m_width, m_height, dxt.data.Get(), flags ); +} + +double Image::GetRmsError( Image const& image ) const +{ + if( m_width != image.m_width || m_height != image.m_height ) + throw Error( "image dimensions mismatch when computing RMS error" ); + + // accumulate colour error + double difference = 0; + u8 const *a = m_pixels.Get(); + u8 const *b = image.m_pixels.Get(); + for( int y = 0; y < m_height; ++y ) + { + for( int x = 0; x < m_width; ++x ) + { + int d0 = ( int )a[0] - ( int )b[0]; + int d1 = ( int )a[1] - ( int )b[1]; + int d2 = ( int )a[2] - ( int )b[2]; + difference += ( double )( d0*d0 + d1*d1 + d2*d2 ); + a += 4; + b += 4; + } + } + return std::sqrt( difference/( double )( m_width*m_height ) ); +} + +int main( int argc, char* argv[] ) +{ + try + { + // parse the command-line + std::string sourceFileName; + std::string targetFileName; + int format = kDxt1; + int fit = kColourClusterFit; + int extra = 0; + bool help = false; + bool arguments = true; + bool error = false; + for( int i = 1; i < argc; ++i ) + { + // check for options + char const* word = argv[i]; + if( arguments && word[0] == '-' ) + { + for( int j = 1; word[j] != '\0'; ++j ) + { + switch( word[j] ) + { + case 'h': help = true; break; + case '1': format = kDxt1; break; + case '3': format = kDxt3; break; + case '5': format = kDxt5; break; + case 'r': fit = kColourRangeFit; break; + case 'i': fit = kColourIterativeClusterFit; break; + case 'w': extra = kWeightColourByAlpha; break; + case '-': arguments = false; break; + default: + std::cerr << "squishpng error: unknown option '" << word[j] << "'" << std::endl; + error = true; + } + } + } + else + { + if( 
sourceFileName.empty() ) + sourceFileName.assign( word ); + else if( targetFileName.empty() ) + targetFileName.assign( word ); + else + { + std::cerr << "squishpng error: unexpected argument \"" << word << "\"" << std::endl; + error = true; + } + } + } + + // check arguments + if( sourceFileName.empty() ) + { + std::cerr << "squishpng error: no source file given" << std::endl; + error = true; + } + if( help || error ) + { + std::cout + << "SYNTAX" << std::endl + << "\tsquishpng [-135riw] <source> [<target>]" << std::endl + << "OPTIONS" << std::endl + << "\t-h\tPrint this help message" << std::endl + << "\t-135\tSpecifies whether to use DXT1 (default), DXT3 or DXT5 compression" << std::endl + << "\t-r\tUse the fast but inferior range-based colour compressor" << std::endl + << "\t-i\tUse the very slow but slightly better iterative colour compressor" << std::endl + << "\t-w\tWeight colour values by alpha in the cluster colour compressor" << std::endl + ; + + return error ? -1 : 0; + } + + // load the source image + Image sourceImage; + sourceImage.LoadPng( sourceFileName ); + + // compress to DXT + DxtData dxt; + sourceImage.Compress( dxt, format | fit | extra ); + + // decompress back + Image targetImage; + targetImage.Decompress( dxt ); + + // compare the images + double rmsError = sourceImage.GetRmsError( targetImage ); + std::cout << sourceFileName << " " << rmsError << std::endl; + + // save the target image if necessary + if( !targetFileName.empty() ) + targetImage.SavePng( targetFileName ); + } + catch( std::exception& excuse ) + { + // complain + std::cerr << "squishpng error: " << excuse.what() << std::endl; + return -1; + } + + // done + return 0; +} diff --git a/extern/libsquish-1.15/extra/squishtest.cpp b/extern/libsquish-1.15/extra/squishtest.cpp new file mode 100644 index 0000000..e4362fe --- /dev/null +++ b/extern/libsquish-1.15/extra/squishtest.cpp @@ -0,0 +1,206 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 
Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +/*! @file + + @brief This program tests the error for 1 and 2-colour DXT compression. + + This tests the effectiveness of the DXT compression algorithm for all + possible 1 and 2-colour blocks of pixels. 
+*/ + +#include +#include +#include +#include +#include + +using namespace squish; + +double GetColourError( u8 const* a, u8 const* b ) +{ + double error = 0.0; + for( int i = 0; i < 16; ++i ) + { + for( int j = 0; j < 3; ++j ) + { + int index = 4*i + j; + int diff = ( int )a[index] - ( int )b[index]; + error += ( double )( diff*diff ); + } + } + return error / 16.0; +} + +void TestOneColour( int flags ) +{ + u8 input[4*16]; + u8 output[4*16]; + u8 block[16]; + + double avg = 0.0, min = DBL_MAX, max = -DBL_MAX; + int counter = 0; + + // test all single-channel colours + for( int i = 0; i < 16*4; ++i ) + input[i] = ( ( i % 4 ) == 3 ) ? 255 : 0; + for( int channel = 0; channel < 3; ++channel ) + { + for( int value = 0; value < 255; ++value ) + { + // set the channnel value + for( int i = 0; i < 16; ++i ) + input[4*i + channel] = ( u8 )value; + + // compress and decompress + Compress( input, block, flags ); + Decompress( output, block, flags ); + + // test the results + double rm = GetColourError( input, output ); + double rms = std::sqrt( rm ); + + // accumulate stats + min = std::min( min, rms ); + max = std::max( max, rms ); + avg += rm; + ++counter; + } + + // reset the channel value + for( int i = 0; i < 16; ++i ) + input[4*i + channel] = 0; + } + + // finish stats + avg = std::sqrt( avg/counter ); + + // show stats + std::cout << "one colour error (min, max, avg): " + << min << ", " << max << ", " << avg << std::endl; +} + +void TestOneColourRandom( int flags ) +{ + u8 input[4*16]; + u8 output[4*16]; + u8 block[16]; + + double avg = 0.0, min = DBL_MAX, max = -DBL_MAX; + int counter = 0; + + // test all single-channel colours + for( int test = 0; test < 1000; ++test ) + { + // set a constant random colour + for( int channel = 0; channel < 3; ++channel ) + { + u8 value = ( u8 )( rand() & 0xff ); + for( int i = 0; i < 16; ++i ) + input[4*i + channel] = value; + } + for( int i = 0; i < 16; ++i ) + input[4*i + 3] = 255; + + // compress and decompress + Compress( 
input, block, flags ); + Decompress( output, block, flags ); + + // test the results + double rm = GetColourError( input, output ); + double rms = std::sqrt( rm ); + + // accumulate stats + min = std::min( min, rms ); + max = std::max( max, rms ); + avg += rm; + ++counter; + } + + // finish stats + avg = std::sqrt( avg/counter ); + + // show stats + std::cout << "random one colour error (min, max, avg): " + << min << ", " << max << ", " << avg << std::endl; +} + +void TestTwoColour( int flags ) +{ + u8 input[4*16]; + u8 output[4*16]; + u8 block[16]; + + double avg = 0.0, min = DBL_MAX, max = -DBL_MAX; + int counter = 0; + + // test all single-channel colours + for( int i = 0; i < 16*4; ++i ) + input[i] = ( ( i % 4 ) == 3 ) ? 255 : 0; + for( int channel = 0; channel < 3; ++channel ) + { + for( int value1 = 0; value1 < 255; ++value1 ) + { + for( int value2 = value1 + 1; value2 < 255; ++value2 ) + { + // set the channnel value + for( int i = 0; i < 16; ++i ) + input[4*i + channel] = ( u8 )( ( i < 8 ) ? 
value1 : value2 ); + + // compress and decompress + Compress( input, block, flags ); + Decompress( output, block, flags ); + + // test the results + double rm = GetColourError( input, output ); + double rms = std::sqrt( rm ); + + // accumulate stats + min = std::min( min, rms ); + max = std::max( max, rms ); + avg += rm; + ++counter; + } + } + + // reset the channel value + for( int i = 0; i < 16; ++i ) + input[4*i + channel] = 0; + } + + // finish stats + avg = std::sqrt( avg/counter ); + + // show stats + std::cout << "two colour error (min, max, avg): " + << min << ", " << max << ", " << avg << std::endl; +} + +int main() +{ + TestOneColourRandom( kDxt1 | kColourRangeFit ); + TestOneColour( kDxt1 ); + TestTwoColour( kDxt1 ); +} diff --git a/extern/libsquish-1.15/libSquish.png b/extern/libsquish-1.15/libSquish.png new file mode 100644 index 0000000000000000000000000000000000000000..7f37a4e0589c6eb3ef300bb7bfee45b0013fbcd0 GIT binary patch literal 17907 zcmeHv_dnHt6#v_nO@*wi;zmd^Zbo!Pwo92IvR7922<1v$L`hbuODd7QX-FX>BCai2 z**oidZhgM%KluLey$^}|c;DCiHO_gS=Xsv<7HgoV!AQqNhad>!MNJGAK`1m4gzPje zHM~QgA+!kpQTnJ|G^T}D2<Y7Eshlo>g6hD4bpZ&dU%hMohb&bv8c<(3Ki+|S6ksr{aO{&p3ep*t8HuXzO+JV-D zXa=*}zkf@e`_cLVU1X%KdJ!UX=8PG_g9spO5uZKlD2DbI zvsC8oYQx*eHOZ!$Jwq472vsqY;TnWn0<#U}AO=-DxI#tJB zBIQ-sB+fC%9gl5v`R{gJzOZ8_SFIYd$VC`?0Ro! 
z`%lEi($kGI$WQOe_z)f1o9Fl=&hhh;-;9uCp`{JaY@b{q>!`GpNNbt+W%xFU% zVa{XVE1lXp<%?F5O=-E;Y2HgG@Nze7pSFJ3dURtuL+5zX^!&V@NYWP_=~RqhB1h^lh?O0cSqi)POR2R!l8%uTjDKrlu_>@U`>$?&h<* z$oik%y(?$AtqKWcH%_Q;lqco^E6G$>vb$_Y_AYrDy6iPlK(&vc6_8b zL+BfVx8@|Z^6vKSU)i!zC7FPCUQSMq7Q%?&+a|n=jqXyncNbT&f1aI9e&)=X7y0?>y1MuH z;8v4UozJ@9Eiz(R{`~y>LpRy#OIlmiHd31wh(3z&Sa)(=@ShC{fDIi|(?8(yIps+u z;y@Mk?xOjujjOAxOM9l$-e9%A$!}tLd+)%&-Cw^Po?~kkvhb}78_%k$xL|lrm9UWc zsb}%?k7S&&2DGg04#nYgF(34i&4Upp$yl?lz(G7!EXffyA3uKVg0zxLi9OKzs-4@l z68X+1ibb#+wyxBF!D|1_mFoA~>vJm`51+hwBhmFX4|B=H#BTZLmq%znm!hXn1p=&GtJJup0A9JG{DSKm{1_x4V5adFuuiRd$t;~b0oa^H_GhLde7 z1zkvOuHyf99K32qMoG*K*N|eeRxLOZ=lu9$@mF=#)n}{y7lu~xyl2y|RNv^URToXw z;nLHUkrL9t|K_!yd3?(5!b>>t6usPiPP$usWQ(qZZuYJtG2HZQq+^cA$vJ+&ChT~U zS@+;z=-XuP%hIVjb8x!Vq1)>nuqCfaQ(hRVJr?enz(W4e{$1Hz-C6|GfOt#f@InlH zwNFO1Pi}vHtXAvFv{pB_D_6VCQxU#k@x+n!howY}o9uRCd7m45s!qB}=Yj3~ZIb~&Bl_2Mnb zCJoJ8TwIEf^!s0~u@kU)apEnHOC8%YyIl)vFE6bVJ!#53hw;8>ceI1*^W+yVUYxVH zKX&582^OZ9?PgtYNQjL$x#j$!6+c(^h!o`? zc6XaX#Zy{TT|wP4VvZ$U z{AhdX4LNP6$*im;i810QyHG3e~;C$ z@AL3k->ZU&rY>eZC@{X+u-&!Z4;E!^2iyj$zJKp>&_-?3=`RppzC6+0(_{PdbK(nd zXwgHF$wi)7Luy?X(QZU#r|8KB!%T|dg_DdEKR-M(iV3Rs|Aj8Q*$Gq0(MgR|@cAVie|EmY7oc`=D@3J8e3n}) zZ$o!djQy{Vo^S?$6v^$HZUKxhRl|7|)bCY=ZB3+*=IZs8;H^e$49?R1!VnOk@zE(e z0~3>&nHiU8H38Fy-!~S5wDlLVvulOWwH9!PYRh}K^%p*O-Lw!)Jg3OFAZAo)`KCa} z&(BYQJ??7nn=3y`T{+Cn%{##5Vag^PYZUkRl(>8*+rFlIeL6B|J}bi0v=xb+IowrA z9rxG!wDgROx}x1=gSC$*IqL|sJx#F+Lak1!>?;(y6J_lj7k0x`F5r3kOd>zUAC31K z|MWB`hZ;YQAmS}*mo8lz9_BR-YxXkEHRd(Vx&K8NjYf9>F8JqJ9ZymX4nBKhVd&l* zv3%ojE@?h`^hX(}PlfO83cfZV zC2ROIgJw{{00SQ$e<%{~{{H>y`{)AzS!{Upj3{i~#si)vFM}&r?y(A8XaFk}{78}wh-Hktyu3Mf9~L&oqYvtJa9Cbf zwba_ekITPG#H91`@zGZYxw(PLuKgI5ou@P3O{{qzwj@WsTFN`kGjyawr?LL-c_S*O+8Ixa>ehGA}7NIIrskVYr>j0O%nipKPLe?UdZX1Zg-M;;M3noB1l>-Nl+x%IbCQU%?DGR&_QR`lRzq#?{%j;JpG-T4h zP5Ta0vew2Obd*kwPtFNYRCY^+`0BOtRj#S&?va&bcVY^Pp!n8S`Y*TzYJpAsFh1dz z%VN6~Cs{YO{kh>5Grqo}RB> zs1y4(y&9#jw3fX4GfCgPu`c>E1gob{$yR=S{cx&@hXQr%xL!K$86HhHdi~MHfy7Ay 
z7WI3${^&yO1|K*j>0A|8T2BYemG&lRGJHkiS;$)+p466=k=c3gmEMw!eIvpD?^J`} zRBv9B-l}bDYt!ZnntFOC9}e9ZbgkdtU5F{_b2H>ChP7!w6n__e;Glt-S?r%bLEYWm{4BJGCYEArFs}FT zc&6V2l?SVD%&CF(NQN!ZD{lWTR-cRfRGXjmyH3R_Wl6H zu&>Nvi&3g6LRsxGl|GZUU+y1!k(c-GcVF=-mu^ujw1&Mt?M-kOn~ylwM`v8w$Cl(j z-Q$jllc=H%-x)u;1iQ}^6d6-iCR?-mP5a$JJ^9gMWY5Gfl9>!o3MYxKKp!qW@0y0? z_-85oh~*@GJk6u(000qbKrsHe#yyKyu6h!BFcvO9XP+o^nDMW>R+?!)r4nI} z6G&+}?9Rv`_~)Ffs~8>c;CJ)$6#*Wh|4l~VW1X5Ob^C;O!1z76#kR8xhZGJQ2F5ko zY6`OjY|oX`_YcptFnB%oBys4I^~3eSQpNVWQyMDoKgJ#$m1jDxpK1B=8fJpz-1;iJ zGJGM6s@J4bBl)6){^qe%MdGd|@;nlozCJ+g249$!mDCq~}MKpV;1J zlgij=%+XH|Pg>B0iMJFbv`koTfB4Kw%89&B3VbAa94~U=&>zmZxw(qT%;4+qs5qYF z=Q9>Ne0^WxH9j(S@CWA2HweilR)JItb=wl)z5*qsrHnI6uw6HSo0n@Y-TB8{2Y&Yg zH+-m^#SCF`gyzdN>CI{V@H>-P;e3F#he>GS+?8{6>|<;?dHR&pptGy+g@P)vqXw>o?$C# zoTnWa+En&KNO%rjLV{$7o?nVE_L6+@cBBYh=FYu7KN#0^G@PfPcHP`iOIgClM_-yBNSFV2e{_aNHje#Pd*@`=hA82DfJUZ>jq^B@(2qK{X57V={ zI=<4<(n9f9rurH`&COk5C$199+<%mN9)(c62!Jp;kz)M`_~`AQ3L<*xkCL&cza<>t8&N1tD<&LLy5A}`~1f5c+QNr#&m_Qx5Zz4g&43X#kJL5J2yWPrZE9M(~v&5w=<~%`xp8-YyXiQ{vy2q-_d1}6Dnc191iYl@Pws`x znyi1fUE^sot9RRv=hNabV=G%yH-4LffjuQnFh}Pdj#Y?18pH&Q zx;hnP5-fDLM&y~`nD`rfzWWALS~6X^as|>|_;PY}JrAr8n|G+B2Y&rg=_3Niqf4S& zXK(zze(A^`srun~IFVOkoDBHOT8+As3Oswp53@YU9~qC-g)>J`V`?NMc z=-49Y$wK@%v)UQ&t6{8F+c#$G$pWymnxphDSQ5p4l78|-UI^b0?(6Klif+^R z>8&$Gd}ERs`CUe zmd?JuSa)Js=ubPW7O<%G!!N3;&^&(KhA-Z{xgf9PrC93_V{uNa>)~ncj~t1tL-P69}MsJ z_tsqoC)L&9f@GXJKR@5e-Cb#{o{^F~l*ygC_`|XpnXCHB> zHB7F&Ti6idWDw+JP}Axw*Z0gr7up;UVCoN19@f~K|6uWU6<_0&A73%ytpJK(6rG-I znmR~sCMFzU?2whi7u<8Weqpy}mHjz|wjqdh5QZTcV>+Q!>)AqBW%x))vFg8S))Pc9 zE}s_Hcn>UNB8RJ?{T8mR1O3en&A`ey@MmrI=QPW6Srj@%Q~kZamNKew7gHs9dvulWOXN{nUrX z7v&733^7r2)jiL6=1G3J6mLb~obpjQ^xo#U&v;Y(ut;>k58sD-5Z|)BjVVSXRtK^$ z7(U<04F3xNo^JvnC!DT~S6s%~q2o=~@aYVbwGuS=%b}YmBkHg|Ht!U!8^_exuTFJV z{F!-m5ZkPsjLqB>TplwLJYiB!$I5Gmrery{yi+zc2F`K{y$07$UY1t~-m5 z*3<(S1f_LQYq`M?JJv6~x~y~^;@$CD@q8q*uv=0JLFrO~d^um2EyvHpUwh?E&Gj#C$>#ipt!reg917E}R7$ zP>9CJ++{y$p8QxPd~X}uT>Jo=k+Z@;4ci?!n59F*FSuc#ReBNCUJJ)N4z`pt&hp^a 
zEqaa|h;OY|u+3DEjdTV6RTOGGHZRKw3+fwa)uISBzaZ1q1*mw8KX3dEXiM_%vBtQK zizs~~tLk<^;dpD9sei4n3cIprU6Mm z>zLtp`I9euHkK6Q#VyLj!K@Ut^5k?UQL-c~bX2qvJmIC42dVI+qJLHPsG09p{asz^gu;CWUQ%HG&~J2kqGcQ$P>wg|v(JTlT(?SH zdq24o>r_!|VqJl^*+>WQkfrkklEwG&{7#jr8Se9LGB!K`YgO*>^)cwv`P1^4OhZsV z{APO^-HBF&;%EG_s0*;Nv?|+ zjmwaB5=s~|X`h>Cf08JoV3214PN8Q%1==P@bg4tH@Y*=2I6yO2g zLp;GovQB3>RxEvwz%{EgG;kp4ul%CBf)BDH=d>zc*VXOm_gebs=1z-aJVrLQ-`Y0b z+5GX=?&x4m;EK(0wPkUVP52cc6$cxP^M(AKc}M+ZFx?uc2M@4TiXgViNz^R9Po7r9 z9N!924w*5^x}*O{YBc4c{h%8<9tCD?d}oMk(FDnH=2g&}{hU&Qmq}w%$IL%Zp1F_B z^qI-a^-vkVdpdyaR`Y!p^4=@{S8V%Bob@lF3NC9+=r~H>0(;TwltHX%7Asgf3usxD zdVKi>z2ykT)ydwYTa#JCik?F?MM_f7m;FFZ6ahJRwWKSfmf&cH|~vCZmQc}4Zz$#CQ5#y%vI zRWPWW#qcUbrOG7ijC)H_+m$jmGdGa+!TT92ecBXqw@bS8h{p+vMyLl;8BL6T<1_85 z&X2{yX@1O4E3G_^lPy)(0FYzf+BE?L$XwGqJ6F=GFqvL#p%qw#efiawuY5%9EeRqi z7u$2r|GbC@tq`vvxn#8Lo2GKKtf3cYKZiEbX~B(zZ9JikhfPYrK>>MUrWHLtX#WNw zt_1J8zB9;MYocNK9GP>TSE9YnW(C-a)=1f=M z07(>8)SbmqFRD<@5AkDsEl;hm7&?QgbWMWFvj6t&+f&C0GQ2TP4WL98f)!WTn{k{V zSiyOGC7s8i8W~(@F8fI;)(0hD)8xX-%F5P9&$#w{jC!Xwi4))ovunaP&U}A0j%^lu zfUTFyXqx-`u@WZR;Z72|9rFZNyZ1~Zey{W8rB`D&k;y8-jaN{uva_=r@JhGW$`jF< zTp}mAi~oZh3YNrjO_T|qTWS1ex!YEQ7+*@yzvY>W7ri~6A25$ z;hd0UthrX;I4BI^Br#GXM43-7aXb3M)qxvC_ay^S-=^D)+;yJl_>@JF$D|KLBx4m& z6W>WO)~w9!h&Eg0opJe!xiX?H)Co5Ryqb82r8AmbH@kE&gowB#md-*WEqg%J*H530 z=FJu~u_i?Q&t8D~IwRVvXf{^ymiLd(xY`O`xVzZ{LRS4XbUX)?O$~c5^#is?~hqJTadeFGNN$JY1k9a#B~Lc z^y}}_IIKgwntl0BNUG5efMOH-B}rtzJlQ)HtCiOF(;fikr7O&~kl zK{C3!4K^-ihZZQkz7}&Nq{*`DY&c- zND@)J1r2uzc8Z~eGk);Q9~p2XeT13Ny${|!dgrj%*uS{1_=2PYr%22diqGm0x`{a} z$a?<1d}7(C{-~~niTpT*_*`ymw4P}G(+-R%a3Fs ztPPds8aL9I&SNGnqSlO1_TZ^@H7{Q_`|>PD0M8{cWZ)%@T6vB@erQOsYUT#nCV9jc za}DKO2$FMbiH6*=Zku)E^46y19#;muc)GkkH zb1>o{((TM!sXhBStoSVsr(6gOXdCMHb(J^f`c0)c0i!IPpbYnSz_K}DMNM#b5zD^H zodM=2{l2tO_tk^ak0PSJkG8i2?WYP<$Z$+6K1|2%Wl~w4B@bg7LLt%M-Cd#+NRQmJvPTAhF&ik(#V&#fb|}Ur}2m z#L;nbaao2M=uywUqKTRqz97-oacR0+Y84<9+d+DmH4Gvn1ssA9iE>~t*KpVqB7~YM zfR8P)f|?>z6NLngzOui33sT|wX76iEunQoHt7_$yxD&O1wPjS>gW`_r3jBQ6Mhf@+ 
zl}Qr`sl5Ou2)@oWlYi@%tVeQ^XCO)PBT;a$Cc#+y($c;O1-o#&6T z&^qi(K%PJU4NZ?CggFw|*MOGkfxrP2K;3sJaMCY)>|j=azE)rv_D)xIHN(H5wc%o0 z;*IetzgadoiJWl*R=^$pP|(ijlCcuTT7Hk(&L91!&HuCo0}c4>m9F_W41s+*vFM?v zpZ{O!#rYT#4~D{sH@BR}j*GqhM0w9^vx88^hOx=;O6#VfK%ExaQ=VOc?Hv7EDrXIB z0K-~2)^es5eg9Q~Fnmvwn;|YtbZS73f7ryb(;m$nT0^SdZ7olnLQU8L zI=~utKl~e7Kr7FKSSAUY=={URI1Zm(o)q-Vi)V#FYcmze%hH5yBZw>e|4sWY^yLuO z&FI<1e^lDMi@XbWEdRf-R7qP6sL~EC1Vw?ai@L~okn1ao$yY}V&E`;}%)VA|V zV5FUp7nHjPKfun2z=kyZH*zx8B^Q?c28bgb@~p>rqjgXmt`ptM8_z%OaFlcVu~+~T zx%$ue6m?LgjYY40K;UP8RZt+uo9m{4k0PZfYEKPT@n`2Z@zaXN^8e-$Z=_Z0WY(iD zcKNmDU2eHH-}E9O!bf0ub`XsNY`|rJ{dbJ~bD0MSq1%&?tlj(CED6r%42(=kPWIZo z^u%Qgi|tz7G}+K0Sfb+Jp|jcEhVA~@Pr!y%0Yx%2Gjm4keOZA5U0q0E2vn8g-G*wm zx?tK~{|%l1Mf`EqCa-K@gI-NuI!!1uHCj@hc*^qSQeB5nb$(Rs-Je4~RGc&l`g@x|0M%uacL2 z3EiL)I8FUG@dNNHYU2H?H&rh~f3>FOj8_bSW89l9sISx23cQV-xdIml{C7O8gl-Fn zwZTxk`UHB1Zj)C&)Jq3^yL5T6%_|TAVbUM|n{+E+6r{*Cz=l5pn(H)qWq_{w+Vrj9 zal+1X> z(QAD5u(TK*HnrK>>p z!MJ+fK<>c`0fOhBtxA^%uKWrX6BFC_0?i=SM#>4@R!AT7fy8Uao*GQy8M$Q@-O$Xb z|MnSEvC?*SKL~2>@eZpS?4jtvbP?CRi^6djVYwm`Xr2A6ETf^0L_UODPFOl zh|oetDP(DePBiC%7-mG$M&u(JVghNzC{)MhXQSePz3OfJZw3W&}s&oy8CM z9zt!Vn-u5A|3#6Zb~z8eFrb<|BUi^_NZ$~Mydl4?9Dy#5y?uoyVxM|(0paUHe{+y$ zh9LvbLHpJB@88#mBtknMAHHMfjBYnr0#7V<&|JlCXr@KdT#u@s7YJT$rMqTGZTxWxv!HNoj+MI00F~j3-I<1LZJ4?hu{5Zj7ig zG$tTF?9eoo)~G|z<8)~j3lFGa93L~O!e(T4%**4Yc(YkRf-#?E$X&_V6TKBF)9#DJ zpB?l?V4US>Sz?(|;BQd^MmA0NahvMU$QRFLD>Y0LIj1K&rs<>2N0&dg`c5aZRii)H&Mqq*OnM*`OQv868$dU-^BU z-4911SUQKFE+#%nY;2W26pg!_?MOBzPa>(g9 zTW_!kWapAsh7k)p#2!29w z31mM*S~2bI`Z}usI^JH5w^*qbFF{Apirpz(IWCH$qSaAFOSm^Ua*HB1zu9F-ZR9p)Z<_k8=i z1!=@x4Wq(5vQUZ-xbgT>HgijXsQk}KN+p+Y4X1LMzHbU!f{D9t6^M`8sy0Vag7)w#u08L zEUW%^!snG}93RDqAHr=|H8Eu%LAe;o(%JrqNlgX}kBJnwIi;W|va+?F_HV`~b0}z8 zOA$qK8~n65-_ijj{+Eg;Ci57^S23cAOh+c8k%er8Od$J}4voN!WWJxpp7$C3x9%1+ zQSThO6h3p;SS8eChF44=HRK~_5f3scquI}*`9epE-xks-ZEzdxXHeMOgQPQRHQbAL=JFysxIT5!Ww^#m_h z1NRh4t@hTGU%9SWFs&o<=qfr|LszxkEj&JrNzFK-UYfVT-h*AR>K#QvpYbm!?^(s; 
z`R(BtcRxlNb9vA#X7;d3wzXlW{^)kwk(nlZPHO|z(EbA1BdU=zU8*=`J$@FaFDZ=r zyywyQBno=AwWQp-%IS_0e2oqI1AqE8bVSovIo=4TC($@OnN*)I6Nfo1yRdhC*>(8= z#(2`Fp1r$eo0ld3g)H?(_B6uR)P6LORsSwezYH|_|9Ps!!<3de;N0LwWFMg=joz>F z;Q6VjALR!c@U~Q^oP>ken@(a%Sq!yjHT_Rfurj_~qMG+V7~y#5$Tiyh4-Fzu@_6s? zW7K;d)?0ox5^q_K0^j*EOZCM^<;m*J9rDsBGtOmen6Bw-#T;7JjOh3DY#qgg61_M0 zC6AbmgoT3xTrf;xU=_%|wWHH>NQu^Psq~hbK-Up!kZ2Iogcn&gVX;@<`yyP2BvvhW zE`h+sXz-LOulpY0)r+J3D=Du6wWK&1K+YoFH+Fbxh+UTV38v8Les3Wy4_A0@B1g$Z zl#(A6R2rjL)I7?(XJQ;d&l>9|pxh@*R!JhQi)QZEio2;K^w19Tol zS!hMe1?S_nlGIcrt{ajerFO!9Pb4zHapL@TdE%tPkR?DK<|BHFs>^E@nA= zNj7b+#61|}m@9{dW`quN$mRJL7K2omcNiQ#Jf~}rMp~kAd)7#~D{r5j{6SX3FW=D1 zXSm%y&tGM9 zN)B}lDIxju=yYWAMkLsg3|m7u)D_*)v?PuZ*HiUXn+lax3XZgl$BJr#{f`)~A$-!j zk+|mjCJz0=+2>t-c$$1~hZVbXLuY9{X~hJCggTlAZjN_V?Y08hO{oyd>|-3gV-;H` zSIz`GQ(n}HJ;&c+i>3*T=@oH~=t-0X7p1eg1vuh&cC%6_d-S{cNSMzp9Q&EiESlpZ zrPJsVeDc_DGwoZrOndZ4{=0I#4K3t18>}Sn@hjky*0y(V)y*3mSq@QcuI6r-KwNZa zmY#4!l(;af`QDdB@C5fQJpZ6|_n{9M+!S9(q3oa$vUh$K*UQ{hq&{^@8>@GqXQ$=i1h98fCG?e zFmmS$okBbxw~eyNk!?O@F{}sDM9~3tCPkm`&}7SnaNHq;iRDF7BIX+ACHU9K^+Q1t zj+(DZ^uK?oP^1B zLKtK6NBGQ6LMoC&{(_Q4*~VvN?!76^lkA32jYFqtSq)V}D3pqwynjA#a)jMJrc!m` z=MDEVcX}AHrAVCY?>bIu(tJbTCtuVG6T7H}IZAQe@Mg8i;nVc2orzAof!4Sw=1!jl z(X|>TNt(q7Sj}z+BO`I-hWuZt+%CV9cXeRqn*4hJiv9Q#!|4cG+DX-0w&Idb_u zsePRTth>Wo#X}flVRH>{WbZ(O3WOvkuva~e_Mhm3hE^ggTu6-vnzA$TT5(^rJ8{(b zggv^ssmPtX&#kNrd=PryPr!;kAa7LlQD}M%Wm@h7Bc2H6w(JqZEkFY4h?L-caZLAx z_SR>Yrc&KY{C&1}nRBxX4IK-_xwhR&HA2p4NitJcbacvucjY5@qFq06Ftp7;E7nq? zsb$vb9AI-g71==iOxAQ^Sv$#q(UApw^~K+06MSiCs=C}NKauHKJ@uPr7mz_49NIiwG9O@+`0CSCjWA!uApWSLB6PX~$St z$%asxnEWj&(<}F=V(AQvgfzDruf|hd7mNtpnlly1K5VWbec?l5S_rRF+$B6ugK|0S z9wbz?=aWGz<`8KW&?gar_q6v({W0zM8~2YKGfara-R|(Q3-x40vo^R@>~r}^>wwe? 
zbw2WnV5IsMnH#2km()~g-?Xr?u+mHm!NV0Q1T~F5R2Okh$091_1Fkt8N7`Rc>AYF- zNA|eR%^~Y%Vwe&rm$eBrlhPA&cxua!M}vYTr#OTMbqsoO7PxcBmY}&og?iCqb>M6f zL>>g>2OhG#kbBVZ^_fY(Rn1izFZF({{4c-LkB&OK>NmS!iWQ$U9caDx58_tczq( zNd+O>uMY|Sd9b6Ff{~$B!%#(_rYikQ{LeBqn3UpolD@cMd^O!iugfzwe1Fx5#&?~J zo{}QX9-cTb@?m1^&7$JcGS^@OQXP5op0F7y#hK`ME_gy$4WltSBIqj~^oCl#o590s zM@ik7ssH_G;jBn4h_ud#xTXxh)lTCF&*d>~XI|7SgoBd^UBeHu-#$?535H028k36C z0#0~js&aV26d7k|63Bk%>dzzMi2j%;k58hC=XH*#31DWc{}LCUWd!hpCL1{0Fi4IF zo@a@9(al3O5>m*+`rwXilb5BEDh2XWn0GlHrE?UEVQpFrIET}F^2?Iwkfk`t`+uV!nda@seK|W zPGY*dHSGl=qkg2Qk95!}o!f$%*3BXADe7Bc%u87QWlDna{7~@1S9n~`MmFsX?_?o7 zK}I^ubD3hA<{Eo~m=daU`P1UZK3-D|qG&)-XARF<^`f(5rYnc245n~dh*Wlkm*uQ` z7ukU3tY!>Lrvvhgy5d#Ra5I(TN8$F*4#gSJj2Zj!dPXjHg$;Dh(X+wEc)mG%H&=&7 zRNw$2c_d`dkx|(n#aWg`^*1yz^{xbR*5@)W1a^^GO5Dy&x`4@qX5=ya_|OUm2=V5w z`N`f3EF6{hP3XVvpW=NIt@zwe*m-d)P)b4mLugTib%D_MsKMoGIybrmdcGkD&8mc*@g3k%n)n2jAxXJw%6zQ5a1yW zh>A?=yAONnv!Cm(hP7!Rzd3(RuHL69#s8M$eV<=~-yv&=%kil&7KvDZ1YL$j4Y%vT zr`pUR+{SQ`;@21OnlniJW2Vuoo`|Uj8Z%@v+NN$d`1)W&=!f0sfN044(fP?WoZf1U zL}wHC`hrFLrbxa}J_Hd)P%k}i_^95;s=0mz6ZzE_nVePWdZe|j;n$^V#4_9O^oJOavooP-7|1f!THK z-wgzg7f9ww5~C1i8Zky@-!-oNIZv@kgA|wI-A?iTI%lrIM_ovcNb3IyfV)A;Z>p*= z^7CVTTJwmsJK`a`b!O>_{v29ZuhBys5`o86#6IyC^a0M z8x*f3L}Y4*L1P9PP?r_N>J-nHdr$|Xj(|e{>rV7oGu7Erd+WWQ!WflQ-wXkZ@+3s-DD!G^X)mTfK8<5Iyom&gK<1;$){! 
z(YPh>cik9)69&QXG@UH(*!Abh(1WVDq($+o@|G>L#_VUNRi1N0Co#UqBh;r*y0yfB zsHNLRXM^S?X9gi#+^|YTU!K*Z54u3vOHYc$m2goP84uLPB6(NE70q2_B7`FxRu5&D zcdVS*A;6aFP@m<=7gC{F^byFmLo8`WWL85maws?*r7aQ(9mNLo79{D<8VO6ulDbXz z-0!D4l0wVM_-yjbwp++zu@!TJr^FS*x%2HCZS1Q$6#H4`8uACl9z>QIh$f$c6+j!e z6sMakz{4PUuFD^-Z^d2~#cFgi#$r7 zP(15AJ*p<6yxHgCwRQ3KE3!jDxg|?2?xlF)4j*faVf#BIS>nRjNUt0Byo$p8Pwxs^ zcqPKF)R9MsU$-vcsPQDLvKqhYVb{<{Gzy-;D_5 z`Ux}I6io9FbY?$i={QP(Tb@PWS;c3RB{8I2lBvZj-2>r;gl!|hZ#CNKhcoZ=g{xii z^s74ddKiqcR=*qZ#@vfH|wH;BzNQ&Ly1pO?k|9SG8TCP|i zAs_I_Hbt-lPLfBUie>^nGNI3Apg+`DPrX=6eyDeYx zJX!g_DbaOBXHJLEyrfp5=Q;>Emz6|H+@yw!Wz)Yr8PFU<^;vRx6B^NnK_qp7J{4Bh zR6tZ){P-ihm*zWYS_M`Jg+as7I)*0+jpy@s>PUiKPN|2_I4v|mDQC48Bx^lxQyYT% zmob07kSBS-^M=6!Ho+kCzoQ-LnE3>0BSs=cnr{o>l1BnfR|)ftn| z$Q$-Quk6I@<(|$9ZYu7dyJ383tz@3J_4rLn6XQ!8;W*sUtd*rw+~&xi9;D$Ode#?R zK3}hlN@$xMdgsC&fDdf@ypM%lmOP2<@5#*7o zz1dKz&hZ5+E2}WC>(@KN7DrSkza$0CI}E}D47Ihjxq+GCM2~V6@jGK1=C`F*DA^Fi z1pCn5QHCoNd+ECd)UI8WLN*Hny}VAxe1;C}MriT1adw_gxlz5VXLx9rd&}0!l>7h^ z;cEDB;?;-I#TD_Eq4|yNd%L^4>m@^?`ETBQf~WFq0s{j(TuSfYHgiPjRNN}1Z&D%+ zr6*dJA9OmcdSPn9eSHt@hJ}xAZpt4|dX+&tU2K72B;<6cu0V>FHHXPELB=xWT?3s?4nyn5j|F8`s#_ zIJFkkIlE>HCFAK&pFU+Q1?+^D6L+Nr*Ke_Q@h8AWRTYU_<8g8ll9JZkOflS-F6GN| z-J5{BehX0CbA)8CSiFUuo!$K21))#lOIh$hYq@w!82p)#rohboL#)Gx)kn!qbF}!$ z5QA4m;=!zM*;tcso28<3{*+j5SSyk+kQWSp+N8l<0B(n{{e6oTRuO@L&mh9Eg9tw> q;1?2mjvPTWN#}4*@c*Gpcj)E5g&)yRC5Xcuk&Ehjm^W%Rk^cjZhPf#K literal 0 HcmV?d00001 diff --git a/extern/libsquish-1.15/libSquish.pri b/extern/libsquish-1.15/libSquish.pri new file mode 100644 index 0000000..0313db0 --- /dev/null +++ b/extern/libsquish-1.15/libSquish.pri @@ -0,0 +1,26 @@ +HEADERS += \ + squish.h + +SOURCES += \ + alpha.cpp \ + alpha.h \ + clusterfit.cpp \ + clusterfit.h \ + colourblock.cpp \ + colourblock.h \ + colourfit.cpp \ + colourfit.h \ + colourset.cpp \ + colourset.h \ + maths.cpp \ + maths.h \ + rangefit.cpp \ + rangefit.h \ + simd.h \ + simd_float.h \ + simd_sse.h \ + simd_ve.h \ + singlecolourfit.cpp \ + singlecolourfit.h \ + 
singlecolourlookup.inl \ + squish.cpp diff --git a/extern/libsquish-1.15/libSquish.pro b/extern/libsquish-1.15/libSquish.pro new file mode 100644 index 0000000..054faa2 --- /dev/null +++ b/extern/libsquish-1.15/libSquish.pro @@ -0,0 +1,32 @@ +TARGET = squish +TEMPLATE = lib + +include(libSquish.pri) + +QT -= gui + +CONFIG += staticlib thread +CONFIG += debug_and_release + +CONFIG(debug, debug|release) { + unix:TARGET = $$join(TARGET,,,_debug) +} + +MOC_DIR = mocs +OBJECTS_DIR = objs +RCC_DIR = rccs +UI_DIR = uics + +CONFIG(debug, debug|release) { + unix:MOC_DIR = $$join(MOC_DIR,,,_debug) + unix:OBJECTS_DIR = $$join(OBJECTS_DIR,,,_debug) + unix:RCC_DIR = $$join(RCC_DIR,,,_debug) + unix:UI_DIR = $$join(UI_DIR,,,_debug) + win32:MOC_DIR = $$join(MOC_DIR,,,d) + win32:OBJECTS_DIR = $$join(OBJECTS_DIR,,,d) + win32:RCC_DIR = $$join(RCC_DIR,,,d) + win32:UI_DIR = $$join(UI_DIR,,,d) +} + +unix:QMAKE_CXXFLAGS += -DSQUISH_USE_OPENMP -fopenmp + diff --git a/extern/libsquish-1.15/libSquish.svg b/extern/libsquish-1.15/libSquish.svg new file mode 100644 index 0000000..efdcee7 --- /dev/null +++ b/extern/libsquish-1.15/libSquish.svg @@ -0,0 +1,238 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lib + + + + + Squish + + diff --git a/extern/libsquish-1.15/libsquish.pc.in b/extern/libsquish-1.15/libsquish.pc.in new file mode 100644 index 0000000..d3b95bd --- /dev/null +++ b/extern/libsquish-1.15/libsquish.pc.in @@ -0,0 +1,13 @@ +prefix=@PREFIX@ +exec_prefix=${prefix} +libdir=${prefix}/@LIB_PATH@ +sharedlibdir=${libdir} +includedir=${prefix}/include + +Name: libsquish +Description: squish DXT library +Version: 1.14 + +Requires: +Libs: -L${libdir} -L${sharedlibdir} -llibsquish +Cflags: -I${includedir} diff --git a/extern/libsquish-1.15/maths.cpp b/extern/libsquish-1.15/maths.cpp new file mode 100644 index 0000000..4fa0bcf --- /dev/null +++ b/extern/libsquish-1.15/maths.cpp @@ -0,0 +1,259 @@ +/* 
----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +/*! 
@file + + The symmetric eigensystem solver algorithm is from + http://www.geometrictools.com/Documentation/EigenSymmetric3x3.pdf +*/ + +#include "maths.h" +#include "simd.h" +#include + +namespace squish { + +Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights ) +{ + // compute the centroid + float total = 0.0f; + Vec3 centroid( 0.0f ); + for( int i = 0; i < n; ++i ) + { + total += weights[i]; + centroid += weights[i]*points[i]; + } + if( total > FLT_EPSILON ) + centroid /= total; + + // accumulate the covariance matrix + Sym3x3 covariance( 0.0f ); + for( int i = 0; i < n; ++i ) + { + Vec3 a = points[i] - centroid; + Vec3 b = weights[i]*a; + + covariance[0] += a.X()*b.X(); + covariance[1] += a.X()*b.Y(); + covariance[2] += a.X()*b.Z(); + covariance[3] += a.Y()*b.Y(); + covariance[4] += a.Y()*b.Z(); + covariance[5] += a.Z()*b.Z(); + } + + // return it + return covariance; +} + +#if 0 + +static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue ) +{ + // compute M + Sym3x3 m; + m[0] = matrix[0] - evalue; + m[1] = matrix[1]; + m[2] = matrix[2]; + m[3] = matrix[3] - evalue; + m[4] = matrix[4]; + m[5] = matrix[5] - evalue; + + // compute U + Sym3x3 u; + u[0] = m[3]*m[5] - m[4]*m[4]; + u[1] = m[2]*m[4] - m[1]*m[5]; + u[2] = m[1]*m[4] - m[2]*m[3]; + u[3] = m[0]*m[5] - m[2]*m[2]; + u[4] = m[1]*m[2] - m[4]*m[0]; + u[5] = m[0]*m[3] - m[1]*m[1]; + + // find the largest component + float mc = std::fabs( u[0] ); + int mi = 0; + for( int i = 1; i < 6; ++i ) + { + float c = std::fabs( u[i] ); + if( c > mc ) + { + mc = c; + mi = i; + } + } + + // pick the column with this component + switch( mi ) + { + case 0: + return Vec3( u[0], u[1], u[2] ); + + case 1: + case 3: + return Vec3( u[1], u[3], u[4] ); + + default: + return Vec3( u[2], u[4], u[5] ); + } +} + +static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue ) +{ + // compute M + Sym3x3 m; + m[0] = matrix[0] - evalue; + m[1] = matrix[1]; + m[2] = matrix[2]; + m[3] = 
matrix[3] - evalue; + m[4] = matrix[4]; + m[5] = matrix[5] - evalue; + + // find the largest component + float mc = std::fabs( m[0] ); + int mi = 0; + for( int i = 1; i < 6; ++i ) + { + float c = std::fabs( m[i] ); + if( c > mc ) + { + mc = c; + mi = i; + } + } + + // pick the first eigenvector based on this index + switch( mi ) + { + case 0: + case 1: + return Vec3( -m[1], m[0], 0.0f ); + + case 2: + return Vec3( m[2], 0.0f, -m[0] ); + + case 3: + case 4: + return Vec3( 0.0f, -m[4], m[3] ); + + default: + return Vec3( 0.0f, -m[5], m[4] ); + } +} + +Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ) +{ + // compute the cubic coefficients + float c0 = matrix[0]*matrix[3]*matrix[5] + + 2.0f*matrix[1]*matrix[2]*matrix[4] + - matrix[0]*matrix[4]*matrix[4] + - matrix[3]*matrix[2]*matrix[2] + - matrix[5]*matrix[1]*matrix[1]; + float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5] + - matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4]; + float c2 = matrix[0] + matrix[3] + matrix[5]; + + // compute the quadratic coefficients + float a = c1 - ( 1.0f/3.0f )*c2*c2; + float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0; + + // compute the root count check + float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a; + + // test the multiplicity + if( FLT_EPSILON < Q ) + { + // only one root, which implies we have a multiple of the identity + return Vec3( 1.0f ); + } + else if( Q < -FLT_EPSILON ) + { + // three distinct roots + float theta = std::atan2( std::sqrt( -Q ), -0.5f*b ); + float rho = std::sqrt( 0.25f*b*b - Q ); + + float rt = std::pow( rho, 1.0f/3.0f ); + float ct = std::cos( theta/3.0f ); + float st = std::sin( theta/3.0f ); + + float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct; + float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )sqrt( 3.0f )*st ); + float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )sqrt( 3.0f )*st ); + + // pick the larger + if( std::fabs( l2 ) > std::fabs( l1 ) ) + l1 = l2; + if( std::fabs( l3 ) > std::fabs( l1 ) ) + l1 = l3; 
+ + // get the eigenvector + return GetMultiplicity1Evector( matrix, l1 ); + } + else // if( -FLT_EPSILON <= Q && Q <= FLT_EPSILON ) + { + // two roots + float rt; + if( b < 0.0f ) + rt = -std::pow( -0.5f*b, 1.0f/3.0f ); + else + rt = std::pow( 0.5f*b, 1.0f/3.0f ); + + float l1 = ( 1.0f/3.0f )*c2 + rt; // repeated + float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt; + + // get the eigenvector + if( std::fabs( l1 ) > std::fabs( l2 ) ) + return GetMultiplicity2Evector( matrix, l1 ); + else + return GetMultiplicity1Evector( matrix, l2 ); + } +} + +#else + +#define POWER_ITERATION_COUNT 8 + +Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ) +{ + Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f ); + Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f ); + Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f ); + Vec4 v = VEC4_CONST( 1.0f ); + for( int i = 0; i < POWER_ITERATION_COUNT; ++i ) + { + // matrix multiply + Vec4 w = row0*v.SplatX(); + w = MultiplyAdd(row1, v.SplatY(), w); + w = MultiplyAdd(row2, v.SplatZ(), w); + + // get max component from xyz in all channels + Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ())); + + // divide through and advance + v = w*Reciprocal(a); + } + return v.GetVec3(); +} + +#endif + +} // namespace squish diff --git a/extern/libsquish-1.15/maths.h b/extern/libsquish-1.15/maths.h new file mode 100644 index 0000000..59c3219 --- /dev/null +++ b/extern/libsquish-1.15/maths.h @@ -0,0 +1,233 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, 
subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#ifndef SQUISH_MATHS_H +#define SQUISH_MATHS_H + +#include +#include +#include "config.h" + +namespace squish { + +class Vec3 +{ +public: + typedef Vec3 const& Arg; + + Vec3() + { + } + + explicit Vec3( float s ) + { + m_x = s; + m_y = s; + m_z = s; + } + + Vec3( float x, float y, float z ) + { + m_x = x; + m_y = y; + m_z = z; + } + + float X() const { return m_x; } + float Y() const { return m_y; } + float Z() const { return m_z; } + + Vec3 operator-() const + { + return Vec3( -m_x, -m_y, -m_z ); + } + + Vec3& operator+=( Arg v ) + { + m_x += v.m_x; + m_y += v.m_y; + m_z += v.m_z; + return *this; + } + + Vec3& operator-=( Arg v ) + { + m_x -= v.m_x; + m_y -= v.m_y; + m_z -= v.m_z; + return *this; + } + + Vec3& operator*=( Arg v ) + { + m_x *= v.m_x; + m_y *= v.m_y; + m_z *= v.m_z; + return *this; + } + + Vec3& operator*=( float s ) + { + m_x *= s; + m_y *= s; + m_z *= s; + return *this; + } + + Vec3& operator/=( Arg v ) + { + m_x /= v.m_x; + m_y /= v.m_y; + m_z /= v.m_z; + return *this; + } + + Vec3& operator/=( float s ) + { + float t = 1.0f/s; + m_x *= t; + m_y *= t; + m_z *= t; + return *this; + } + + friend Vec3 operator+( Arg left, Arg right ) + { + Vec3 copy( left ); + return copy += right; + } + + friend Vec3 operator-( Arg left, Arg 
right ) + { + Vec3 copy( left ); + return copy -= right; + } + + friend Vec3 operator*( Arg left, Arg right ) + { + Vec3 copy( left ); + return copy *= right; + } + + friend Vec3 operator*( Arg left, float right ) + { + Vec3 copy( left ); + return copy *= right; + } + + friend Vec3 operator*( float left, Arg right ) + { + Vec3 copy( right ); + return copy *= left; + } + + friend Vec3 operator/( Arg left, Arg right ) + { + Vec3 copy( left ); + return copy /= right; + } + + friend Vec3 operator/( Arg left, float right ) + { + Vec3 copy( left ); + return copy /= right; + } + + friend float Dot( Arg left, Arg right ) + { + return left.m_x*right.m_x + left.m_y*right.m_y + left.m_z*right.m_z; + } + + friend Vec3 Min( Arg left, Arg right ) + { + return Vec3( + std::min( left.m_x, right.m_x ), + std::min( left.m_y, right.m_y ), + std::min( left.m_z, right.m_z ) + ); + } + + friend Vec3 Max( Arg left, Arg right ) + { + return Vec3( + std::max( left.m_x, right.m_x ), + std::max( left.m_y, right.m_y ), + std::max( left.m_z, right.m_z ) + ); + } + + friend Vec3 Truncate( Arg v ) + { + return Vec3( + v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x ), + v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y ), + v.m_z > 0.0f ? 
std::floor( v.m_z ) : std::ceil( v.m_z ) + ); + } + +private: + float m_x; + float m_y; + float m_z; +}; + +inline float LengthSquared( Vec3::Arg v ) +{ + return Dot( v, v ); +} + +class Sym3x3 +{ +public: + Sym3x3() + { + } + + Sym3x3( float s ) + { + for( int i = 0; i < 6; ++i ) + m_x[i] = s; + } + + float operator[]( int index ) const + { + return m_x[index]; + } + + float& operator[]( int index ) + { + return m_x[index]; + } + +private: + float m_x[6]; +}; + +Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights ); +Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ); + +} // namespace squish + +#endif // ndef SQUISH_MATHS_H diff --git a/extern/libsquish-1.15/rangefit.cpp b/extern/libsquish-1.15/rangefit.cpp new file mode 100644 index 0000000..adc07ed --- /dev/null +++ b/extern/libsquish-1.15/rangefit.cpp @@ -0,0 +1,201 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#include "rangefit.h" +#include "colourset.h" +#include "colourblock.h" +#include + +namespace squish { + +RangeFit::RangeFit( ColourSet const* colours, int flags, float* metric ) + : ColourFit( colours, flags ) +{ + // initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f) + if( metric ) + m_metric = Vec3( metric[0], metric[1], metric[2] ); + else + m_metric = Vec3( 1.0f ); + + // initialise the best error + m_besterror = FLT_MAX; + + // cache some values + int const count = m_colours->GetCount(); + Vec3 const* values = m_colours->GetPoints(); + float const* weights = m_colours->GetWeights(); + + // get the covariance matrix + Sym3x3 covariance = ComputeWeightedCovariance( count, values, weights ); + + // compute the principle component + Vec3 principle = ComputePrincipleComponent( covariance ); + + // get the min and max range as the codebook endpoints + Vec3 start( 0.0f ); + Vec3 end( 0.0f ); + if( count > 0 ) + { + float min, max; + + // compute the range + start = end = values[0]; + min = max = Dot( values[0], principle ); + for( int i = 1; i < count; ++i ) + { + float val = Dot( values[i], principle ); + if( val < min ) + { + start = values[i]; + min = val; + } + else if( val > max ) + { + end = values[i]; + max = val; + } + } + } + + // clamp the output to [0, 1] + Vec3 const one( 1.0f ); + Vec3 const zero( 0.0f ); + start = Min( one, Max( zero, start ) ); + end = Min( one, Max( zero, end ) ); + + // clamp to the grid and save + Vec3 const grid( 31.0f, 63.0f, 31.0f ); + Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); + Vec3 const half( 0.5f ); + m_start = Truncate( grid*start + half 
)*gridrcp; + m_end = Truncate( grid*end + half )*gridrcp; +} + +void RangeFit::Compress3( void* block ) +{ + // cache some values + int const count = m_colours->GetCount(); + Vec3 const* values = m_colours->GetPoints(); + + // create a codebook + Vec3 codes[3]; + codes[0] = m_start; + codes[1] = m_end; + codes[2] = 0.5f*m_start + 0.5f*m_end; + + // match each point to the closest code + u8 closest[16]; + float error = 0.0f; + for( int i = 0; i < count; ++i ) + { + // find the closest code + float dist = FLT_MAX; + int idx = 0; + for( int j = 0; j < 3; ++j ) + { + float d = LengthSquared( m_metric*( values[i] - codes[j] ) ); + if( d < dist ) + { + dist = d; + idx = j; + } + } + + // save the index + closest[i] = ( u8 )idx; + + // accumulate the error + error += dist; + } + + // save this scheme if it wins + if( error < m_besterror ) + { + // remap the indices + u8 indices[16]; + m_colours->RemapIndices( closest, indices ); + + // save the block + WriteColourBlock3( m_start, m_end, indices, block ); + + // save the error + m_besterror = error; + } +} + +void RangeFit::Compress4( void* block ) +{ + // cache some values + int const count = m_colours->GetCount(); + Vec3 const* values = m_colours->GetPoints(); + + // create a codebook + Vec3 codes[4]; + codes[0] = m_start; + codes[1] = m_end; + codes[2] = ( 2.0f/3.0f )*m_start + ( 1.0f/3.0f )*m_end; + codes[3] = ( 1.0f/3.0f )*m_start + ( 2.0f/3.0f )*m_end; + + // match each point to the closest code + u8 closest[16]; + float error = 0.0f; + for( int i = 0; i < count; ++i ) + { + // find the closest code + float dist = FLT_MAX; + int idx = 0; + for( int j = 0; j < 4; ++j ) + { + float d = LengthSquared( m_metric*( values[i] - codes[j] ) ); + if( d < dist ) + { + dist = d; + idx = j; + } + } + + // save the index + closest[i] = ( u8 )idx; + + // accumulate the error + error += dist; + } + + // save this scheme if it wins + if( error < m_besterror ) + { + // remap the indices + u8 indices[16]; + m_colours->RemapIndices( 
closest, indices ); + + // save the block + WriteColourBlock4( m_start, m_end, indices, block ); + + // save the error + m_besterror = error; + } +} + +} // namespace squish diff --git a/extern/libsquish-1.15/rangefit.h b/extern/libsquish-1.15/rangefit.h new file mode 100644 index 0000000..bdb21a9 --- /dev/null +++ b/extern/libsquish-1.15/rangefit.h @@ -0,0 +1,54 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + -------------------------------------------------------------------------- */ + +#ifndef SQUISH_RANGEFIT_H +#define SQUISH_RANGEFIT_H + +#include "squish.h" +#include "colourfit.h" +#include "maths.h" + +namespace squish { + +class ColourSet; + +class RangeFit : public ColourFit +{ +public: + RangeFit( ColourSet const* colours, int flags, float* metric ); + +private: + virtual void Compress3( void* block ); + virtual void Compress4( void* block ); + + Vec3 m_metric; + Vec3 m_start; + Vec3 m_end; + float m_besterror; +}; + +} // squish + +#endif // ndef SQUISH_RANGEFIT_H diff --git a/extern/libsquish-1.15/simd.h b/extern/libsquish-1.15/simd.h new file mode 100644 index 0000000..1e02fa1 --- /dev/null +++ b/extern/libsquish-1.15/simd.h @@ -0,0 +1,40 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + -------------------------------------------------------------------------- */ + +#ifndef SQUISH_SIMD_H +#define SQUISH_SIMD_H + +#include "maths.h" + +#if SQUISH_USE_ALTIVEC +#include "simd_ve.h" +#elif SQUISH_USE_SSE +#include "simd_sse.h" +#else +#include "simd_float.h" +#endif + + +#endif // ndef SQUISH_SIMD_H diff --git a/extern/libsquish-1.15/simd_float.h b/extern/libsquish-1.15/simd_float.h new file mode 100644 index 0000000..030ea70 --- /dev/null +++ b/extern/libsquish-1.15/simd_float.h @@ -0,0 +1,183 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + -------------------------------------------------------------------------- */ + +#ifndef SQUISH_SIMD_FLOAT_H +#define SQUISH_SIMD_FLOAT_H + +#include + +namespace squish { + +#define VEC4_CONST( X ) Vec4( X ) + +class Vec4 +{ +public: + typedef Vec4 const& Arg; + + Vec4() {} + + explicit Vec4( float s ) + : m_x( s ), + m_y( s ), + m_z( s ), + m_w( s ) + { + } + + Vec4( float x, float y, float z, float w ) + : m_x( x ), + m_y( y ), + m_z( z ), + m_w( w ) + { + } + + Vec3 GetVec3() const + { + return Vec3( m_x, m_y, m_z ); + } + + Vec4 SplatX() const { return Vec4( m_x ); } + Vec4 SplatY() const { return Vec4( m_y ); } + Vec4 SplatZ() const { return Vec4( m_z ); } + Vec4 SplatW() const { return Vec4( m_w ); } + + Vec4& operator+=( Arg v ) + { + m_x += v.m_x; + m_y += v.m_y; + m_z += v.m_z; + m_w += v.m_w; + return *this; + } + + Vec4& operator-=( Arg v ) + { + m_x -= v.m_x; + m_y -= v.m_y; + m_z -= v.m_z; + m_w -= v.m_w; + return *this; + } + + Vec4& operator*=( Arg v ) + { + m_x *= v.m_x; + m_y *= v.m_y; + m_z *= v.m_z; + m_w *= v.m_w; + return *this; + } + + friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right ) + { + Vec4 copy( left ); + return copy += right; + } + + friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right ) + { + Vec4 copy( left ); + return copy -= right; + } + + friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right ) + { + Vec4 copy( left ); + return copy *= right; + } + + //! Returns a*b + c + friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c ) + { + return a*b + c; + } + + //! 
Returns -( a*b - c ) + friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c ) + { + return c - a*b; + } + + friend Vec4 Reciprocal( Vec4::Arg v ) + { + return Vec4( + 1.0f/v.m_x, + 1.0f/v.m_y, + 1.0f/v.m_z, + 1.0f/v.m_w + ); + } + + friend Vec4 Min( Vec4::Arg left, Vec4::Arg right ) + { + return Vec4( + std::min( left.m_x, right.m_x ), + std::min( left.m_y, right.m_y ), + std::min( left.m_z, right.m_z ), + std::min( left.m_w, right.m_w ) + ); + } + + friend Vec4 Max( Vec4::Arg left, Vec4::Arg right ) + { + return Vec4( + std::max( left.m_x, right.m_x ), + std::max( left.m_y, right.m_y ), + std::max( left.m_z, right.m_z ), + std::max( left.m_w, right.m_w ) + ); + } + + friend Vec4 Truncate( Vec4::Arg v ) + { + return Vec4( + v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x ), + v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y ), + v.m_z > 0.0f ? std::floor( v.m_z ) : std::ceil( v.m_z ), + v.m_w > 0.0f ? std::floor( v.m_w ) : std::ceil( v.m_w ) + ); + } + + friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right ) + { + return left.m_x < right.m_x + || left.m_y < right.m_y + || left.m_z < right.m_z + || left.m_w < right.m_w; + } + +private: + float m_x; + float m_y; + float m_z; + float m_w; +}; + +} // namespace squish + +#endif // ndef SQUISH_SIMD_FLOAT_H + diff --git a/extern/libsquish-1.15/simd_sse.h b/extern/libsquish-1.15/simd_sse.h new file mode 100644 index 0000000..2e8be4c --- /dev/null +++ b/extern/libsquish-1.15/simd_sse.h @@ -0,0 +1,180 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + 
permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#ifndef SQUISH_SIMD_SSE_H +#define SQUISH_SIMD_SSE_H + +#include +#if ( SQUISH_USE_SSE > 1 ) +#include +#endif + +#define SQUISH_SSE_SPLAT( a ) \ + ( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) ) + +#define SQUISH_SSE_SHUF( x, y, z, w ) \ + ( ( x ) | ( ( y ) << 2 ) | ( ( z ) << 4 ) | ( ( w ) << 6 ) ) + +namespace squish { + +#define VEC4_CONST( X ) Vec4( X ) + +class Vec4 +{ +public: + typedef Vec4 const& Arg; + + Vec4() {} + + explicit Vec4( __m128 v ) : m_v( v ) {} + + Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {} + + Vec4& operator=( Vec4 const& arg ) + { + m_v = arg.m_v; + return *this; + } + + explicit Vec4( float s ) : m_v( _mm_set1_ps( s ) ) {} + + Vec4( float x, float y, float z, float w ) : m_v( _mm_setr_ps( x, y, z, w ) ) {} + + Vec3 GetVec3() const + { +#ifdef __GNUC__ + __attribute__ ((__aligned__ (16))) float c[4]; +#else + __declspec(align(16)) float c[4]; +#endif + _mm_store_ps( c, m_v ); + return Vec3( c[0], c[1], c[2] ); + } + + Vec4 SplatX() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); } + Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); } + Vec4 SplatZ() const { return Vec4( _mm_shuffle_ps( 
m_v, m_v, SQUISH_SSE_SPLAT( 2 ) ) ); } + Vec4 SplatW() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 3 ) ) ); } + + Vec4& operator+=( Arg v ) + { + m_v = _mm_add_ps( m_v, v.m_v ); + return *this; + } + + Vec4& operator-=( Arg v ) + { + m_v = _mm_sub_ps( m_v, v.m_v ); + return *this; + } + + Vec4& operator*=( Arg v ) + { + m_v = _mm_mul_ps( m_v, v.m_v ); + return *this; + } + + friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right ) + { + return Vec4( _mm_add_ps( left.m_v, right.m_v ) ); + } + + friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right ) + { + return Vec4( _mm_sub_ps( left.m_v, right.m_v ) ); + } + + friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right ) + { + return Vec4( _mm_mul_ps( left.m_v, right.m_v ) ); + } + + //! Returns a*b + c + friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c ) + { + return Vec4( _mm_add_ps( _mm_mul_ps( a.m_v, b.m_v ), c.m_v ) ); + } + + //! Returns -( a*b - c ) + friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c ) + { + return Vec4( _mm_sub_ps( c.m_v, _mm_mul_ps( a.m_v, b.m_v ) ) ); + } + + friend Vec4 Reciprocal( Vec4::Arg v ) + { + // get the reciprocal estimate + __m128 estimate = _mm_rcp_ps( v.m_v ); + + // one round of Newton-Rhaphson refinement + __m128 diff = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v.m_v ) ); + return Vec4( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) ); + } + + friend Vec4 Min( Vec4::Arg left, Vec4::Arg right ) + { + return Vec4( _mm_min_ps( left.m_v, right.m_v ) ); + } + + friend Vec4 Max( Vec4::Arg left, Vec4::Arg right ) + { + return Vec4( _mm_max_ps( left.m_v, right.m_v ) ); + } + + friend Vec4 Truncate( Vec4::Arg v ) + { +#if ( SQUISH_USE_SSE == 1 ) + // convert to ints + __m128 input = v.m_v; + __m64 lo = _mm_cvttps_pi32( input ); + __m64 hi = _mm_cvttps_pi32( _mm_movehl_ps( input, input ) ); + + // convert to floats + __m128 part = _mm_movelh_ps( input, _mm_cvtpi32_ps( input, hi ) ); + __m128 truncated = 
_mm_cvtpi32_ps( part, lo ); + + // clear out the MMX multimedia state to allow FP calls later + _mm_empty(); + return Vec4( truncated ); +#else + // use SSE2 instructions + return Vec4( _mm_cvtepi32_ps( _mm_cvttps_epi32( v.m_v ) ) ); +#endif + } + + friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right ) + { + __m128 bits = _mm_cmplt_ps( left.m_v, right.m_v ); + int value = _mm_movemask_ps( bits ); + return value != 0; + } + +private: + __m128 m_v; +}; + +} // namespace squish + +#endif // ndef SQUISH_SIMD_SSE_H diff --git a/extern/libsquish-1.15/simd_ve.h b/extern/libsquish-1.15/simd_ve.h new file mode 100644 index 0000000..08a1537 --- /dev/null +++ b/extern/libsquish-1.15/simd_ve.h @@ -0,0 +1,166 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + -------------------------------------------------------------------------- */ + +#ifndef SQUISH_SIMD_VE_H +#define SQUISH_SIMD_VE_H + +#include +#undef bool + +namespace squish { + +#define VEC4_CONST( X ) Vec4( ( vector float ){ X } ) + +class Vec4 +{ +public: + typedef Vec4 Arg; + + Vec4() {} + + explicit Vec4( vector float v ) : m_v( v ) {} + + Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {} + + Vec4& operator=( Vec4 const& arg ) + { + m_v = arg.m_v; + return *this; + } + + explicit Vec4( float s ) + { + union { vector float v; float c[4]; } u; + u.c[0] = s; + u.c[1] = s; + u.c[2] = s; + u.c[3] = s; + m_v = u.v; + } + + Vec4( float x, float y, float z, float w ) + { + union { vector float v; float c[4]; } u; + u.c[0] = x; + u.c[1] = y; + u.c[2] = z; + u.c[3] = w; + m_v = u.v; + } + + Vec3 GetVec3() const + { + union { vector float v; float c[4]; } u; + u.v = m_v; + return Vec3( u.c[0], u.c[1], u.c[2] ); + } + + Vec4 SplatX() const { return Vec4( vec_splat( m_v, 0 ) ); } + Vec4 SplatY() const { return Vec4( vec_splat( m_v, 1 ) ); } + Vec4 SplatZ() const { return Vec4( vec_splat( m_v, 2 ) ); } + Vec4 SplatW() const { return Vec4( vec_splat( m_v, 3 ) ); } + + Vec4& operator+=( Arg v ) + { + m_v = vec_add( m_v, v.m_v ); + return *this; + } + + Vec4& operator-=( Arg v ) + { + m_v = vec_sub( m_v, v.m_v ); + return *this; + } + + Vec4& operator*=( Arg v ) + { + m_v = vec_madd( m_v, v.m_v, ( vector float ){ -0.0f } ); + return *this; + } + + friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right ) + { + return Vec4( vec_add( left.m_v, right.m_v ) ); + } + + friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right ) + { + return Vec4( vec_sub( left.m_v, right.m_v ) ); + } + + friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right ) + { + return Vec4( vec_madd( left.m_v, right.m_v, ( vector float ){ -0.0f } ) ); + } + + //! Returns a*b + c + friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c ) + { + return Vec4( vec_madd( a.m_v, b.m_v, c.m_v ) ); + } + + //! 
Returns -( a*b - c ) + friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c ) + { + return Vec4( vec_nmsub( a.m_v, b.m_v, c.m_v ) ); + } + + friend Vec4 Reciprocal( Vec4::Arg v ) + { + // get the reciprocal estimate + vector float estimate = vec_re( v.m_v ); + + // one round of Newton-Rhaphson refinement + vector float diff = vec_nmsub( estimate, v.m_v, ( vector float ){ 1.0f } ); + return Vec4( vec_madd( diff, estimate, estimate ) ); + } + + friend Vec4 Min( Vec4::Arg left, Vec4::Arg right ) + { + return Vec4( vec_min( left.m_v, right.m_v ) ); + } + + friend Vec4 Max( Vec4::Arg left, Vec4::Arg right ) + { + return Vec4( vec_max( left.m_v, right.m_v ) ); + } + + friend Vec4 Truncate( Vec4::Arg v ) + { + return Vec4( vec_trunc( v.m_v ) ); + } + + friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right ) + { + return vec_any_lt( left.m_v, right.m_v ) != 0; + } + +private: + vector float m_v; +}; + +} // namespace squish + +#endif // ndef SQUISH_SIMD_VE_H diff --git a/extern/libsquish-1.15/singlecolourfit.cpp b/extern/libsquish-1.15/singlecolourfit.cpp new file mode 100644 index 0000000..cef0ebc --- /dev/null +++ b/extern/libsquish-1.15/singlecolourfit.cpp @@ -0,0 +1,172 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#include "singlecolourfit.h" +#include "colourset.h" +#include "colourblock.h" + +namespace squish { + +struct SourceBlock +{ + u8 start; + u8 end; + u8 error; +}; + +struct SingleColourLookup +{ + SourceBlock sources[2]; +}; + +#include "singlecolourlookup.inl" + +static int FloatToInt( float a, int limit ) +{ + // use ANSI round-to-zero behaviour to get round-to-nearest + int i = ( int )( a + 0.5f ); + + // clamp to the limit + if( i < 0 ) + i = 0; + else if( i > limit ) + i = limit; + + // done + return i; +} + +SingleColourFit::SingleColourFit( ColourSet const* colours, int flags ) + : ColourFit( colours, flags ) +{ + // grab the single colour + Vec3 const* values = m_colours->GetPoints(); + m_colour[0] = ( u8 )FloatToInt( 255.0f*values->X(), 255 ); + m_colour[1] = ( u8 )FloatToInt( 255.0f*values->Y(), 255 ); + m_colour[2] = ( u8 )FloatToInt( 255.0f*values->Z(), 255 ); + + // initialise the best error + m_besterror = INT_MAX; +} + +void SingleColourFit::Compress3( void* block ) +{ + // build the table of lookups + SingleColourLookup const* const lookups[] = + { + lookup_5_3, + lookup_6_3, + lookup_5_3 + }; + + // find the best end-points and index + ComputeEndPoints( lookups ); + + // build the block if we win + if( m_error < m_besterror ) + { + // remap the indices + u8 indices[16]; + m_colours->RemapIndices( &m_index, indices ); + + // save the block + WriteColourBlock3( m_start, m_end, indices, block ); + + 
// save the error + m_besterror = m_error; + } +} + +void SingleColourFit::Compress4( void* block ) +{ + // build the table of lookups + SingleColourLookup const* const lookups[] = + { + lookup_5_4, + lookup_6_4, + lookup_5_4 + }; + + // find the best end-points and index + ComputeEndPoints( lookups ); + + // build the block if we win + if( m_error < m_besterror ) + { + // remap the indices + u8 indices[16]; + m_colours->RemapIndices( &m_index, indices ); + + // save the block + WriteColourBlock4( m_start, m_end, indices, block ); + + // save the error + m_besterror = m_error; + } +} + +void SingleColourFit::ComputeEndPoints( SingleColourLookup const* const* lookups ) +{ + // check each index combination (endpoint or intermediate) + m_error = INT_MAX; + for( int index = 0; index < 2; ++index ) + { + // check the error for this codebook index + SourceBlock const* sources[3]; + int error = 0; + for( int channel = 0; channel < 3; ++channel ) + { + // grab the lookup table and index for this channel + SingleColourLookup const* lookup = lookups[channel]; + int target = m_colour[channel]; + + // store a pointer to the source for this channel + sources[channel] = lookup[target].sources + index; + + // accumulate the error + int diff = sources[channel]->error; + error += diff*diff; + } + + // keep it if the error is lower + if( error < m_error ) + { + m_start = Vec3( + ( float )sources[0]->start/31.0f, + ( float )sources[1]->start/63.0f, + ( float )sources[2]->start/31.0f + ); + m_end = Vec3( + ( float )sources[0]->end/31.0f, + ( float )sources[1]->end/63.0f, + ( float )sources[2]->end/31.0f + ); + m_index = ( u8 )( 2*index ); + m_error = error; + } + } +} + +} // namespace squish diff --git a/extern/libsquish-1.15/singlecolourfit.h b/extern/libsquish-1.15/singlecolourfit.h new file mode 100644 index 0000000..974ce77 --- /dev/null +++ b/extern/libsquish-1.15/singlecolourfit.h @@ -0,0 +1,58 @@ +/* ----------------------------------------------------------------------------- 
+ + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + -------------------------------------------------------------------------- */ + +#ifndef SQUISH_SINGLECOLOURFIT_H +#define SQUISH_SINGLECOLOURFIT_H + +#include "squish.h" +#include "colourfit.h" + +namespace squish { + +class ColourSet; +struct SingleColourLookup; + +class SingleColourFit : public ColourFit +{ +public: + SingleColourFit( ColourSet const* colours, int flags ); + +private: + virtual void Compress3( void* block ); + virtual void Compress4( void* block ); + + void ComputeEndPoints( SingleColourLookup const* const* lookups ); + + u8 m_colour[3]; + Vec3 m_start; + Vec3 m_end; + u8 m_index; + int m_error; + int m_besterror; +}; + +} // namespace squish + +#endif // ndef SQUISH_SINGLECOLOURFIT_H diff --git a/extern/libsquish-1.15/singlecolourlookup.inl b/extern/libsquish-1.15/singlecolourlookup.inl new file mode 100644 index 0000000..5b44a1e --- /dev/null +++ b/extern/libsquish-1.15/singlecolourlookup.inl @@ -0,0 +1,1064 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +static SingleColourLookup const lookup_5_3[] = +{ + { { { 0, 0, 0 }, { 0, 0, 0 } } }, + { { { 0, 0, 1 }, { 0, 0, 1 } } }, + { { { 0, 0, 2 }, { 0, 0, 2 } } }, + { { { 0, 0, 3 }, { 0, 1, 1 } } }, + { { { 0, 0, 4 }, { 0, 1, 0 } } }, + { { { 1, 0, 3 }, { 0, 1, 1 } } }, + { { { 1, 0, 2 }, { 0, 1, 2 } } }, + { { { 1, 0, 1 }, { 0, 2, 1 } } }, + { { { 1, 0, 0 }, { 0, 2, 0 } } }, + { { { 1, 0, 1 }, { 0, 2, 1 } } }, + { { { 1, 0, 2 }, { 0, 2, 2 } } }, + { { { 1, 0, 3 }, { 0, 3, 1 } } }, + { { { 1, 0, 4 }, { 0, 3, 0 } } }, + { { { 2, 0, 3 }, { 0, 3, 1 } } }, + { { { 2, 0, 2 }, { 0, 3, 2 } } }, + { { { 2, 0, 1 }, { 0, 4, 1 } } }, + { { { 2, 0, 0 }, { 0, 4, 0 } } }, + { { { 2, 0, 1 }, { 0, 4, 1 } } }, + { { { 2, 0, 2 }, { 0, 4, 2 } } }, + { { { 2, 0, 3 }, { 0, 5, 1 } } }, + { { { 2, 0, 4 }, { 0, 5, 0 } } }, + { { { 3, 0, 3 }, { 0, 5, 1 } } }, + { { { 3, 0, 2 }, { 0, 5, 2 } } }, + { { { 3, 0, 1 }, { 0, 6, 1 } } }, + { { { 3, 0, 0 }, { 0, 6, 0 } } }, + { { { 3, 0, 1 }, { 0, 6, 1 } } }, + { { { 3, 0, 2 }, { 0, 6, 2 } } }, + { { { 3, 0, 3 }, { 0, 7, 1 } } }, + { { { 3, 0, 4 }, { 0, 7, 0 } } }, + { { { 4, 0, 4 }, { 0, 7, 1 } } }, + { { { 4, 0, 3 }, { 0, 7, 2 } } }, + { { { 4, 0, 2 }, { 1, 7, 1 } } }, + { { { 4, 0, 1 }, { 1, 7, 0 } } }, + { { { 4, 0, 0 }, { 0, 8, 0 } } }, + { { { 4, 0, 1 }, { 0, 8, 1 } } }, + { { { 4, 0, 2 }, { 2, 7, 1 } } }, + { { { 4, 0, 3 }, { 2, 7, 0 } } }, + { { { 4, 0, 4 }, { 0, 9, 0 } } }, + { { { 5, 0, 3 }, { 0, 9, 1 } } }, + { { { 5, 0, 2 }, { 3, 7, 1 } } }, + { { { 5, 0, 1 }, { 3, 7, 0 } } }, + { { { 5, 0, 0 }, { 0, 10, 0 } } }, + { { { 5, 0, 1 }, { 0, 10, 1 } } }, + { { { 5, 0, 2 }, { 0, 10, 2 } } }, + { { { 5, 
0, 3 }, { 0, 11, 1 } } }, + { { { 5, 0, 4 }, { 0, 11, 0 } } }, + { { { 6, 0, 3 }, { 0, 11, 1 } } }, + { { { 6, 0, 2 }, { 0, 11, 2 } } }, + { { { 6, 0, 1 }, { 0, 12, 1 } } }, + { { { 6, 0, 0 }, { 0, 12, 0 } } }, + { { { 6, 0, 1 }, { 0, 12, 1 } } }, + { { { 6, 0, 2 }, { 0, 12, 2 } } }, + { { { 6, 0, 3 }, { 0, 13, 1 } } }, + { { { 6, 0, 4 }, { 0, 13, 0 } } }, + { { { 7, 0, 3 }, { 0, 13, 1 } } }, + { { { 7, 0, 2 }, { 0, 13, 2 } } }, + { { { 7, 0, 1 }, { 0, 14, 1 } } }, + { { { 7, 0, 0 }, { 0, 14, 0 } } }, + { { { 7, 0, 1 }, { 0, 14, 1 } } }, + { { { 7, 0, 2 }, { 0, 14, 2 } } }, + { { { 7, 0, 3 }, { 0, 15, 1 } } }, + { { { 7, 0, 4 }, { 0, 15, 0 } } }, + { { { 8, 0, 4 }, { 0, 15, 1 } } }, + { { { 8, 0, 3 }, { 0, 15, 2 } } }, + { { { 8, 0, 2 }, { 1, 15, 1 } } }, + { { { 8, 0, 1 }, { 1, 15, 0 } } }, + { { { 8, 0, 0 }, { 0, 16, 0 } } }, + { { { 8, 0, 1 }, { 0, 16, 1 } } }, + { { { 8, 0, 2 }, { 2, 15, 1 } } }, + { { { 8, 0, 3 }, { 2, 15, 0 } } }, + { { { 8, 0, 4 }, { 0, 17, 0 } } }, + { { { 9, 0, 3 }, { 0, 17, 1 } } }, + { { { 9, 0, 2 }, { 3, 15, 1 } } }, + { { { 9, 0, 1 }, { 3, 15, 0 } } }, + { { { 9, 0, 0 }, { 0, 18, 0 } } }, + { { { 9, 0, 1 }, { 0, 18, 1 } } }, + { { { 9, 0, 2 }, { 0, 18, 2 } } }, + { { { 9, 0, 3 }, { 0, 19, 1 } } }, + { { { 9, 0, 4 }, { 0, 19, 0 } } }, + { { { 10, 0, 3 }, { 0, 19, 1 } } }, + { { { 10, 0, 2 }, { 0, 19, 2 } } }, + { { { 10, 0, 1 }, { 0, 20, 1 } } }, + { { { 10, 0, 0 }, { 0, 20, 0 } } }, + { { { 10, 0, 1 }, { 0, 20, 1 } } }, + { { { 10, 0, 2 }, { 0, 20, 2 } } }, + { { { 10, 0, 3 }, { 0, 21, 1 } } }, + { { { 10, 0, 4 }, { 0, 21, 0 } } }, + { { { 11, 0, 3 }, { 0, 21, 1 } } }, + { { { 11, 0, 2 }, { 0, 21, 2 } } }, + { { { 11, 0, 1 }, { 0, 22, 1 } } }, + { { { 11, 0, 0 }, { 0, 22, 0 } } }, + { { { 11, 0, 1 }, { 0, 22, 1 } } }, + { { { 11, 0, 2 }, { 0, 22, 2 } } }, + { { { 11, 0, 3 }, { 0, 23, 1 } } }, + { { { 11, 0, 4 }, { 0, 23, 0 } } }, + { { { 12, 0, 4 }, { 0, 23, 1 } } }, + { { { 12, 0, 3 }, { 0, 23, 2 } } }, + { { { 12, 0, 2 }, { 1, 23, 1 
} } }, + { { { 12, 0, 1 }, { 1, 23, 0 } } }, + { { { 12, 0, 0 }, { 0, 24, 0 } } }, + { { { 12, 0, 1 }, { 0, 24, 1 } } }, + { { { 12, 0, 2 }, { 2, 23, 1 } } }, + { { { 12, 0, 3 }, { 2, 23, 0 } } }, + { { { 12, 0, 4 }, { 0, 25, 0 } } }, + { { { 13, 0, 3 }, { 0, 25, 1 } } }, + { { { 13, 0, 2 }, { 3, 23, 1 } } }, + { { { 13, 0, 1 }, { 3, 23, 0 } } }, + { { { 13, 0, 0 }, { 0, 26, 0 } } }, + { { { 13, 0, 1 }, { 0, 26, 1 } } }, + { { { 13, 0, 2 }, { 0, 26, 2 } } }, + { { { 13, 0, 3 }, { 0, 27, 1 } } }, + { { { 13, 0, 4 }, { 0, 27, 0 } } }, + { { { 14, 0, 3 }, { 0, 27, 1 } } }, + { { { 14, 0, 2 }, { 0, 27, 2 } } }, + { { { 14, 0, 1 }, { 0, 28, 1 } } }, + { { { 14, 0, 0 }, { 0, 28, 0 } } }, + { { { 14, 0, 1 }, { 0, 28, 1 } } }, + { { { 14, 0, 2 }, { 0, 28, 2 } } }, + { { { 14, 0, 3 }, { 0, 29, 1 } } }, + { { { 14, 0, 4 }, { 0, 29, 0 } } }, + { { { 15, 0, 3 }, { 0, 29, 1 } } }, + { { { 15, 0, 2 }, { 0, 29, 2 } } }, + { { { 15, 0, 1 }, { 0, 30, 1 } } }, + { { { 15, 0, 0 }, { 0, 30, 0 } } }, + { { { 15, 0, 1 }, { 0, 30, 1 } } }, + { { { 15, 0, 2 }, { 0, 30, 2 } } }, + { { { 15, 0, 3 }, { 0, 31, 1 } } }, + { { { 15, 0, 4 }, { 0, 31, 0 } } }, + { { { 16, 0, 4 }, { 0, 31, 1 } } }, + { { { 16, 0, 3 }, { 0, 31, 2 } } }, + { { { 16, 0, 2 }, { 1, 31, 1 } } }, + { { { 16, 0, 1 }, { 1, 31, 0 } } }, + { { { 16, 0, 0 }, { 4, 28, 0 } } }, + { { { 16, 0, 1 }, { 4, 28, 1 } } }, + { { { 16, 0, 2 }, { 2, 31, 1 } } }, + { { { 16, 0, 3 }, { 2, 31, 0 } } }, + { { { 16, 0, 4 }, { 4, 29, 0 } } }, + { { { 17, 0, 3 }, { 4, 29, 1 } } }, + { { { 17, 0, 2 }, { 3, 31, 1 } } }, + { { { 17, 0, 1 }, { 3, 31, 0 } } }, + { { { 17, 0, 0 }, { 4, 30, 0 } } }, + { { { 17, 0, 1 }, { 4, 30, 1 } } }, + { { { 17, 0, 2 }, { 4, 30, 2 } } }, + { { { 17, 0, 3 }, { 4, 31, 1 } } }, + { { { 17, 0, 4 }, { 4, 31, 0 } } }, + { { { 18, 0, 3 }, { 4, 31, 1 } } }, + { { { 18, 0, 2 }, { 4, 31, 2 } } }, + { { { 18, 0, 1 }, { 5, 31, 1 } } }, + { { { 18, 0, 0 }, { 5, 31, 0 } } }, + { { { 18, 0, 1 }, { 5, 31, 1 } } }, + { { { 18, 0, 2 
}, { 5, 31, 2 } } }, + { { { 18, 0, 3 }, { 6, 31, 1 } } }, + { { { 18, 0, 4 }, { 6, 31, 0 } } }, + { { { 19, 0, 3 }, { 6, 31, 1 } } }, + { { { 19, 0, 2 }, { 6, 31, 2 } } }, + { { { 19, 0, 1 }, { 7, 31, 1 } } }, + { { { 19, 0, 0 }, { 7, 31, 0 } } }, + { { { 19, 0, 1 }, { 7, 31, 1 } } }, + { { { 19, 0, 2 }, { 7, 31, 2 } } }, + { { { 19, 0, 3 }, { 8, 31, 1 } } }, + { { { 19, 0, 4 }, { 8, 31, 0 } } }, + { { { 20, 0, 4 }, { 8, 31, 1 } } }, + { { { 20, 0, 3 }, { 8, 31, 2 } } }, + { { { 20, 0, 2 }, { 9, 31, 1 } } }, + { { { 20, 0, 1 }, { 9, 31, 0 } } }, + { { { 20, 0, 0 }, { 12, 28, 0 } } }, + { { { 20, 0, 1 }, { 12, 28, 1 } } }, + { { { 20, 0, 2 }, { 10, 31, 1 } } }, + { { { 20, 0, 3 }, { 10, 31, 0 } } }, + { { { 20, 0, 4 }, { 12, 29, 0 } } }, + { { { 21, 0, 3 }, { 12, 29, 1 } } }, + { { { 21, 0, 2 }, { 11, 31, 1 } } }, + { { { 21, 0, 1 }, { 11, 31, 0 } } }, + { { { 21, 0, 0 }, { 12, 30, 0 } } }, + { { { 21, 0, 1 }, { 12, 30, 1 } } }, + { { { 21, 0, 2 }, { 12, 30, 2 } } }, + { { { 21, 0, 3 }, { 12, 31, 1 } } }, + { { { 21, 0, 4 }, { 12, 31, 0 } } }, + { { { 22, 0, 3 }, { 12, 31, 1 } } }, + { { { 22, 0, 2 }, { 12, 31, 2 } } }, + { { { 22, 0, 1 }, { 13, 31, 1 } } }, + { { { 22, 0, 0 }, { 13, 31, 0 } } }, + { { { 22, 0, 1 }, { 13, 31, 1 } } }, + { { { 22, 0, 2 }, { 13, 31, 2 } } }, + { { { 22, 0, 3 }, { 14, 31, 1 } } }, + { { { 22, 0, 4 }, { 14, 31, 0 } } }, + { { { 23, 0, 3 }, { 14, 31, 1 } } }, + { { { 23, 0, 2 }, { 14, 31, 2 } } }, + { { { 23, 0, 1 }, { 15, 31, 1 } } }, + { { { 23, 0, 0 }, { 15, 31, 0 } } }, + { { { 23, 0, 1 }, { 15, 31, 1 } } }, + { { { 23, 0, 2 }, { 15, 31, 2 } } }, + { { { 23, 0, 3 }, { 16, 31, 1 } } }, + { { { 23, 0, 4 }, { 16, 31, 0 } } }, + { { { 24, 0, 4 }, { 16, 31, 1 } } }, + { { { 24, 0, 3 }, { 16, 31, 2 } } }, + { { { 24, 0, 2 }, { 17, 31, 1 } } }, + { { { 24, 0, 1 }, { 17, 31, 0 } } }, + { { { 24, 0, 0 }, { 20, 28, 0 } } }, + { { { 24, 0, 1 }, { 20, 28, 1 } } }, + { { { 24, 0, 2 }, { 18, 31, 1 } } }, + { { { 24, 0, 3 }, { 18, 31, 0 } } }, + { 
{ { 24, 0, 4 }, { 20, 29, 0 } } }, + { { { 25, 0, 3 }, { 20, 29, 1 } } }, + { { { 25, 0, 2 }, { 19, 31, 1 } } }, + { { { 25, 0, 1 }, { 19, 31, 0 } } }, + { { { 25, 0, 0 }, { 20, 30, 0 } } }, + { { { 25, 0, 1 }, { 20, 30, 1 } } }, + { { { 25, 0, 2 }, { 20, 30, 2 } } }, + { { { 25, 0, 3 }, { 20, 31, 1 } } }, + { { { 25, 0, 4 }, { 20, 31, 0 } } }, + { { { 26, 0, 3 }, { 20, 31, 1 } } }, + { { { 26, 0, 2 }, { 20, 31, 2 } } }, + { { { 26, 0, 1 }, { 21, 31, 1 } } }, + { { { 26, 0, 0 }, { 21, 31, 0 } } }, + { { { 26, 0, 1 }, { 21, 31, 1 } } }, + { { { 26, 0, 2 }, { 21, 31, 2 } } }, + { { { 26, 0, 3 }, { 22, 31, 1 } } }, + { { { 26, 0, 4 }, { 22, 31, 0 } } }, + { { { 27, 0, 3 }, { 22, 31, 1 } } }, + { { { 27, 0, 2 }, { 22, 31, 2 } } }, + { { { 27, 0, 1 }, { 23, 31, 1 } } }, + { { { 27, 0, 0 }, { 23, 31, 0 } } }, + { { { 27, 0, 1 }, { 23, 31, 1 } } }, + { { { 27, 0, 2 }, { 23, 31, 2 } } }, + { { { 27, 0, 3 }, { 24, 31, 1 } } }, + { { { 27, 0, 4 }, { 24, 31, 0 } } }, + { { { 28, 0, 4 }, { 24, 31, 1 } } }, + { { { 28, 0, 3 }, { 24, 31, 2 } } }, + { { { 28, 0, 2 }, { 25, 31, 1 } } }, + { { { 28, 0, 1 }, { 25, 31, 0 } } }, + { { { 28, 0, 0 }, { 28, 28, 0 } } }, + { { { 28, 0, 1 }, { 28, 28, 1 } } }, + { { { 28, 0, 2 }, { 26, 31, 1 } } }, + { { { 28, 0, 3 }, { 26, 31, 0 } } }, + { { { 28, 0, 4 }, { 28, 29, 0 } } }, + { { { 29, 0, 3 }, { 28, 29, 1 } } }, + { { { 29, 0, 2 }, { 27, 31, 1 } } }, + { { { 29, 0, 1 }, { 27, 31, 0 } } }, + { { { 29, 0, 0 }, { 28, 30, 0 } } }, + { { { 29, 0, 1 }, { 28, 30, 1 } } }, + { { { 29, 0, 2 }, { 28, 30, 2 } } }, + { { { 29, 0, 3 }, { 28, 31, 1 } } }, + { { { 29, 0, 4 }, { 28, 31, 0 } } }, + { { { 30, 0, 3 }, { 28, 31, 1 } } }, + { { { 30, 0, 2 }, { 28, 31, 2 } } }, + { { { 30, 0, 1 }, { 29, 31, 1 } } }, + { { { 30, 0, 0 }, { 29, 31, 0 } } }, + { { { 30, 0, 1 }, { 29, 31, 1 } } }, + { { { 30, 0, 2 }, { 29, 31, 2 } } }, + { { { 30, 0, 3 }, { 30, 31, 1 } } }, + { { { 30, 0, 4 }, { 30, 31, 0 } } }, + { { { 31, 0, 3 }, { 30, 31, 1 } } }, + { { { 31, 0, 
2 }, { 30, 31, 2 } } }, + { { { 31, 0, 1 }, { 31, 31, 1 } } }, + { { { 31, 0, 0 }, { 31, 31, 0 } } } +}; + +static SingleColourLookup const lookup_6_3[] = +{ + { { { 0, 0, 0 }, { 0, 0, 0 } } }, + { { { 0, 0, 1 }, { 0, 1, 1 } } }, + { { { 0, 0, 2 }, { 0, 1, 0 } } }, + { { { 1, 0, 1 }, { 0, 2, 1 } } }, + { { { 1, 0, 0 }, { 0, 2, 0 } } }, + { { { 1, 0, 1 }, { 0, 3, 1 } } }, + { { { 1, 0, 2 }, { 0, 3, 0 } } }, + { { { 2, 0, 1 }, { 0, 4, 1 } } }, + { { { 2, 0, 0 }, { 0, 4, 0 } } }, + { { { 2, 0, 1 }, { 0, 5, 1 } } }, + { { { 2, 0, 2 }, { 0, 5, 0 } } }, + { { { 3, 0, 1 }, { 0, 6, 1 } } }, + { { { 3, 0, 0 }, { 0, 6, 0 } } }, + { { { 3, 0, 1 }, { 0, 7, 1 } } }, + { { { 3, 0, 2 }, { 0, 7, 0 } } }, + { { { 4, 0, 1 }, { 0, 8, 1 } } }, + { { { 4, 0, 0 }, { 0, 8, 0 } } }, + { { { 4, 0, 1 }, { 0, 9, 1 } } }, + { { { 4, 0, 2 }, { 0, 9, 0 } } }, + { { { 5, 0, 1 }, { 0, 10, 1 } } }, + { { { 5, 0, 0 }, { 0, 10, 0 } } }, + { { { 5, 0, 1 }, { 0, 11, 1 } } }, + { { { 5, 0, 2 }, { 0, 11, 0 } } }, + { { { 6, 0, 1 }, { 0, 12, 1 } } }, + { { { 6, 0, 0 }, { 0, 12, 0 } } }, + { { { 6, 0, 1 }, { 0, 13, 1 } } }, + { { { 6, 0, 2 }, { 0, 13, 0 } } }, + { { { 7, 0, 1 }, { 0, 14, 1 } } }, + { { { 7, 0, 0 }, { 0, 14, 0 } } }, + { { { 7, 0, 1 }, { 0, 15, 1 } } }, + { { { 7, 0, 2 }, { 0, 15, 0 } } }, + { { { 8, 0, 1 }, { 0, 16, 1 } } }, + { { { 8, 0, 0 }, { 0, 16, 0 } } }, + { { { 8, 0, 1 }, { 0, 17, 1 } } }, + { { { 8, 0, 2 }, { 0, 17, 0 } } }, + { { { 9, 0, 1 }, { 0, 18, 1 } } }, + { { { 9, 0, 0 }, { 0, 18, 0 } } }, + { { { 9, 0, 1 }, { 0, 19, 1 } } }, + { { { 9, 0, 2 }, { 0, 19, 0 } } }, + { { { 10, 0, 1 }, { 0, 20, 1 } } }, + { { { 10, 0, 0 }, { 0, 20, 0 } } }, + { { { 10, 0, 1 }, { 0, 21, 1 } } }, + { { { 10, 0, 2 }, { 0, 21, 0 } } }, + { { { 11, 0, 1 }, { 0, 22, 1 } } }, + { { { 11, 0, 0 }, { 0, 22, 0 } } }, + { { { 11, 0, 1 }, { 0, 23, 1 } } }, + { { { 11, 0, 2 }, { 0, 23, 0 } } }, + { { { 12, 0, 1 }, { 0, 24, 1 } } }, + { { { 12, 0, 0 }, { 0, 24, 0 } } }, + { { { 12, 0, 1 }, { 0, 25, 1 } } }, 
+ { { { 12, 0, 2 }, { 0, 25, 0 } } }, + { { { 13, 0, 1 }, { 0, 26, 1 } } }, + { { { 13, 0, 0 }, { 0, 26, 0 } } }, + { { { 13, 0, 1 }, { 0, 27, 1 } } }, + { { { 13, 0, 2 }, { 0, 27, 0 } } }, + { { { 14, 0, 1 }, { 0, 28, 1 } } }, + { { { 14, 0, 0 }, { 0, 28, 0 } } }, + { { { 14, 0, 1 }, { 0, 29, 1 } } }, + { { { 14, 0, 2 }, { 0, 29, 0 } } }, + { { { 15, 0, 1 }, { 0, 30, 1 } } }, + { { { 15, 0, 0 }, { 0, 30, 0 } } }, + { { { 15, 0, 1 }, { 0, 31, 1 } } }, + { { { 15, 0, 2 }, { 0, 31, 0 } } }, + { { { 16, 0, 2 }, { 1, 31, 1 } } }, + { { { 16, 0, 1 }, { 1, 31, 0 } } }, + { { { 16, 0, 0 }, { 0, 32, 0 } } }, + { { { 16, 0, 1 }, { 2, 31, 0 } } }, + { { { 16, 0, 2 }, { 0, 33, 0 } } }, + { { { 17, 0, 1 }, { 3, 31, 0 } } }, + { { { 17, 0, 0 }, { 0, 34, 0 } } }, + { { { 17, 0, 1 }, { 4, 31, 0 } } }, + { { { 17, 0, 2 }, { 0, 35, 0 } } }, + { { { 18, 0, 1 }, { 5, 31, 0 } } }, + { { { 18, 0, 0 }, { 0, 36, 0 } } }, + { { { 18, 0, 1 }, { 6, 31, 0 } } }, + { { { 18, 0, 2 }, { 0, 37, 0 } } }, + { { { 19, 0, 1 }, { 7, 31, 0 } } }, + { { { 19, 0, 0 }, { 0, 38, 0 } } }, + { { { 19, 0, 1 }, { 8, 31, 0 } } }, + { { { 19, 0, 2 }, { 0, 39, 0 } } }, + { { { 20, 0, 1 }, { 9, 31, 0 } } }, + { { { 20, 0, 0 }, { 0, 40, 0 } } }, + { { { 20, 0, 1 }, { 10, 31, 0 } } }, + { { { 20, 0, 2 }, { 0, 41, 0 } } }, + { { { 21, 0, 1 }, { 11, 31, 0 } } }, + { { { 21, 0, 0 }, { 0, 42, 0 } } }, + { { { 21, 0, 1 }, { 12, 31, 0 } } }, + { { { 21, 0, 2 }, { 0, 43, 0 } } }, + { { { 22, 0, 1 }, { 13, 31, 0 } } }, + { { { 22, 0, 0 }, { 0, 44, 0 } } }, + { { { 22, 0, 1 }, { 14, 31, 0 } } }, + { { { 22, 0, 2 }, { 0, 45, 0 } } }, + { { { 23, 0, 1 }, { 15, 31, 0 } } }, + { { { 23, 0, 0 }, { 0, 46, 0 } } }, + { { { 23, 0, 1 }, { 0, 47, 1 } } }, + { { { 23, 0, 2 }, { 0, 47, 0 } } }, + { { { 24, 0, 1 }, { 0, 48, 1 } } }, + { { { 24, 0, 0 }, { 0, 48, 0 } } }, + { { { 24, 0, 1 }, { 0, 49, 1 } } }, + { { { 24, 0, 2 }, { 0, 49, 0 } } }, + { { { 25, 0, 1 }, { 0, 50, 1 } } }, + { { { 25, 0, 0 }, { 0, 50, 0 } } }, + { { { 25, 0, 1 
}, { 0, 51, 1 } } }, + { { { 25, 0, 2 }, { 0, 51, 0 } } }, + { { { 26, 0, 1 }, { 0, 52, 1 } } }, + { { { 26, 0, 0 }, { 0, 52, 0 } } }, + { { { 26, 0, 1 }, { 0, 53, 1 } } }, + { { { 26, 0, 2 }, { 0, 53, 0 } } }, + { { { 27, 0, 1 }, { 0, 54, 1 } } }, + { { { 27, 0, 0 }, { 0, 54, 0 } } }, + { { { 27, 0, 1 }, { 0, 55, 1 } } }, + { { { 27, 0, 2 }, { 0, 55, 0 } } }, + { { { 28, 0, 1 }, { 0, 56, 1 } } }, + { { { 28, 0, 0 }, { 0, 56, 0 } } }, + { { { 28, 0, 1 }, { 0, 57, 1 } } }, + { { { 28, 0, 2 }, { 0, 57, 0 } } }, + { { { 29, 0, 1 }, { 0, 58, 1 } } }, + { { { 29, 0, 0 }, { 0, 58, 0 } } }, + { { { 29, 0, 1 }, { 0, 59, 1 } } }, + { { { 29, 0, 2 }, { 0, 59, 0 } } }, + { { { 30, 0, 1 }, { 0, 60, 1 } } }, + { { { 30, 0, 0 }, { 0, 60, 0 } } }, + { { { 30, 0, 1 }, { 0, 61, 1 } } }, + { { { 30, 0, 2 }, { 0, 61, 0 } } }, + { { { 31, 0, 1 }, { 0, 62, 1 } } }, + { { { 31, 0, 0 }, { 0, 62, 0 } } }, + { { { 31, 0, 1 }, { 0, 63, 1 } } }, + { { { 31, 0, 2 }, { 0, 63, 0 } } }, + { { { 32, 0, 2 }, { 1, 63, 1 } } }, + { { { 32, 0, 1 }, { 1, 63, 0 } } }, + { { { 32, 0, 0 }, { 16, 48, 0 } } }, + { { { 32, 0, 1 }, { 2, 63, 0 } } }, + { { { 32, 0, 2 }, { 16, 49, 0 } } }, + { { { 33, 0, 1 }, { 3, 63, 0 } } }, + { { { 33, 0, 0 }, { 16, 50, 0 } } }, + { { { 33, 0, 1 }, { 4, 63, 0 } } }, + { { { 33, 0, 2 }, { 16, 51, 0 } } }, + { { { 34, 0, 1 }, { 5, 63, 0 } } }, + { { { 34, 0, 0 }, { 16, 52, 0 } } }, + { { { 34, 0, 1 }, { 6, 63, 0 } } }, + { { { 34, 0, 2 }, { 16, 53, 0 } } }, + { { { 35, 0, 1 }, { 7, 63, 0 } } }, + { { { 35, 0, 0 }, { 16, 54, 0 } } }, + { { { 35, 0, 1 }, { 8, 63, 0 } } }, + { { { 35, 0, 2 }, { 16, 55, 0 } } }, + { { { 36, 0, 1 }, { 9, 63, 0 } } }, + { { { 36, 0, 0 }, { 16, 56, 0 } } }, + { { { 36, 0, 1 }, { 10, 63, 0 } } }, + { { { 36, 0, 2 }, { 16, 57, 0 } } }, + { { { 37, 0, 1 }, { 11, 63, 0 } } }, + { { { 37, 0, 0 }, { 16, 58, 0 } } }, + { { { 37, 0, 1 }, { 12, 63, 0 } } }, + { { { 37, 0, 2 }, { 16, 59, 0 } } }, + { { { 38, 0, 1 }, { 13, 63, 0 } } }, + { { { 38, 0, 0 }, { 
16, 60, 0 } } }, + { { { 38, 0, 1 }, { 14, 63, 0 } } }, + { { { 38, 0, 2 }, { 16, 61, 0 } } }, + { { { 39, 0, 1 }, { 15, 63, 0 } } }, + { { { 39, 0, 0 }, { 16, 62, 0 } } }, + { { { 39, 0, 1 }, { 16, 63, 1 } } }, + { { { 39, 0, 2 }, { 16, 63, 0 } } }, + { { { 40, 0, 1 }, { 17, 63, 1 } } }, + { { { 40, 0, 0 }, { 17, 63, 0 } } }, + { { { 40, 0, 1 }, { 18, 63, 1 } } }, + { { { 40, 0, 2 }, { 18, 63, 0 } } }, + { { { 41, 0, 1 }, { 19, 63, 1 } } }, + { { { 41, 0, 0 }, { 19, 63, 0 } } }, + { { { 41, 0, 1 }, { 20, 63, 1 } } }, + { { { 41, 0, 2 }, { 20, 63, 0 } } }, + { { { 42, 0, 1 }, { 21, 63, 1 } } }, + { { { 42, 0, 0 }, { 21, 63, 0 } } }, + { { { 42, 0, 1 }, { 22, 63, 1 } } }, + { { { 42, 0, 2 }, { 22, 63, 0 } } }, + { { { 43, 0, 1 }, { 23, 63, 1 } } }, + { { { 43, 0, 0 }, { 23, 63, 0 } } }, + { { { 43, 0, 1 }, { 24, 63, 1 } } }, + { { { 43, 0, 2 }, { 24, 63, 0 } } }, + { { { 44, 0, 1 }, { 25, 63, 1 } } }, + { { { 44, 0, 0 }, { 25, 63, 0 } } }, + { { { 44, 0, 1 }, { 26, 63, 1 } } }, + { { { 44, 0, 2 }, { 26, 63, 0 } } }, + { { { 45, 0, 1 }, { 27, 63, 1 } } }, + { { { 45, 0, 0 }, { 27, 63, 0 } } }, + { { { 45, 0, 1 }, { 28, 63, 1 } } }, + { { { 45, 0, 2 }, { 28, 63, 0 } } }, + { { { 46, 0, 1 }, { 29, 63, 1 } } }, + { { { 46, 0, 0 }, { 29, 63, 0 } } }, + { { { 46, 0, 1 }, { 30, 63, 1 } } }, + { { { 46, 0, 2 }, { 30, 63, 0 } } }, + { { { 47, 0, 1 }, { 31, 63, 1 } } }, + { { { 47, 0, 0 }, { 31, 63, 0 } } }, + { { { 47, 0, 1 }, { 32, 63, 1 } } }, + { { { 47, 0, 2 }, { 32, 63, 0 } } }, + { { { 48, 0, 2 }, { 33, 63, 1 } } }, + { { { 48, 0, 1 }, { 33, 63, 0 } } }, + { { { 48, 0, 0 }, { 48, 48, 0 } } }, + { { { 48, 0, 1 }, { 34, 63, 0 } } }, + { { { 48, 0, 2 }, { 48, 49, 0 } } }, + { { { 49, 0, 1 }, { 35, 63, 0 } } }, + { { { 49, 0, 0 }, { 48, 50, 0 } } }, + { { { 49, 0, 1 }, { 36, 63, 0 } } }, + { { { 49, 0, 2 }, { 48, 51, 0 } } }, + { { { 50, 0, 1 }, { 37, 63, 0 } } }, + { { { 50, 0, 0 }, { 48, 52, 0 } } }, + { { { 50, 0, 1 }, { 38, 63, 0 } } }, + { { { 50, 0, 2 }, { 48, 53, 0 
} } }, + { { { 51, 0, 1 }, { 39, 63, 0 } } }, + { { { 51, 0, 0 }, { 48, 54, 0 } } }, + { { { 51, 0, 1 }, { 40, 63, 0 } } }, + { { { 51, 0, 2 }, { 48, 55, 0 } } }, + { { { 52, 0, 1 }, { 41, 63, 0 } } }, + { { { 52, 0, 0 }, { 48, 56, 0 } } }, + { { { 52, 0, 1 }, { 42, 63, 0 } } }, + { { { 52, 0, 2 }, { 48, 57, 0 } } }, + { { { 53, 0, 1 }, { 43, 63, 0 } } }, + { { { 53, 0, 0 }, { 48, 58, 0 } } }, + { { { 53, 0, 1 }, { 44, 63, 0 } } }, + { { { 53, 0, 2 }, { 48, 59, 0 } } }, + { { { 54, 0, 1 }, { 45, 63, 0 } } }, + { { { 54, 0, 0 }, { 48, 60, 0 } } }, + { { { 54, 0, 1 }, { 46, 63, 0 } } }, + { { { 54, 0, 2 }, { 48, 61, 0 } } }, + { { { 55, 0, 1 }, { 47, 63, 0 } } }, + { { { 55, 0, 0 }, { 48, 62, 0 } } }, + { { { 55, 0, 1 }, { 48, 63, 1 } } }, + { { { 55, 0, 2 }, { 48, 63, 0 } } }, + { { { 56, 0, 1 }, { 49, 63, 1 } } }, + { { { 56, 0, 0 }, { 49, 63, 0 } } }, + { { { 56, 0, 1 }, { 50, 63, 1 } } }, + { { { 56, 0, 2 }, { 50, 63, 0 } } }, + { { { 57, 0, 1 }, { 51, 63, 1 } } }, + { { { 57, 0, 0 }, { 51, 63, 0 } } }, + { { { 57, 0, 1 }, { 52, 63, 1 } } }, + { { { 57, 0, 2 }, { 52, 63, 0 } } }, + { { { 58, 0, 1 }, { 53, 63, 1 } } }, + { { { 58, 0, 0 }, { 53, 63, 0 } } }, + { { { 58, 0, 1 }, { 54, 63, 1 } } }, + { { { 58, 0, 2 }, { 54, 63, 0 } } }, + { { { 59, 0, 1 }, { 55, 63, 1 } } }, + { { { 59, 0, 0 }, { 55, 63, 0 } } }, + { { { 59, 0, 1 }, { 56, 63, 1 } } }, + { { { 59, 0, 2 }, { 56, 63, 0 } } }, + { { { 60, 0, 1 }, { 57, 63, 1 } } }, + { { { 60, 0, 0 }, { 57, 63, 0 } } }, + { { { 60, 0, 1 }, { 58, 63, 1 } } }, + { { { 60, 0, 2 }, { 58, 63, 0 } } }, + { { { 61, 0, 1 }, { 59, 63, 1 } } }, + { { { 61, 0, 0 }, { 59, 63, 0 } } }, + { { { 61, 0, 1 }, { 60, 63, 1 } } }, + { { { 61, 0, 2 }, { 60, 63, 0 } } }, + { { { 62, 0, 1 }, { 61, 63, 1 } } }, + { { { 62, 0, 0 }, { 61, 63, 0 } } }, + { { { 62, 0, 1 }, { 62, 63, 1 } } }, + { { { 62, 0, 2 }, { 62, 63, 0 } } }, + { { { 63, 0, 1 }, { 63, 63, 1 } } }, + { { { 63, 0, 0 }, { 63, 63, 0 } } } +}; + +static SingleColourLookup const 
lookup_5_4[] = +{ + { { { 0, 0, 0 }, { 0, 0, 0 } } }, + { { { 0, 0, 1 }, { 0, 1, 1 } } }, + { { { 0, 0, 2 }, { 0, 1, 0 } } }, + { { { 0, 0, 3 }, { 0, 1, 1 } } }, + { { { 0, 0, 4 }, { 0, 2, 1 } } }, + { { { 1, 0, 3 }, { 0, 2, 0 } } }, + { { { 1, 0, 2 }, { 0, 2, 1 } } }, + { { { 1, 0, 1 }, { 0, 3, 1 } } }, + { { { 1, 0, 0 }, { 0, 3, 0 } } }, + { { { 1, 0, 1 }, { 1, 2, 1 } } }, + { { { 1, 0, 2 }, { 1, 2, 0 } } }, + { { { 1, 0, 3 }, { 0, 4, 0 } } }, + { { { 1, 0, 4 }, { 0, 5, 1 } } }, + { { { 2, 0, 3 }, { 0, 5, 0 } } }, + { { { 2, 0, 2 }, { 0, 5, 1 } } }, + { { { 2, 0, 1 }, { 0, 6, 1 } } }, + { { { 2, 0, 0 }, { 0, 6, 0 } } }, + { { { 2, 0, 1 }, { 2, 3, 1 } } }, + { { { 2, 0, 2 }, { 2, 3, 0 } } }, + { { { 2, 0, 3 }, { 0, 7, 0 } } }, + { { { 2, 0, 4 }, { 1, 6, 1 } } }, + { { { 3, 0, 3 }, { 1, 6, 0 } } }, + { { { 3, 0, 2 }, { 0, 8, 0 } } }, + { { { 3, 0, 1 }, { 0, 9, 1 } } }, + { { { 3, 0, 0 }, { 0, 9, 0 } } }, + { { { 3, 0, 1 }, { 0, 9, 1 } } }, + { { { 3, 0, 2 }, { 0, 10, 1 } } }, + { { { 3, 0, 3 }, { 0, 10, 0 } } }, + { { { 3, 0, 4 }, { 2, 7, 1 } } }, + { { { 4, 0, 4 }, { 2, 7, 0 } } }, + { { { 4, 0, 3 }, { 0, 11, 0 } } }, + { { { 4, 0, 2 }, { 1, 10, 1 } } }, + { { { 4, 0, 1 }, { 1, 10, 0 } } }, + { { { 4, 0, 0 }, { 0, 12, 0 } } }, + { { { 4, 0, 1 }, { 0, 13, 1 } } }, + { { { 4, 0, 2 }, { 0, 13, 0 } } }, + { { { 4, 0, 3 }, { 0, 13, 1 } } }, + { { { 4, 0, 4 }, { 0, 14, 1 } } }, + { { { 5, 0, 3 }, { 0, 14, 0 } } }, + { { { 5, 0, 2 }, { 2, 11, 1 } } }, + { { { 5, 0, 1 }, { 2, 11, 0 } } }, + { { { 5, 0, 0 }, { 0, 15, 0 } } }, + { { { 5, 0, 1 }, { 1, 14, 1 } } }, + { { { 5, 0, 2 }, { 1, 14, 0 } } }, + { { { 5, 0, 3 }, { 0, 16, 0 } } }, + { { { 5, 0, 4 }, { 0, 17, 1 } } }, + { { { 6, 0, 3 }, { 0, 17, 0 } } }, + { { { 6, 0, 2 }, { 0, 17, 1 } } }, + { { { 6, 0, 1 }, { 0, 18, 1 } } }, + { { { 6, 0, 0 }, { 0, 18, 0 } } }, + { { { 6, 0, 1 }, { 2, 15, 1 } } }, + { { { 6, 0, 2 }, { 2, 15, 0 } } }, + { { { 6, 0, 3 }, { 0, 19, 0 } } }, + { { { 6, 0, 4 }, { 1, 18, 1 } } }, + { { { 7, 
0, 3 }, { 1, 18, 0 } } }, + { { { 7, 0, 2 }, { 0, 20, 0 } } }, + { { { 7, 0, 1 }, { 0, 21, 1 } } }, + { { { 7, 0, 0 }, { 0, 21, 0 } } }, + { { { 7, 0, 1 }, { 0, 21, 1 } } }, + { { { 7, 0, 2 }, { 0, 22, 1 } } }, + { { { 7, 0, 3 }, { 0, 22, 0 } } }, + { { { 7, 0, 4 }, { 2, 19, 1 } } }, + { { { 8, 0, 4 }, { 2, 19, 0 } } }, + { { { 8, 0, 3 }, { 0, 23, 0 } } }, + { { { 8, 0, 2 }, { 1, 22, 1 } } }, + { { { 8, 0, 1 }, { 1, 22, 0 } } }, + { { { 8, 0, 0 }, { 0, 24, 0 } } }, + { { { 8, 0, 1 }, { 0, 25, 1 } } }, + { { { 8, 0, 2 }, { 0, 25, 0 } } }, + { { { 8, 0, 3 }, { 0, 25, 1 } } }, + { { { 8, 0, 4 }, { 0, 26, 1 } } }, + { { { 9, 0, 3 }, { 0, 26, 0 } } }, + { { { 9, 0, 2 }, { 2, 23, 1 } } }, + { { { 9, 0, 1 }, { 2, 23, 0 } } }, + { { { 9, 0, 0 }, { 0, 27, 0 } } }, + { { { 9, 0, 1 }, { 1, 26, 1 } } }, + { { { 9, 0, 2 }, { 1, 26, 0 } } }, + { { { 9, 0, 3 }, { 0, 28, 0 } } }, + { { { 9, 0, 4 }, { 0, 29, 1 } } }, + { { { 10, 0, 3 }, { 0, 29, 0 } } }, + { { { 10, 0, 2 }, { 0, 29, 1 } } }, + { { { 10, 0, 1 }, { 0, 30, 1 } } }, + { { { 10, 0, 0 }, { 0, 30, 0 } } }, + { { { 10, 0, 1 }, { 2, 27, 1 } } }, + { { { 10, 0, 2 }, { 2, 27, 0 } } }, + { { { 10, 0, 3 }, { 0, 31, 0 } } }, + { { { 10, 0, 4 }, { 1, 30, 1 } } }, + { { { 11, 0, 3 }, { 1, 30, 0 } } }, + { { { 11, 0, 2 }, { 4, 24, 0 } } }, + { { { 11, 0, 1 }, { 1, 31, 1 } } }, + { { { 11, 0, 0 }, { 1, 31, 0 } } }, + { { { 11, 0, 1 }, { 1, 31, 1 } } }, + { { { 11, 0, 2 }, { 2, 30, 1 } } }, + { { { 11, 0, 3 }, { 2, 30, 0 } } }, + { { { 11, 0, 4 }, { 2, 31, 1 } } }, + { { { 12, 0, 4 }, { 2, 31, 0 } } }, + { { { 12, 0, 3 }, { 4, 27, 0 } } }, + { { { 12, 0, 2 }, { 3, 30, 1 } } }, + { { { 12, 0, 1 }, { 3, 30, 0 } } }, + { { { 12, 0, 0 }, { 4, 28, 0 } } }, + { { { 12, 0, 1 }, { 3, 31, 1 } } }, + { { { 12, 0, 2 }, { 3, 31, 0 } } }, + { { { 12, 0, 3 }, { 3, 31, 1 } } }, + { { { 12, 0, 4 }, { 4, 30, 1 } } }, + { { { 13, 0, 3 }, { 4, 30, 0 } } }, + { { { 13, 0, 2 }, { 6, 27, 1 } } }, + { { { 13, 0, 1 }, { 6, 27, 0 } } }, + { { { 13, 0, 0 }, { 
4, 31, 0 } } }, + { { { 13, 0, 1 }, { 5, 30, 1 } } }, + { { { 13, 0, 2 }, { 5, 30, 0 } } }, + { { { 13, 0, 3 }, { 8, 24, 0 } } }, + { { { 13, 0, 4 }, { 5, 31, 1 } } }, + { { { 14, 0, 3 }, { 5, 31, 0 } } }, + { { { 14, 0, 2 }, { 5, 31, 1 } } }, + { { { 14, 0, 1 }, { 6, 30, 1 } } }, + { { { 14, 0, 0 }, { 6, 30, 0 } } }, + { { { 14, 0, 1 }, { 6, 31, 1 } } }, + { { { 14, 0, 2 }, { 6, 31, 0 } } }, + { { { 14, 0, 3 }, { 8, 27, 0 } } }, + { { { 14, 0, 4 }, { 7, 30, 1 } } }, + { { { 15, 0, 3 }, { 7, 30, 0 } } }, + { { { 15, 0, 2 }, { 8, 28, 0 } } }, + { { { 15, 0, 1 }, { 7, 31, 1 } } }, + { { { 15, 0, 0 }, { 7, 31, 0 } } }, + { { { 15, 0, 1 }, { 7, 31, 1 } } }, + { { { 15, 0, 2 }, { 8, 30, 1 } } }, + { { { 15, 0, 3 }, { 8, 30, 0 } } }, + { { { 15, 0, 4 }, { 10, 27, 1 } } }, + { { { 16, 0, 4 }, { 10, 27, 0 } } }, + { { { 16, 0, 3 }, { 8, 31, 0 } } }, + { { { 16, 0, 2 }, { 9, 30, 1 } } }, + { { { 16, 0, 1 }, { 9, 30, 0 } } }, + { { { 16, 0, 0 }, { 12, 24, 0 } } }, + { { { 16, 0, 1 }, { 9, 31, 1 } } }, + { { { 16, 0, 2 }, { 9, 31, 0 } } }, + { { { 16, 0, 3 }, { 9, 31, 1 } } }, + { { { 16, 0, 4 }, { 10, 30, 1 } } }, + { { { 17, 0, 3 }, { 10, 30, 0 } } }, + { { { 17, 0, 2 }, { 10, 31, 1 } } }, + { { { 17, 0, 1 }, { 10, 31, 0 } } }, + { { { 17, 0, 0 }, { 12, 27, 0 } } }, + { { { 17, 0, 1 }, { 11, 30, 1 } } }, + { { { 17, 0, 2 }, { 11, 30, 0 } } }, + { { { 17, 0, 3 }, { 12, 28, 0 } } }, + { { { 17, 0, 4 }, { 11, 31, 1 } } }, + { { { 18, 0, 3 }, { 11, 31, 0 } } }, + { { { 18, 0, 2 }, { 11, 31, 1 } } }, + { { { 18, 0, 1 }, { 12, 30, 1 } } }, + { { { 18, 0, 0 }, { 12, 30, 0 } } }, + { { { 18, 0, 1 }, { 14, 27, 1 } } }, + { { { 18, 0, 2 }, { 14, 27, 0 } } }, + { { { 18, 0, 3 }, { 12, 31, 0 } } }, + { { { 18, 0, 4 }, { 13, 30, 1 } } }, + { { { 19, 0, 3 }, { 13, 30, 0 } } }, + { { { 19, 0, 2 }, { 16, 24, 0 } } }, + { { { 19, 0, 1 }, { 13, 31, 1 } } }, + { { { 19, 0, 0 }, { 13, 31, 0 } } }, + { { { 19, 0, 1 }, { 13, 31, 1 } } }, + { { { 19, 0, 2 }, { 14, 30, 1 } } }, + { { { 19, 0, 3 }, 
{ 14, 30, 0 } } }, + { { { 19, 0, 4 }, { 14, 31, 1 } } }, + { { { 20, 0, 4 }, { 14, 31, 0 } } }, + { { { 20, 0, 3 }, { 16, 27, 0 } } }, + { { { 20, 0, 2 }, { 15, 30, 1 } } }, + { { { 20, 0, 1 }, { 15, 30, 0 } } }, + { { { 20, 0, 0 }, { 16, 28, 0 } } }, + { { { 20, 0, 1 }, { 15, 31, 1 } } }, + { { { 20, 0, 2 }, { 15, 31, 0 } } }, + { { { 20, 0, 3 }, { 15, 31, 1 } } }, + { { { 20, 0, 4 }, { 16, 30, 1 } } }, + { { { 21, 0, 3 }, { 16, 30, 0 } } }, + { { { 21, 0, 2 }, { 18, 27, 1 } } }, + { { { 21, 0, 1 }, { 18, 27, 0 } } }, + { { { 21, 0, 0 }, { 16, 31, 0 } } }, + { { { 21, 0, 1 }, { 17, 30, 1 } } }, + { { { 21, 0, 2 }, { 17, 30, 0 } } }, + { { { 21, 0, 3 }, { 20, 24, 0 } } }, + { { { 21, 0, 4 }, { 17, 31, 1 } } }, + { { { 22, 0, 3 }, { 17, 31, 0 } } }, + { { { 22, 0, 2 }, { 17, 31, 1 } } }, + { { { 22, 0, 1 }, { 18, 30, 1 } } }, + { { { 22, 0, 0 }, { 18, 30, 0 } } }, + { { { 22, 0, 1 }, { 18, 31, 1 } } }, + { { { 22, 0, 2 }, { 18, 31, 0 } } }, + { { { 22, 0, 3 }, { 20, 27, 0 } } }, + { { { 22, 0, 4 }, { 19, 30, 1 } } }, + { { { 23, 0, 3 }, { 19, 30, 0 } } }, + { { { 23, 0, 2 }, { 20, 28, 0 } } }, + { { { 23, 0, 1 }, { 19, 31, 1 } } }, + { { { 23, 0, 0 }, { 19, 31, 0 } } }, + { { { 23, 0, 1 }, { 19, 31, 1 } } }, + { { { 23, 0, 2 }, { 20, 30, 1 } } }, + { { { 23, 0, 3 }, { 20, 30, 0 } } }, + { { { 23, 0, 4 }, { 22, 27, 1 } } }, + { { { 24, 0, 4 }, { 22, 27, 0 } } }, + { { { 24, 0, 3 }, { 20, 31, 0 } } }, + { { { 24, 0, 2 }, { 21, 30, 1 } } }, + { { { 24, 0, 1 }, { 21, 30, 0 } } }, + { { { 24, 0, 0 }, { 24, 24, 0 } } }, + { { { 24, 0, 1 }, { 21, 31, 1 } } }, + { { { 24, 0, 2 }, { 21, 31, 0 } } }, + { { { 24, 0, 3 }, { 21, 31, 1 } } }, + { { { 24, 0, 4 }, { 22, 30, 1 } } }, + { { { 25, 0, 3 }, { 22, 30, 0 } } }, + { { { 25, 0, 2 }, { 22, 31, 1 } } }, + { { { 25, 0, 1 }, { 22, 31, 0 } } }, + { { { 25, 0, 0 }, { 24, 27, 0 } } }, + { { { 25, 0, 1 }, { 23, 30, 1 } } }, + { { { 25, 0, 2 }, { 23, 30, 0 } } }, + { { { 25, 0, 3 }, { 24, 28, 0 } } }, + { { { 25, 0, 4 }, { 23, 31, 
1 } } }, + { { { 26, 0, 3 }, { 23, 31, 0 } } }, + { { { 26, 0, 2 }, { 23, 31, 1 } } }, + { { { 26, 0, 1 }, { 24, 30, 1 } } }, + { { { 26, 0, 0 }, { 24, 30, 0 } } }, + { { { 26, 0, 1 }, { 26, 27, 1 } } }, + { { { 26, 0, 2 }, { 26, 27, 0 } } }, + { { { 26, 0, 3 }, { 24, 31, 0 } } }, + { { { 26, 0, 4 }, { 25, 30, 1 } } }, + { { { 27, 0, 3 }, { 25, 30, 0 } } }, + { { { 27, 0, 2 }, { 28, 24, 0 } } }, + { { { 27, 0, 1 }, { 25, 31, 1 } } }, + { { { 27, 0, 0 }, { 25, 31, 0 } } }, + { { { 27, 0, 1 }, { 25, 31, 1 } } }, + { { { 27, 0, 2 }, { 26, 30, 1 } } }, + { { { 27, 0, 3 }, { 26, 30, 0 } } }, + { { { 27, 0, 4 }, { 26, 31, 1 } } }, + { { { 28, 0, 4 }, { 26, 31, 0 } } }, + { { { 28, 0, 3 }, { 28, 27, 0 } } }, + { { { 28, 0, 2 }, { 27, 30, 1 } } }, + { { { 28, 0, 1 }, { 27, 30, 0 } } }, + { { { 28, 0, 0 }, { 28, 28, 0 } } }, + { { { 28, 0, 1 }, { 27, 31, 1 } } }, + { { { 28, 0, 2 }, { 27, 31, 0 } } }, + { { { 28, 0, 3 }, { 27, 31, 1 } } }, + { { { 28, 0, 4 }, { 28, 30, 1 } } }, + { { { 29, 0, 3 }, { 28, 30, 0 } } }, + { { { 29, 0, 2 }, { 30, 27, 1 } } }, + { { { 29, 0, 1 }, { 30, 27, 0 } } }, + { { { 29, 0, 0 }, { 28, 31, 0 } } }, + { { { 29, 0, 1 }, { 29, 30, 1 } } }, + { { { 29, 0, 2 }, { 29, 30, 0 } } }, + { { { 29, 0, 3 }, { 29, 30, 1 } } }, + { { { 29, 0, 4 }, { 29, 31, 1 } } }, + { { { 30, 0, 3 }, { 29, 31, 0 } } }, + { { { 30, 0, 2 }, { 29, 31, 1 } } }, + { { { 30, 0, 1 }, { 30, 30, 1 } } }, + { { { 30, 0, 0 }, { 30, 30, 0 } } }, + { { { 30, 0, 1 }, { 30, 31, 1 } } }, + { { { 30, 0, 2 }, { 30, 31, 0 } } }, + { { { 30, 0, 3 }, { 30, 31, 1 } } }, + { { { 30, 0, 4 }, { 31, 30, 1 } } }, + { { { 31, 0, 3 }, { 31, 30, 0 } } }, + { { { 31, 0, 2 }, { 31, 30, 1 } } }, + { { { 31, 0, 1 }, { 31, 31, 1 } } }, + { { { 31, 0, 0 }, { 31, 31, 0 } } } +}; + +static SingleColourLookup const lookup_6_4[] = +{ + { { { 0, 0, 0 }, { 0, 0, 0 } } }, + { { { 0, 0, 1 }, { 0, 1, 0 } } }, + { { { 0, 0, 2 }, { 0, 2, 0 } } }, + { { { 1, 0, 1 }, { 0, 3, 1 } } }, + { { { 1, 0, 0 }, { 0, 3, 0 } } }, 
+ { { { 1, 0, 1 }, { 0, 4, 0 } } }, + { { { 1, 0, 2 }, { 0, 5, 0 } } }, + { { { 2, 0, 1 }, { 0, 6, 1 } } }, + { { { 2, 0, 0 }, { 0, 6, 0 } } }, + { { { 2, 0, 1 }, { 0, 7, 0 } } }, + { { { 2, 0, 2 }, { 0, 8, 0 } } }, + { { { 3, 0, 1 }, { 0, 9, 1 } } }, + { { { 3, 0, 0 }, { 0, 9, 0 } } }, + { { { 3, 0, 1 }, { 0, 10, 0 } } }, + { { { 3, 0, 2 }, { 0, 11, 0 } } }, + { { { 4, 0, 1 }, { 0, 12, 1 } } }, + { { { 4, 0, 0 }, { 0, 12, 0 } } }, + { { { 4, 0, 1 }, { 0, 13, 0 } } }, + { { { 4, 0, 2 }, { 0, 14, 0 } } }, + { { { 5, 0, 1 }, { 0, 15, 1 } } }, + { { { 5, 0, 0 }, { 0, 15, 0 } } }, + { { { 5, 0, 1 }, { 0, 16, 0 } } }, + { { { 5, 0, 2 }, { 1, 15, 0 } } }, + { { { 6, 0, 1 }, { 0, 17, 0 } } }, + { { { 6, 0, 0 }, { 0, 18, 0 } } }, + { { { 6, 0, 1 }, { 0, 19, 0 } } }, + { { { 6, 0, 2 }, { 3, 14, 0 } } }, + { { { 7, 0, 1 }, { 0, 20, 0 } } }, + { { { 7, 0, 0 }, { 0, 21, 0 } } }, + { { { 7, 0, 1 }, { 0, 22, 0 } } }, + { { { 7, 0, 2 }, { 4, 15, 0 } } }, + { { { 8, 0, 1 }, { 0, 23, 0 } } }, + { { { 8, 0, 0 }, { 0, 24, 0 } } }, + { { { 8, 0, 1 }, { 0, 25, 0 } } }, + { { { 8, 0, 2 }, { 6, 14, 0 } } }, + { { { 9, 0, 1 }, { 0, 26, 0 } } }, + { { { 9, 0, 0 }, { 0, 27, 0 } } }, + { { { 9, 0, 1 }, { 0, 28, 0 } } }, + { { { 9, 0, 2 }, { 7, 15, 0 } } }, + { { { 10, 0, 1 }, { 0, 29, 0 } } }, + { { { 10, 0, 0 }, { 0, 30, 0 } } }, + { { { 10, 0, 1 }, { 0, 31, 0 } } }, + { { { 10, 0, 2 }, { 9, 14, 0 } } }, + { { { 11, 0, 1 }, { 0, 32, 0 } } }, + { { { 11, 0, 0 }, { 0, 33, 0 } } }, + { { { 11, 0, 1 }, { 2, 30, 0 } } }, + { { { 11, 0, 2 }, { 0, 34, 0 } } }, + { { { 12, 0, 1 }, { 0, 35, 0 } } }, + { { { 12, 0, 0 }, { 0, 36, 0 } } }, + { { { 12, 0, 1 }, { 3, 31, 0 } } }, + { { { 12, 0, 2 }, { 0, 37, 0 } } }, + { { { 13, 0, 1 }, { 0, 38, 0 } } }, + { { { 13, 0, 0 }, { 0, 39, 0 } } }, + { { { 13, 0, 1 }, { 5, 30, 0 } } }, + { { { 13, 0, 2 }, { 0, 40, 0 } } }, + { { { 14, 0, 1 }, { 0, 41, 0 } } }, + { { { 14, 0, 0 }, { 0, 42, 0 } } }, + { { { 14, 0, 1 }, { 6, 31, 0 } } }, + { { { 14, 0, 2 }, { 0, 
43, 0 } } }, + { { { 15, 0, 1 }, { 0, 44, 0 } } }, + { { { 15, 0, 0 }, { 0, 45, 0 } } }, + { { { 15, 0, 1 }, { 8, 30, 0 } } }, + { { { 15, 0, 2 }, { 0, 46, 0 } } }, + { { { 16, 0, 2 }, { 0, 47, 0 } } }, + { { { 16, 0, 1 }, { 1, 46, 0 } } }, + { { { 16, 0, 0 }, { 0, 48, 0 } } }, + { { { 16, 0, 1 }, { 0, 49, 0 } } }, + { { { 16, 0, 2 }, { 0, 50, 0 } } }, + { { { 17, 0, 1 }, { 2, 47, 0 } } }, + { { { 17, 0, 0 }, { 0, 51, 0 } } }, + { { { 17, 0, 1 }, { 0, 52, 0 } } }, + { { { 17, 0, 2 }, { 0, 53, 0 } } }, + { { { 18, 0, 1 }, { 4, 46, 0 } } }, + { { { 18, 0, 0 }, { 0, 54, 0 } } }, + { { { 18, 0, 1 }, { 0, 55, 0 } } }, + { { { 18, 0, 2 }, { 0, 56, 0 } } }, + { { { 19, 0, 1 }, { 5, 47, 0 } } }, + { { { 19, 0, 0 }, { 0, 57, 0 } } }, + { { { 19, 0, 1 }, { 0, 58, 0 } } }, + { { { 19, 0, 2 }, { 0, 59, 0 } } }, + { { { 20, 0, 1 }, { 7, 46, 0 } } }, + { { { 20, 0, 0 }, { 0, 60, 0 } } }, + { { { 20, 0, 1 }, { 0, 61, 0 } } }, + { { { 20, 0, 2 }, { 0, 62, 0 } } }, + { { { 21, 0, 1 }, { 8, 47, 0 } } }, + { { { 21, 0, 0 }, { 0, 63, 0 } } }, + { { { 21, 0, 1 }, { 1, 62, 0 } } }, + { { { 21, 0, 2 }, { 1, 63, 0 } } }, + { { { 22, 0, 1 }, { 10, 46, 0 } } }, + { { { 22, 0, 0 }, { 2, 62, 0 } } }, + { { { 22, 0, 1 }, { 2, 63, 0 } } }, + { { { 22, 0, 2 }, { 3, 62, 0 } } }, + { { { 23, 0, 1 }, { 11, 47, 0 } } }, + { { { 23, 0, 0 }, { 3, 63, 0 } } }, + { { { 23, 0, 1 }, { 4, 62, 0 } } }, + { { { 23, 0, 2 }, { 4, 63, 0 } } }, + { { { 24, 0, 1 }, { 13, 46, 0 } } }, + { { { 24, 0, 0 }, { 5, 62, 0 } } }, + { { { 24, 0, 1 }, { 5, 63, 0 } } }, + { { { 24, 0, 2 }, { 6, 62, 0 } } }, + { { { 25, 0, 1 }, { 14, 47, 0 } } }, + { { { 25, 0, 0 }, { 6, 63, 0 } } }, + { { { 25, 0, 1 }, { 7, 62, 0 } } }, + { { { 25, 0, 2 }, { 7, 63, 0 } } }, + { { { 26, 0, 1 }, { 16, 45, 0 } } }, + { { { 26, 0, 0 }, { 8, 62, 0 } } }, + { { { 26, 0, 1 }, { 8, 63, 0 } } }, + { { { 26, 0, 2 }, { 9, 62, 0 } } }, + { { { 27, 0, 1 }, { 16, 48, 0 } } }, + { { { 27, 0, 0 }, { 9, 63, 0 } } }, + { { { 27, 0, 1 }, { 10, 62, 0 } } }, + { 
{ { 27, 0, 2 }, { 10, 63, 0 } } }, + { { { 28, 0, 1 }, { 16, 51, 0 } } }, + { { { 28, 0, 0 }, { 11, 62, 0 } } }, + { { { 28, 0, 1 }, { 11, 63, 0 } } }, + { { { 28, 0, 2 }, { 12, 62, 0 } } }, + { { { 29, 0, 1 }, { 16, 54, 0 } } }, + { { { 29, 0, 0 }, { 12, 63, 0 } } }, + { { { 29, 0, 1 }, { 13, 62, 0 } } }, + { { { 29, 0, 2 }, { 13, 63, 0 } } }, + { { { 30, 0, 1 }, { 16, 57, 0 } } }, + { { { 30, 0, 0 }, { 14, 62, 0 } } }, + { { { 30, 0, 1 }, { 14, 63, 0 } } }, + { { { 30, 0, 2 }, { 15, 62, 0 } } }, + { { { 31, 0, 1 }, { 16, 60, 0 } } }, + { { { 31, 0, 0 }, { 15, 63, 0 } } }, + { { { 31, 0, 1 }, { 24, 46, 0 } } }, + { { { 31, 0, 2 }, { 16, 62, 0 } } }, + { { { 32, 0, 2 }, { 16, 63, 0 } } }, + { { { 32, 0, 1 }, { 17, 62, 0 } } }, + { { { 32, 0, 0 }, { 25, 47, 0 } } }, + { { { 32, 0, 1 }, { 17, 63, 0 } } }, + { { { 32, 0, 2 }, { 18, 62, 0 } } }, + { { { 33, 0, 1 }, { 18, 63, 0 } } }, + { { { 33, 0, 0 }, { 27, 46, 0 } } }, + { { { 33, 0, 1 }, { 19, 62, 0 } } }, + { { { 33, 0, 2 }, { 19, 63, 0 } } }, + { { { 34, 0, 1 }, { 20, 62, 0 } } }, + { { { 34, 0, 0 }, { 28, 47, 0 } } }, + { { { 34, 0, 1 }, { 20, 63, 0 } } }, + { { { 34, 0, 2 }, { 21, 62, 0 } } }, + { { { 35, 0, 1 }, { 21, 63, 0 } } }, + { { { 35, 0, 0 }, { 30, 46, 0 } } }, + { { { 35, 0, 1 }, { 22, 62, 0 } } }, + { { { 35, 0, 2 }, { 22, 63, 0 } } }, + { { { 36, 0, 1 }, { 23, 62, 0 } } }, + { { { 36, 0, 0 }, { 31, 47, 0 } } }, + { { { 36, 0, 1 }, { 23, 63, 0 } } }, + { { { 36, 0, 2 }, { 24, 62, 0 } } }, + { { { 37, 0, 1 }, { 24, 63, 0 } } }, + { { { 37, 0, 0 }, { 32, 47, 0 } } }, + { { { 37, 0, 1 }, { 25, 62, 0 } } }, + { { { 37, 0, 2 }, { 25, 63, 0 } } }, + { { { 38, 0, 1 }, { 26, 62, 0 } } }, + { { { 38, 0, 0 }, { 32, 50, 0 } } }, + { { { 38, 0, 1 }, { 26, 63, 0 } } }, + { { { 38, 0, 2 }, { 27, 62, 0 } } }, + { { { 39, 0, 1 }, { 27, 63, 0 } } }, + { { { 39, 0, 0 }, { 32, 53, 0 } } }, + { { { 39, 0, 1 }, { 28, 62, 0 } } }, + { { { 39, 0, 2 }, { 28, 63, 0 } } }, + { { { 40, 0, 1 }, { 29, 62, 0 } } }, + { { { 40, 0, 
0 }, { 32, 56, 0 } } }, + { { { 40, 0, 1 }, { 29, 63, 0 } } }, + { { { 40, 0, 2 }, { 30, 62, 0 } } }, + { { { 41, 0, 1 }, { 30, 63, 0 } } }, + { { { 41, 0, 0 }, { 32, 59, 0 } } }, + { { { 41, 0, 1 }, { 31, 62, 0 } } }, + { { { 41, 0, 2 }, { 31, 63, 0 } } }, + { { { 42, 0, 1 }, { 32, 61, 0 } } }, + { { { 42, 0, 0 }, { 32, 62, 0 } } }, + { { { 42, 0, 1 }, { 32, 63, 0 } } }, + { { { 42, 0, 2 }, { 41, 46, 0 } } }, + { { { 43, 0, 1 }, { 33, 62, 0 } } }, + { { { 43, 0, 0 }, { 33, 63, 0 } } }, + { { { 43, 0, 1 }, { 34, 62, 0 } } }, + { { { 43, 0, 2 }, { 42, 47, 0 } } }, + { { { 44, 0, 1 }, { 34, 63, 0 } } }, + { { { 44, 0, 0 }, { 35, 62, 0 } } }, + { { { 44, 0, 1 }, { 35, 63, 0 } } }, + { { { 44, 0, 2 }, { 44, 46, 0 } } }, + { { { 45, 0, 1 }, { 36, 62, 0 } } }, + { { { 45, 0, 0 }, { 36, 63, 0 } } }, + { { { 45, 0, 1 }, { 37, 62, 0 } } }, + { { { 45, 0, 2 }, { 45, 47, 0 } } }, + { { { 46, 0, 1 }, { 37, 63, 0 } } }, + { { { 46, 0, 0 }, { 38, 62, 0 } } }, + { { { 46, 0, 1 }, { 38, 63, 0 } } }, + { { { 46, 0, 2 }, { 47, 46, 0 } } }, + { { { 47, 0, 1 }, { 39, 62, 0 } } }, + { { { 47, 0, 0 }, { 39, 63, 0 } } }, + { { { 47, 0, 1 }, { 40, 62, 0 } } }, + { { { 47, 0, 2 }, { 48, 46, 0 } } }, + { { { 48, 0, 2 }, { 40, 63, 0 } } }, + { { { 48, 0, 1 }, { 41, 62, 0 } } }, + { { { 48, 0, 0 }, { 41, 63, 0 } } }, + { { { 48, 0, 1 }, { 48, 49, 0 } } }, + { { { 48, 0, 2 }, { 42, 62, 0 } } }, + { { { 49, 0, 1 }, { 42, 63, 0 } } }, + { { { 49, 0, 0 }, { 43, 62, 0 } } }, + { { { 49, 0, 1 }, { 48, 52, 0 } } }, + { { { 49, 0, 2 }, { 43, 63, 0 } } }, + { { { 50, 0, 1 }, { 44, 62, 0 } } }, + { { { 50, 0, 0 }, { 44, 63, 0 } } }, + { { { 50, 0, 1 }, { 48, 55, 0 } } }, + { { { 50, 0, 2 }, { 45, 62, 0 } } }, + { { { 51, 0, 1 }, { 45, 63, 0 } } }, + { { { 51, 0, 0 }, { 46, 62, 0 } } }, + { { { 51, 0, 1 }, { 48, 58, 0 } } }, + { { { 51, 0, 2 }, { 46, 63, 0 } } }, + { { { 52, 0, 1 }, { 47, 62, 0 } } }, + { { { 52, 0, 0 }, { 47, 63, 0 } } }, + { { { 52, 0, 1 }, { 48, 61, 0 } } }, + { { { 52, 0, 2 }, { 48, 
62, 0 } } }, + { { { 53, 0, 1 }, { 56, 47, 0 } } }, + { { { 53, 0, 0 }, { 48, 63, 0 } } }, + { { { 53, 0, 1 }, { 49, 62, 0 } } }, + { { { 53, 0, 2 }, { 49, 63, 0 } } }, + { { { 54, 0, 1 }, { 58, 46, 0 } } }, + { { { 54, 0, 0 }, { 50, 62, 0 } } }, + { { { 54, 0, 1 }, { 50, 63, 0 } } }, + { { { 54, 0, 2 }, { 51, 62, 0 } } }, + { { { 55, 0, 1 }, { 59, 47, 0 } } }, + { { { 55, 0, 0 }, { 51, 63, 0 } } }, + { { { 55, 0, 1 }, { 52, 62, 0 } } }, + { { { 55, 0, 2 }, { 52, 63, 0 } } }, + { { { 56, 0, 1 }, { 61, 46, 0 } } }, + { { { 56, 0, 0 }, { 53, 62, 0 } } }, + { { { 56, 0, 1 }, { 53, 63, 0 } } }, + { { { 56, 0, 2 }, { 54, 62, 0 } } }, + { { { 57, 0, 1 }, { 62, 47, 0 } } }, + { { { 57, 0, 0 }, { 54, 63, 0 } } }, + { { { 57, 0, 1 }, { 55, 62, 0 } } }, + { { { 57, 0, 2 }, { 55, 63, 0 } } }, + { { { 58, 0, 1 }, { 56, 62, 1 } } }, + { { { 58, 0, 0 }, { 56, 62, 0 } } }, + { { { 58, 0, 1 }, { 56, 63, 0 } } }, + { { { 58, 0, 2 }, { 57, 62, 0 } } }, + { { { 59, 0, 1 }, { 57, 63, 1 } } }, + { { { 59, 0, 0 }, { 57, 63, 0 } } }, + { { { 59, 0, 1 }, { 58, 62, 0 } } }, + { { { 59, 0, 2 }, { 58, 63, 0 } } }, + { { { 60, 0, 1 }, { 59, 62, 1 } } }, + { { { 60, 0, 0 }, { 59, 62, 0 } } }, + { { { 60, 0, 1 }, { 59, 63, 0 } } }, + { { { 60, 0, 2 }, { 60, 62, 0 } } }, + { { { 61, 0, 1 }, { 60, 63, 1 } } }, + { { { 61, 0, 0 }, { 60, 63, 0 } } }, + { { { 61, 0, 1 }, { 61, 62, 0 } } }, + { { { 61, 0, 2 }, { 61, 63, 0 } } }, + { { { 62, 0, 1 }, { 62, 62, 1 } } }, + { { { 62, 0, 0 }, { 62, 62, 0 } } }, + { { { 62, 0, 1 }, { 62, 63, 0 } } }, + { { { 62, 0, 2 }, { 63, 62, 0 } } }, + { { { 63, 0, 1 }, { 63, 63, 1 } } }, + { { { 63, 0, 0 }, { 63, 63, 0 } } } +}; diff --git a/extern/libsquish-1.15/squish.cpp b/extern/libsquish-1.15/squish.cpp new file mode 100644 index 0000000..1d22a64 --- /dev/null +++ b/extern/libsquish-1.15/squish.cpp @@ -0,0 +1,403 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission 
is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + -------------------------------------------------------------------------- */ + +#include +#include "squish.h" +#include "colourset.h" +#include "maths.h" +#include "rangefit.h" +#include "clusterfit.h" +#include "colourblock.h" +#include "alpha.h" +#include "singlecolourfit.h" + +namespace squish { + +static int FixFlags( int flags ) +{ + // grab the flag bits + int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 ); + int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit ); + int extra = flags & kWeightColourByAlpha; + + // set defaults + if ( method != kDxt3 + && method != kDxt5 + && method != kBc4 + && method != kBc5 ) + { + method = kDxt1; + } + if( fit != kColourRangeFit && fit != kColourIterativeClusterFit ) + fit = kColourClusterFit; + + // done + return method | fit | extra; +} + +void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric ) +{ + // fix any bad flags + flags = FixFlags( flags ); + + if ( ( flags & ( kBc4 | kBc5 ) ) != 0 ) + { + u8 alpha[16*4]; + for( int i = 0; i < 16; ++i ) + { + alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A + } + + u8* rBlock = reinterpret_cast< u8* >( block ); + CompressAlphaDxt5( alpha, mask, rBlock ); + + if ( ( flags & ( kBc5 ) ) != 0 ) + { + for( int i = 0; i < 16; ++i ) + { + alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A + } + + u8* gBlock = reinterpret_cast< u8* >( block ) + 8; + CompressAlphaDxt5( alpha, mask, gBlock ); + } + + return; + } + + // get the block locations + void* colourBlock = block; + void* alphaBlock = block; + if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 ) + colourBlock = reinterpret_cast< u8* >( block ) + 8; + + // create the minimal point set + ColourSet colours( rgba, mask, flags ); + + // check the compression type and compress colour + if( colours.GetCount() == 1 ) + { + // always do a single colour fit + SingleColourFit fit( &colours, flags ); + fit.Compress( colourBlock ); + } + else if( ( flags & kColourRangeFit ) != 0 || 
colours.GetCount() == 0 ) + { + // do a range fit + RangeFit fit( &colours, flags, metric ); + fit.Compress( colourBlock ); + } + else + { + // default to a cluster fit (could be iterative or not) + ClusterFit fit( &colours, flags, metric ); + fit.Compress( colourBlock ); + } + + // compress alpha separately if necessary + if( ( flags & kDxt3 ) != 0 ) + CompressAlphaDxt3( rgba, mask, alphaBlock ); + else if( ( flags & kDxt5 ) != 0 ) + CompressAlphaDxt5( rgba, mask, alphaBlock ); +} + +void Decompress( u8* rgba, void const* block, int flags ) +{ + // fix any bad flags + flags = FixFlags( flags ); + + // get the block locations + void const* colourBlock = block; + void const* alphaBlock = block; + if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 ) + colourBlock = reinterpret_cast< u8 const* >( block ) + 8; + + // decompress colour + DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 ); + + // decompress alpha separately if necessary + if( ( flags & kDxt3 ) != 0 ) + DecompressAlphaDxt3( rgba, alphaBlock ); + else if( ( flags & kDxt5 ) != 0 ) + DecompressAlphaDxt5( rgba, alphaBlock ); +} + +int GetStorageRequirements( int width, int height, int flags ) +{ + // fix any bad flags + flags = FixFlags( flags ); + + // compute the storage requirements + int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 ); + int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 
8 : 16; + return blockcount*blocksize; +} + +void CopyRGBA( u8 const* source, u8* dest, int flags ) +{ + if (flags & kSourceBGRA) + { + // convert from bgra to rgba + dest[0] = source[2]; + dest[1] = source[1]; + dest[2] = source[0]; + dest[3] = source[3]; + } + else + { + for( int i = 0; i < 4; ++i ) + *dest++ = *source++; + } +} + +void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric ) +{ + // fix any bad flags + flags = FixFlags( flags ); + + // loop over blocks +#ifdef SQUISH_USE_OPENMP +# pragma omp parallel for +#endif + for( int y = 0; y < height; y += 4 ) + { + // initialise the block output + u8* targetBlock = reinterpret_cast< u8* >( blocks ); + int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16; + targetBlock += ( (y / 4) * ( (width + 3) / 4) ) * bytesPerBlock; + + for( int x = 0; x < width; x += 4 ) + { + // build the 4x4 block of pixels + u8 sourceRgba[16*4]; + u8* targetPixel = sourceRgba; + int mask = 0; + for( int py = 0; py < 4; ++py ) + { + for( int px = 0; px < 4; ++px ) + { + // get the source pixel in the image + int sx = x + px; + int sy = y + py; + + // enable if we're in the image + if( sx < width && sy < height ) + { + // copy the rgba value + u8 const* sourcePixel = rgba + pitch*sy + 4*sx; + CopyRGBA(sourcePixel, targetPixel, flags); + // enable this pixel + mask |= ( 1 << ( 4*py + px ) ); + } + + // advance to the next pixel + targetPixel += 4; + } + } + + // compress it into the output + CompressMasked( sourceRgba, mask, targetBlock, flags, metric ); + + // advance + targetBlock += bytesPerBlock; + } + } +} + +void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric ) +{ + CompressImage(rgba, width, height, width*4, blocks, flags, metric); +} + +void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags ) +{ + // fix any bad flags + flags = FixFlags( flags ); + + // loop over blocks +#ifdef 
SQUISH_USE_OPENMP +# pragma omp parallel for +#endif + for( int y = 0; y < height; y += 4 ) + { + // initialise the block input + u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks ); + int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16; + sourceBlock += ( (y / 4) * ( (width + 3) / 4) ) * bytesPerBlock; + + for( int x = 0; x < width; x += 4 ) + { + // decompress the block + u8 targetRgba[4*16]; + Decompress( targetRgba, sourceBlock, flags ); + + // write the decompressed pixels to the correct image locations + u8 const* sourcePixel = targetRgba; + for( int py = 0; py < 4; ++py ) + { + for( int px = 0; px < 4; ++px ) + { + // get the target location + int sx = x + px; + int sy = y + py; + + // write if we're in the image + if( sx < width && sy < height ) + { + // copy the rgba value + u8* targetPixel = rgba + pitch*sy + 4*sx; + CopyRGBA(sourcePixel, targetPixel, flags); + } + + // advance to the next pixel + sourcePixel += 4; + } + } + + // advance + sourceBlock += bytesPerBlock; + } + } +} + +void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags ) +{ + DecompressImage( rgba, width, height, width*4, blocks, flags ); +} + +static double ErrorSq(double x, double y) +{ + return (x - y) * (x - y); +} + +static void ComputeBlockWMSE(u8 const *original, u8 const *compressed, unsigned int w, unsigned int h, double &cmse, double &amse) +{ + // Computes the MSE for the block and weights it by the variance of the original block. + // If the variance of the original block is less than 4 (i.e. a standard deviation of 1 per channel) + // then the block is close to being a single colour. Quantisation errors in single colour blocks + // are easier to see than similar errors in blocks that contain more colours, particularly when there + // are many such blocks in a large area (eg a blue sky background) as they cause banding. 
Given that + // banding is easier to see than small errors in "complex" blocks, we weight the errors by a factor + // of 5. This implies that images with large, single colour areas will have a higher potential WMSE + // than images with lots of detail. + + cmse = amse = 0; + unsigned int sum_p[4]; // per channel sum of pixels + unsigned int sum_p2[4]; // per channel sum of pixels squared + memset(sum_p, 0, sizeof(sum_p)); + memset(sum_p2, 0, sizeof(sum_p2)); + for( unsigned int py = 0; py < 4; ++py ) + { + for( unsigned int px = 0; px < 4; ++px ) + { + if( px < w && py < h ) + { + double pixelCMSE = 0; + for( int i = 0; i < 3; ++i ) + { + pixelCMSE += ErrorSq(original[i], compressed[i]); + sum_p[i] += original[i]; + sum_p2[i] += (unsigned int)original[i]*original[i]; + } + if( original[3] == 0 && compressed[3] == 0 ) + pixelCMSE = 0; // transparent in both, so colour is inconsequential + amse += ErrorSq(original[3], compressed[3]); + cmse += pixelCMSE; + sum_p[3] += original[3]; + sum_p2[3] += (unsigned int)original[3]*original[3]; + } + original += 4; + compressed += 4; + } + } + unsigned int variance = 0; + for( int i = 0; i < 4; ++i ) + variance += w*h*sum_p2[i] - sum_p[i]*sum_p[i]; + if( variance < 4 * w * w * h * h ) + { + amse *= 5; + cmse *= 5; + } +} + +void ComputeMSE( u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE ) +{ + // fix any bad flags + flags = FixFlags( flags ); + colourMSE = alphaMSE = 0; + + // initialise the block input + squish::u8 const* sourceBlock = dxt; + int bytesPerBlock = ( ( flags & squish::kDxt1 ) != 0 ) ? 
8 : 16; + + // loop over blocks + for( int y = 0; y < height; y += 4 ) + { + for( int x = 0; x < width; x += 4 ) + { + // decompress the block + u8 targetRgba[4*16]; + Decompress( targetRgba, sourceBlock, flags ); + u8 const* sourcePixel = targetRgba; + + // copy across to a similar pixel block + u8 originalRgba[4*16]; + u8* originalPixel = originalRgba; + + for( int py = 0; py < 4; ++py ) + { + for( int px = 0; px < 4; ++px ) + { + int sx = x + px; + int sy = y + py; + if( sx < width && sy < height ) + { + u8 const* targetPixel = rgba + pitch*sy + 4*sx; + CopyRGBA(targetPixel, originalPixel, flags); + } + sourcePixel += 4; + originalPixel += 4; + } + } + + // compute the weighted MSE of the block + double blockCMSE, blockAMSE; + ComputeBlockWMSE(originalRgba, targetRgba, std::min(4, width - x), std::min(4, height - y), blockCMSE, blockAMSE); + colourMSE += blockCMSE; + alphaMSE += blockAMSE; + // advance + sourceBlock += bytesPerBlock; + } + } + colourMSE /= (width * height * 3); + alphaMSE /= (width * height); +} + +void ComputeMSE( u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE ) +{ + ComputeMSE(rgba, width, height, width*4, dxt, flags, colourMSE, alphaMSE); +} + +} // namespace squish diff --git a/src/nvtt/tests/CMakeLists.txt b/src/nvtt/tests/CMakeLists.txt index 4a3a6de..0cf1ab4 100644 --- a/src/nvtt/tests/CMakeLists.txt +++ b/src/nvtt/tests/CMakeLists.txt @@ -28,6 +28,9 @@ TARGET_LINK_LIBRARIES(cubemaptest nvcore nvmath nvimage nvtt) ADD_EXECUTABLE(nvhdrtest hdrtest.cpp) TARGET_LINK_LIBRARIES(nvhdrtest nvcore nvimage nvtt bc6h nvmath) +ADD_EXECUTABLE(bc1enc bc1enc.cpp) +TARGET_LINK_LIBRARIES(bc1enc nvcore nvimage nvmath nvtt squish CMP_Core) + INSTALL(TARGETS nvtestsuite nvhdrtest DESTINATION bin) #include_directories("/usr/include/ffmpeg/")