From 4ff7af50cae4890f4e2bcee784b189cb100e98a9 Mon Sep 17 00:00:00 2001 From: Ignacio Castano Date: Sun, 5 Jul 2020 23:05:07 -0700 Subject: [PATCH] Upgrade CMP Core. --- extern/CMP_Core/shaders/BC1_Encode_kernel.cpp | 489 +-- extern/CMP_Core/shaders/BC1_Encode_kernel.h | 20 +- .../CMP_Core/shaders/BC1_Encode_kernel.hlsl | 99 + extern/CMP_Core/shaders/BC2_Encode_kernel.cpp | 95 +- extern/CMP_Core/shaders/BC2_Encode_kernel.h | 2 +- .../CMP_Core/shaders/BC2_Encode_kernel.hlsl | 101 + extern/CMP_Core/shaders/BC3_Encode_kernel.cpp | 69 +- extern/CMP_Core/shaders/BC3_Encode_kernel.h | 2 +- .../CMP_Core/shaders/BC3_Encode_kernel.hlsl | 101 + extern/CMP_Core/shaders/BC4_Encode_kernel.cpp | 33 +- extern/CMP_Core/shaders/BC4_Encode_kernel.h | 2 +- .../CMP_Core/shaders/BC4_Encode_kernel.hlsl | 97 + extern/CMP_Core/shaders/BC5_Encode_kernel.cpp | 89 +- extern/CMP_Core/shaders/BC5_Encode_kernel.h | 2 +- .../CMP_Core/shaders/BC5_Encode_kernel.hlsl | 98 + extern/CMP_Core/shaders/BC6_Encode_kernel.cpp | 60 +- extern/CMP_Core/shaders/BC6_Encode_kernel.h | 34 +- .../CMP_Core/shaders/BC6_Encode_kernel.hlsl | 2572 +++++++++++ extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp | 291 +- extern/CMP_Core/shaders/BC7_Encode_Kernel.h | 162 +- .../CMP_Core/shaders/BC7_Encode_kernel.hlsl | 1936 ++++++++ extern/CMP_Core/shaders/BCn_Common_Kernel.h | 3907 +++++++++-------- extern/CMP_Core/shaders/Common_Def.h | 202 +- extern/CMP_Core/shaders/CopyFiles.bat | 73 +- extern/CMP_Core/source/CMP_Core.h | 4 +- extern/CMP_Core/source/cmp_math_func.h | 143 + extern/CMP_Core/source/cmp_math_vec4.h | 52 +- extern/CMP_Core/test/BlockConstants.h | 294 +- extern/CMP_Core/test/CMakeLists.txt | 2 + extern/CMP_Core/test/CompressonatorTests.cpp | 2089 +++++++-- 30 files changed, 10071 insertions(+), 3049 deletions(-) create mode 100644 extern/CMP_Core/shaders/BC1_Encode_kernel.hlsl create mode 100644 extern/CMP_Core/shaders/BC2_Encode_kernel.hlsl create mode 100644 extern/CMP_Core/shaders/BC3_Encode_kernel.hlsl create 
mode 100644 extern/CMP_Core/shaders/BC4_Encode_kernel.hlsl create mode 100644 extern/CMP_Core/shaders/BC5_Encode_kernel.hlsl create mode 100644 extern/CMP_Core/shaders/BC6_Encode_kernel.hlsl create mode 100644 extern/CMP_Core/shaders/BC7_Encode_kernel.hlsl create mode 100644 extern/CMP_Core/source/cmp_math_func.h diff --git a/extern/CMP_Core/shaders/BC1_Encode_kernel.cpp b/extern/CMP_Core/shaders/BC1_Encode_kernel.cpp index 4c68e42..43d831c 100644 --- a/extern/CMP_Core/shaders/BC1_Encode_kernel.cpp +++ b/extern/CMP_Core/shaders/BC1_Encode_kernel.cpp @@ -1,5 +1,5 @@ -//===================================================================== -// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. +//============================================================================== +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal @@ -19,397 +19,117 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
// -//===================================================================== -#include "BC1_Encode_kernel.h" - -//============================================== BC1 INTERFACES ======================================================= -void CompressBlockBC1_Fast( - CMP_Vec4uc srcBlockTemp[16], - CMP_GLOBAL CGU_UINT32 compressedBlock[2]) -{ - int i, k; - - CMP_Vec3f rgb; - CMP_Vec3f average_rgb; // The centrepoint of the axis - CMP_Vec3f v_rgb; // The axis - CMP_Vec3f uniques[16]; // The list of unique colours - int unique_pixels; // The number of unique pixels - CGU_FLOAT unique_recip; // Reciprocal of the above for fast multiplication - int index_map[16]; // The map of source pixels to unique indices - - CGU_FLOAT pos_on_axis[16]; // The distance each unique falls along the compression axis - CGU_FLOAT dist_from_axis[16]; // The distance each unique falls from the compression axis - CGU_FLOAT left = 0, right = 0, centre = 0; // The extremities and centre (average of left/right) of uniques along the compression axis - CGU_FLOAT axis_mapping_error = 0; // The total computed error in mapping pixels to the axis - - int swap; // Indicator if the RGB values need swapping to generate an opaque result - - // ------------------------------------------------------------------------------------- - // (3) Find the array of unique pixel values and sum them to find their average position - // ------------------------------------------------------------------------------------- - { - // Find the array of unique pixel values and sum them to find their average position - int current_pixel, firstdiff; - current_pixel = unique_pixels = 0; - average_rgb = 0.0f; - firstdiff = -1; - for (i = 0; i<16; i++) - { - for (k = 0; k 0) { rg_pos += rgb.y; rb_pos += rgb.z; } - if (rgb.z > 0) bg_pos += rgb.y; +void CompressBlockBC1_Internal( + const CMP_Vec4uc srcBlockTemp[16], + CMP_GLOBAL CGU_UINT32 compressedBlock[2], + CMP_GLOBAL CMP_BC15Options *BC15options) +{ + CGU_UINT8 srcindex = 0; + 
CGU_FLOAT BlockA[16]; + CGU_Vec3f rgbBlockUV[16]; + for ( CGU_INT32 j = 0; j < 4; j++) { + for ( CGU_INT32 i = 0; i < 4; i++) { + rgbBlockUV[srcindex].x = (CGU_FLOAT)(srcBlockTemp[srcindex].x & 0xFF)/ 255.0f; // R + rgbBlockUV[srcindex].y = (CGU_FLOAT)(srcBlockTemp[srcindex].y & 0xFF)/ 255.0f; // G + rgbBlockUV[srcindex].z = (CGU_FLOAT)(srcBlockTemp[srcindex].z & 0xFF)/ 255.0f; // B + srcindex++; } - v_rgb = v_rgb*unique_recip; - if (rg_pos < 0) v_rgb.x = -v_rgb.x; - if (bg_pos < 0) v_rgb.z = -v_rgb.z; - if ((rg_pos == bg_pos) && (rg_pos == 0)) - if (rb_pos < 0) v_rgb.z = -v_rgb.z; - } - - // ------------------------------------------------------------------------------------- - // (5) Axis projection and remapping - // ------------------------------------------------------------------------------------- - { - CGU_FLOAT v2_recip; - // Normalise the axis for simplicity of future calculation - v2_recip = (v_rgb.x*v_rgb.x + v_rgb.y*v_rgb.y + v_rgb.z*v_rgb.z); - if (v2_recip > 0) - v2_recip = 1.0f / (CGU_FLOAT)sqrt(v2_recip); - else - v2_recip = 1.0f; - v_rgb = v_rgb*v2_recip; } - // ------------------------------------------------------------------------------------- - // (6) Map the axis - // ------------------------------------------------------------------------------------- - // the line joining (and extended on either side of) average and axis - // defines the axis onto which the points will be projected - // Project all the points onto the axis, calculate the distance along - // the axis from the centre of the axis (average) - // From Foley & Van Dam: Closest point of approach of a line (P + v) to a point (R) is - // P + ((R-P).v) / (v.v))v - // The distance along v is therefore (R-P).v / (v.v) - // (v.v) is 1 if v is a unit vector. 
- // - // Calculate the extremities at the same time - these need to be reasonably accurately - // represented in all cases - // - // In this first calculation, also find the error of mapping the points to the axis - this - // is our major indicator of whether or not the block has compressed well - if the points - // map well onto the axis then most of the noise introduced is high-frequency noise + CMP_BC15Options internalOptions = *BC15options; + internalOptions = CalculateColourWeightings3f(rgbBlockUV,internalOptions); + CGU_Vec3f channelWeights = {internalOptions.m_fChannelWeights[0],internalOptions.m_fChannelWeights[1],internalOptions.m_fChannelWeights[2]}; + CGU_BOOL isSRGB = internalOptions.m_bIsSRGB; // feature not supported in this section of code until v4.1 + CGU_Vec2ui cmpBlock = 0; + +//#define CMP_PRINTRESULTS +#ifdef TEST_HEATMAP + + #ifdef CMP_PRINTRESULTS + static int q1= 0,q2= 0,same = 0; + static int testnum = 0; + printf("%4d ",testnum); + #endif { - left = 10000.0f; - right = -10000.0f; - axis_mapping_error = 0; - for (i = 0; i < unique_pixels; i++) - { - // Compute the distance along the axis of the point of closest approach - CMP_Vec3f temp = (uniques[i] - average_rgb); - pos_on_axis[i] = (temp.x * v_rgb.x) + (temp.y * v_rgb.y) + (temp.z * v_rgb.z); - - // Compute the actual point and thence the mapping error - rgb = uniques[i] - (average_rgb + (v_rgb * pos_on_axis[i])); - dist_from_axis[i] = rgb.x*rgb.x + rgb.y*rgb.y + rgb.z*rgb.z; - axis_mapping_error += dist_from_axis[i]; - - // Work out the extremities - if (pos_on_axis[i] < left) - left = pos_on_axis[i]; - if (pos_on_axis[i] > right) - right = pos_on_axis[i]; - } - } - // ------------------------------------------------------------------------------------- - // (7) Now we have a good axis and the basic information about how the points are mapped - // to it - // Our initial guess is to represent the endpoints accurately, by moving the average - // to the centre and recalculating the point 
positions along the line - // ------------------------------------------------------------------------------------- - { - centre = (left + right) / 2; - average_rgb = average_rgb + (v_rgb*centre); - for (i = 0; im_fquality < 0.3)?CompressBC1Block_SRGB(rgbBlockUV):CompressBC1Block(rgbBlockUV); + comp2 = CompressBlockBC1_UNORM(rgbBlockUV, BC15options->m_fquality,BC15options->m_fquality < 0.3?true:false); - // ------------------------------------------------------------------------------------- - // (8) Calculate the high and low output colour values - // Involved in this is a rounding procedure which is undoubtedly slightly twitchy. A - // straight rounded average is not correct, as the decompressor 'unrounds' by replicating - // the top bits to the bottom. - // In order to take account of this process, we don't just apply a straight rounding correction, - // but base our rounding on the input value (a straight rounding is actually pretty good in terms of - // error measure, but creates a visual colour and/or brightness shift relative to the original image) - // The method used here is to apply a centre-biased rounding dependent on the input value, which was - // (mostly by experiment) found to give minimum MSE while preserving the visual characteristics of - // the image. 
- // rgb = (average_rgb + (left|right)*v_rgb); - // ------------------------------------------------------------------------------------- - { - CGU_UINT32 c0, c1, t; - int rd, gd, bd; - rgb = (average_rgb + (v_rgb * left)); - rd = ( CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z); - gd = ( CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z); - bd = ( CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z); - ROUND_AND_CLAMP(rd, 5); - ROUND_AND_CLAMP(gd, 6); - ROUND_AND_CLAMP(bd, 5); - c0 = ((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3); - - rgb = average_rgb + (v_rgb * right); - rd = ( CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z); - gd = ( CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z); - bd = ( CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z); - ROUND_AND_CLAMP(rd, 5); - ROUND_AND_CLAMP(gd, 6); - ROUND_AND_CLAMP(bd, 5); - c1 = (((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3)); - - // Force to be a 4-colour opaque block - in which case, c0 is greater than c1 - // blocktype == 4 - { - if (c0 < c1) - { - t = c0; - c0 = c1; - c1 = t; - swap = 1; - } - else if (c0 == c1) - { - // This block will always be encoded in 3-colour mode - // Need to ensure that only one of the two points gets used, - // avoiding accidentally setting some transparent pixels into the block - for (i = 0; im_fquality < 0.3)?true:false); + float err2 = CMP_RGBBlockError(rgbBlockUV,comp2,(BC15options->m_fquality < 0.3)?true:false); + err = err1-err2; } - compressedBlock[0] = c0 | (c1 << 16); - } - - // ------------------------------------------------------------------------------------- - // (9) Final clustering, creating the 2-bit values that define the output - // ------------------------------------------------------------------------------------- - { - CGU_UINT32 bit; - CGU_FLOAT division; - CGU_FLOAT cluster_x[4]; - CGU_FLOAT cluster_y[4]; - int cluster_count[4]; - - // (blocktype == 4) + if (err > 0.0f) { - compressedBlock[1] = 0; - division = right*2.0f / 3.0f; - centre = (left + right) / 2; // Actually, this code only 
works if centre is 0 or approximately so - - for (i = 0; i<4; i++) - { - cluster_x[i] = cluster_y[i] = 0.0f; - cluster_count[i] = 0; - } - - - for (i = 0; i<16; i++) - { - rgb.z = pos_on_axis[index_map[i]]; - // Endpoints (indicated by block > average) are 0 and 1, while - // interpolants are 2 and 3 - if (fabs(rgb.z) >= division) - bit = 0; - else - bit = 2; - // Positive is in the latter half of the block - if (rgb.z >= centre) - bit += 1; - // Set the output, taking swapping into account - compressedBlock[1] |= ((bit^swap) << (2 * i)); - - // Average the X and Y locations for each cluster - cluster_x[bit] += (CGU_FLOAT)(i & 3); - cluster_y[bit] += (CGU_FLOAT)(i >> 2); - cluster_count[bit]++; - } - - for (i = 0; i<4; i++) - { - CGU_FLOAT cr; - if (cluster_count[i]) - { - cr = 1.0f / cluster_count[i]; - cluster_x[i] *= cr; - cluster_y[i] *= cr; - } - else - { - cluster_x[i] = cluster_y[i] = -1; - } - } - - // patterns in axis position detection - // (same algorithm as used in the SSE version) - if ((compressedBlock[0] & 0xffff) != (compressedBlock[0] >> 16)) - { - CGU_UINT32 i1, k1; - CGU_UINT32 x = 0, y = 0; - int xstep = 0, ystep = 0; - - // Find a corner to search from - for (k1 = 0; k1<4; k1++) - { - switch (k1) - { - case 0: - x = 0; y = 0; xstep = 1; ystep = 1; - break; - case 1: - x = 0; y = 3; xstep = 1; ystep = -1; - break; - case 2: - x = 3; y = 0; xstep = -1; ystep = 1; - break; - case 3: - x = 3; y = 3; xstep = -1; ystep = -1; - break; - } - - for (i1 = 0; i1<4; i1++) - { - if ((POS(x, y + ystep*i1) < POS(x + xstep, y + ystep*i1)) || - (POS(x + xstep, y + ystep*i1) < POS(x + 2 * xstep, y + ystep*i1)) || - (POS(x + 2 * xstep, y + ystep*i1) < POS(x + 3 * xstep, y + ystep*i1)) - ) - break; - if ((POS(x + xstep*i1, y) < POS(x + xstep*i1, y + ystep)) || - (POS(x + xstep*i1, y + ystep) < POS(x + xstep*i1, y + 2 * ystep)) || - (POS(x + xstep*i1, y + 2 * ystep) < POS(x + xstep*i1, y + 3 * ystep)) - ) - break; - } - if (i1 == 4) - break; - } - } + cmpBlock = 
red; + } + else if (err < 0.0f) { + cmpBlock = green; + } + else { + cmpBlock = blue; } - } - // done -} + #ifdef CMP_PRINTRESULTS + printf("Q1 [%4X:%4X] %.3f, ",cmpBlockQ1.x,cmpBlockQ1.y,err1); + printf("Q2 [%4X:%4X] %.3f, ",cmpBlock.x,cmpBlock.y ,err2); + testnum++; + #endif +#else -INLINE void store_uint8(CMP_GLOBAL CGU_UINT8 u_dstptr[8], CGU_UINT32 data[2]) -{ - int shift = 0; - for (CGU_INT k=0; k<4; k++) - { - u_dstptr[k] = (data[0] >> shift)&0xFF; - shift += 8; - } - shift = 0; - for (CGU_INT k=4; k<8; k++) - { - u_dstptr[k] = (data[1] >> shift)&0xFF; - shift += 8; - } -} + // printf("q = %f\n",internalOptions.m_fquality); + cmpBlock = CompressBlockBC1_RGBA_Internal( + rgbBlockUV, + BlockA, + channelWeights, + 0, //internalOptions.m_nAlphaThreshold, bug to investigate in debug is ok release has issue! + 1, + internalOptions.m_fquality, + isSRGB + ); +#endif + compressedBlock[0] = cmpBlock.x; + compressedBlock[1] = cmpBlock.y; -void CompressBlockBC1_Internal( - const CMP_Vec4uc srcBlockTemp[16], - CMP_GLOBAL CGU_UINT32 compressedBlock[2], - CMP_GLOBAL const CMP_BC15Options *BC15options) -{ - CGU_UINT8 blkindex = 0; - CGU_UINT8 srcindex = 0; - CGU_UINT8 rgbBlock[64]; - for ( CGU_INT32 j = 0; j < 4; j++) { - for ( CGU_INT32 i = 0; i < 4; i++) { - rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z; // B - rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // G - rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // R - rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w; // A - srcindex++; - } - } - CMP_BC15Options internalOptions = *BC15options; - CalculateColourWeightings(rgbBlock, &internalOptions); - - CompressRGBBlock(rgbBlock, - compressedBlock, - &internalOptions, - TRUE, - FALSE, - internalOptions.m_nAlphaThreshold); } +#endif -//============================================== USER INTERFACES ======================================================== +//============================================== CPU USER INTERFACES 
======================================================== #ifndef ASPM_GPU int CMP_CDECL CreateOptionsBC1(void **options) { @@ -528,15 +248,27 @@ int CMP_CDECL DecompressBlockBC1(const unsigned char cmpBlock[8], BC15options = &BC15optionsDefault; SetDefaultBC15Options(BC15options); } - DecompressDXTRGB_Internal(srcBlock, ( CGU_UINT32 *)cmpBlock, BC15options); + CGU_Vec2ui compBlock; + + compBlock.x = (CGU_UINT32)cmpBlock[3] << 24 | + (CGU_UINT32)cmpBlock[2] << 16 | + (CGU_UINT32)cmpBlock[1] << 8 | + (CGU_UINT32)cmpBlock[0]; + + compBlock.y = (CGU_UINT32)cmpBlock[7] << 24 | + (CGU_UINT32)cmpBlock[6] << 16 | + (CGU_UINT32)cmpBlock[5] << 8 | + (CGU_UINT32)cmpBlock[4]; + + cmp_decompressDXTRGBA_Internal(srcBlock, compBlock, BC15options->m_mapDecodeRGBA); return CGU_CORE_OK; } #endif //============================================== OpenCL USER INTERFACE ======================================================== -#ifdef ASPM_GPU +#ifdef ASPM_OPENCL CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( CMP_GLOBAL const CMP_Vec4uc* ImageSource, CMP_GLOBAL CGU_UINT8* ImageDestination, @@ -547,14 +279,10 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( CGU_UINT32 xID; CGU_UINT32 yID; -//printf("SourceInfo: (H:%d,W:%d) Quality %1.2f \n", SourceInfo->m_src_height, SourceInfo->m_src_width, SourceInfo->m_fquality); -#ifdef ASPM_GPU + //printf("SourceInfo: (H:%d,W:%d) Quality %1.2f \n", SourceInfo->m_src_height, SourceInfo->m_src_width, SourceInfo->m_fquality); xID = get_global_id(0); yID = get_global_id(1); -#else - xID = 0; - yID = 0; -#endif + if (xID >= (SourceInfo->m_src_width / BlockX)) return; if (yID >= (SourceInfo->m_src_height / BlockX)) return; @@ -572,11 +300,6 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( } srcindex += srcWidth; } - - // fast low quality mode that matches v3.1 code - if (SourceInfo->m_fquality <= 0.04f) - CompressBlockBC1_Fast(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI]); - else - CompressBlockBC1_Internal(srcData, (CMP_GLOBAL CGU_UINT32 
*)&ImageDestination[destI], BC15options); + CompressBlockBC1_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options); } #endif diff --git a/extern/CMP_Core/shaders/BC1_Encode_kernel.h b/extern/CMP_Core/shaders/BC1_Encode_kernel.h index 73a0acf..ac1f5d0 100644 --- a/extern/CMP_Core/shaders/BC1_Encode_kernel.h +++ b/extern/CMP_Core/shaders/BC1_Encode_kernel.h @@ -1,5 +1,5 @@ //===================================================================== -// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal @@ -26,23 +26,5 @@ #include "Common_Def.h" #include "BCn_Common_Kernel.h" -#define CS_RED(r, g, b) (r) -#define CS_GREEN(r, g, b) (g) -#define CS_BLUE(r, g, b) ((b+g)*0.5f) -#define DCS_RED(r, g, b) (r) -#define DCS_GREEN(r, g, b) (g) -#define DCS_BLUE(r, g, b) ((2.0f*b)-g) -#define BYTEPP 4 -#define BC1CompBlockSize 8 - - -#define ROUND_AND_CLAMP(v, shift) \ -{\ - if (v < 0) v = 0;\ - else if (v > 255) v = 255;\ - else v += (0x80>>shift) - (v>>shift);\ -} - -#define POS(x,y) (pos_on_axis[(x)+(y)*4]) #endif \ No newline at end of file diff --git a/extern/CMP_Core/shaders/BC1_Encode_kernel.hlsl b/extern/CMP_Core/shaders/BC1_Encode_kernel.hlsl new file mode 100644 index 0000000..46b1b6b --- /dev/null +++ b/extern/CMP_Core/shaders/BC1_Encode_kernel.hlsl @@ -0,0 +1,99 @@ +//===================================================================== +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// File: BC1_Encode_kernel.hlsl +//-------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+//-------------------------------------------------------------------------------------- +#ifndef ASPM_HLSL +#define ASPM_HLSL +#endif + +cbuffer cbCS : register( b0 ) +{ + uint g_tex_width; + uint g_num_block_x; + uint g_format; + uint g_mode_id; + uint g_start_block_id; + uint g_num_total_blocks; + float g_alpha_weight; + float g_quality; +}; + +#include "BCn_Common_Kernel.h" + +// Source Data +Texture2D g_Input : register( t0 ); +StructuredBuffer g_InBuff : register( t1 ); + +// Compressed Output Data +RWStructuredBuffer g_OutBuff : register( u0 ); + +// Processing multiple blocks at a time +#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block +#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4 +#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y) +#define BLOCK_SIZE_Y 4 +#define BLOCK_SIZE_X 4 + +groupshared float4 shared_temp[THREAD_GROUP_SIZE]; + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID) +{ + // we process 4 BC blocks per thread group + uint blockInGroup = GI / MAX_USED_THREAD; // what BC block this thread is on within this thread group + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on + uint pixelBase = blockInGroup * MAX_USED_THREAD; // the first id of the pixel in this BC block in this thread group + uint pixelInBlock = GI - pixelBase; // id of the pixel in this BC block + + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + + // Load up the pixels + if (pixelInBlock < 16) + { + // load pixels (0..1) + shared_temp[GI] = float4(g_Input.Load( uint3( base_x + pixelInBlock % 4, base_y + pixelInBlock / 4, 0 ) )); + } + + GroupMemoryBarrierWithGroupSync(); + + // Process and save s + if (pixelInBlock == 0) 
+ { + float3 block[16]; + for (int i = 0; i < 16; i++ ) + { + block[i].x = shared_temp[pixelBase + i].x; + block[i].y = shared_temp[pixelBase + i].y; + block[i].z = shared_temp[pixelBase + i].z; + } + + g_OutBuff[blockID] = CompressBlockBC1_UNORM(block,g_quality,false); + } +} diff --git a/extern/CMP_Core/shaders/BC2_Encode_kernel.cpp b/extern/CMP_Core/shaders/BC2_Encode_kernel.cpp index a8b355b..a1a26d0 100644 --- a/extern/CMP_Core/shaders/BC2_Encode_kernel.cpp +++ b/extern/CMP_Core/shaders/BC2_Encode_kernel.cpp @@ -1,5 +1,5 @@ //===================================================================== -// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal @@ -20,70 +20,50 @@ // THE SOFTWARE. // //===================================================================== + #include "BC2_Encode_kernel.h" //============================================== BC2 INTERFACES ======================================================= -void DXTCV11CompressExplicitAlphaBlock(const CGU_UINT8 block_8[16], CMP_GLOBAL CGU_UINT32 block_dxtc[2]) -{ - CGU_UINT8 i; - block_dxtc[0] = block_dxtc[1] = 0; - for (i = 0; i < 16; i++) - { - int v = block_8[i]; - v = (v + 7 - (v >> 4)); - v >>= 4; - if (v < 0) - v = 0; - if (v > 0xf) - v = 0xf; - if (i < 8) - block_dxtc[0] |= v << (4 * i); - else - block_dxtc[1] |= v << (4 * (i - 8)); - } -} - -#define EXPLICIT_ALPHA_PIXEL_MASK 0xf -#define EXPLICIT_ALPHA_PIXEL_BPP 4 - -CGU_INT CompressExplicitAlphaBlock(const CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4], - CMP_GLOBAL CGU_UINT32 compressedBlock[2]) -{ - DXTCV11CompressExplicitAlphaBlock(alphaBlock, compressedBlock); - return CGU_CORE_OK; -} - void CompressBlockBC2_Internal(const CMP_Vec4uc srcBlockTemp[16], CMP_GLOBAL CGU_UINT32 compressedBlock[4], CMP_GLOBAL 
const CMP_BC15Options *BC15options) { - CGU_UINT8 blkindex = 0; - CGU_UINT8 srcindex = 0; - CGU_UINT8 rgbaBlock[64]; - for (CGU_INT32 j = 0; j < 4; j++) { - for (CGU_INT32 i = 0; i < 4; i++) { - rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z; // B - rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // G - rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // R - rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w; // A - srcindex++; - } + + CGU_Vec2ui cmpBlock; + CGU_Vec3f rgbBlock[16]; + CGU_FLOAT BlockA[16]; + + for (CGU_INT32 i = 0; i < 16; i++) { + rgbBlock[i].x = (CGU_FLOAT)(srcBlockTemp[i].x & 0xFF)/255.0f; // R + rgbBlock[i].y = (CGU_FLOAT)(srcBlockTemp[i].y & 0xFF)/255.0f; // G + rgbBlock[i].z = (CGU_FLOAT)(srcBlockTemp[i].z & 0xFF)/255.0f; // B + BlockA[i] = (CGU_FLOAT)(srcBlockTemp[i].w & 0xFF)/255.0f; // A } - CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; - for (CGU_INT32 i = 0; i < 16; i++) - alphaBlock[i] = (CGU_UINT8)(((CGU_INT32*)rgbaBlock)[i] >> RGBA8888_OFFSET_A); + cmpBlock = cmp_compressExplicitAlphaBlock(BlockA); + compressedBlock[DXTC_OFFSET_ALPHA ] = cmpBlock.x; + compressedBlock[DXTC_OFFSET_ALPHA+1] = cmpBlock.y; // Need a copy, as CalculateColourWeightings sets variables in the BC15options CMP_BC15Options internalOptions = *BC15options; - CalculateColourWeightings(rgbaBlock, &internalOptions); - - CGU_INT err = CompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]); - if (err != 0) - return; + internalOptions = CalculateColourWeightings3f(rgbBlock, internalOptions); + internalOptions.m_bUseAlpha = false; + CGU_Vec3f channelWeights = {internalOptions.m_fChannelWeights[0],internalOptions.m_fChannelWeights[1],internalOptions.m_fChannelWeights[2]}; + CGU_Vec3f MinColor = {0,0,0}, MaxColor={0,0,0}; + + cmpBlock = CompressBlockBC1_RGBA_Internal( + rgbBlock, + BlockA, + channelWeights, + 0,//internalOptions.m_nAlphaThreshold, + 1, //internalOptions.m_nRefinementSteps + 
internalOptions.m_fquality, + FALSE); + + compressedBlock[DXTC_OFFSET_RGB] = cmpBlock.x; + compressedBlock[DXTC_OFFSET_RGB+1] = cmpBlock.y; - CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], &internalOptions,FALSE,FALSE,0); } //============================================== USER INTERFACES ======================================================== @@ -141,6 +121,9 @@ int CMP_CDECL SetChannelWeightsBC2(void *options, return CGU_CORE_OK; } +#define EXPLICIT_ALPHA_PIXEL_MASK 0xf +#define EXPLICIT_ALPHA_PIXEL_BPP 4 + // Decompresses an explicit alpha block (DXT3) void DecompressExplicitAlphaBlock(CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4], const CGU_UINT32 compressedBlock[2]) @@ -160,7 +143,13 @@ void DecompressBC2_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[BLOCK_SIZE_4X4X4], CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; DecompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]); - DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],BC15options); + + + CGU_Vec2ui compBlock; + compBlock.x = compressedBlock[DXTC_OFFSET_RGB]; + compBlock.y = compressedBlock[DXTC_OFFSET_RGB+1]; + + cmp_decompressDXTRGBA_Internal(rgbaBlock, compBlock,BC15options->m_mapDecodeRGBA); for (CGU_UINT32 i = 0; i < 16; i++) ((CMP_GLOBAL CGU_UINT32*)rgbaBlock)[i] = (alphaBlock[i] << RGBA8888_OFFSET_A) | (((CMP_GLOBAL CGU_UINT32*)rgbaBlock)[i] & ~(BYTE_MASK << RGBA8888_OFFSET_A)); @@ -219,7 +208,7 @@ int CMP_CDECL DecompressBlockBC2(const unsigned char cmpBlock[16], #endif //============================================== OpenCL USER INTERFACE ======================================================== -#ifdef ASPM_GPU +#ifdef ASPM_OPENCL CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( CMP_GLOBAL const CMP_Vec4uc* ImageSource, CMP_GLOBAL CGU_UINT8* ImageDestination, diff --git a/extern/CMP_Core/shaders/BC2_Encode_kernel.h b/extern/CMP_Core/shaders/BC2_Encode_kernel.h index a152751..4b1487d 100644 --- a/extern/CMP_Core/shaders/BC2_Encode_kernel.h +++ 
b/extern/CMP_Core/shaders/BC2_Encode_kernel.h @@ -1,5 +1,5 @@ //===================================================================== -// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal diff --git a/extern/CMP_Core/shaders/BC2_Encode_kernel.hlsl b/extern/CMP_Core/shaders/BC2_Encode_kernel.hlsl new file mode 100644 index 0000000..0af9bf1 --- /dev/null +++ b/extern/CMP_Core/shaders/BC2_Encode_kernel.hlsl @@ -0,0 +1,101 @@ +//===================================================================== +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +// File: BC1Encode.hlsl +//-------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +//-------------------------------------------------------------------------------------- +#ifndef ASPM_HLSL +#define ASPM_HLSL +#endif + + +cbuffer cbCS : register( b0 ) +{ + uint g_tex_width; + uint g_num_block_x; + uint g_format; + uint g_mode_id; + uint g_start_block_id; + uint g_num_total_blocks; + float g_alpha_weight; + float g_quality; +}; + +#include "BCn_Common_Kernel.h" + +// Source Data +Texture2D g_Input : register( t0 ); +StructuredBuffer g_InBuff : register( t1 ); + +// Compressed Output Data +RWStructuredBuffer g_OutBuff : register( u0 ); + +// Processing multiple blocks at a time +#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block +#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4 +#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y) +#define BLOCK_SIZE_Y 4 +#define BLOCK_SIZE_X 4 + +groupshared float4 shared_temp[THREAD_GROUP_SIZE]; + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID) +{ + // we process 4 BC blocks per thread group + uint blockInGroup = GI / MAX_USED_THREAD; // what BC block this thread is on within this thread group + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on + uint pixelBase = blockInGroup * MAX_USED_THREAD; // the first id of the pixel in this BC block in this thread group + uint pixelInBlock = GI - pixelBase; // id of the pixel in this BC block + + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + + // Load up the pixels + if (pixelInBlock < 16) + { + // load 
pixels (0..1) + shared_temp[GI] = float4(g_Input.Load( uint3( base_x + pixelInBlock % 4, base_y + pixelInBlock / 4, 0 ) )); + } + + GroupMemoryBarrierWithGroupSync(); + + // Process and save s + if (pixelInBlock == 0) + { + float3 blockRGB[16]; + float blockA[16]; + for (int i = 0; i < 16; i++ ) + { + blockRGB[i].x = shared_temp[pixelBase + i].x; + blockRGB[i].y = shared_temp[pixelBase + i].y; + blockRGB[i].z = shared_temp[pixelBase + i].z; + blockA[i] = shared_temp[pixelBase + i].w; + } + g_OutBuff[blockID] = CompressBlockBC2_UNORM(blockRGB,blockA,g_quality,false); + } +} diff --git a/extern/CMP_Core/shaders/BC3_Encode_kernel.cpp b/extern/CMP_Core/shaders/BC3_Encode_kernel.cpp index 8fc30e6..69b57dd 100644 --- a/extern/CMP_Core/shaders/BC3_Encode_kernel.cpp +++ b/extern/CMP_Core/shaders/BC3_Encode_kernel.cpp @@ -1,5 +1,5 @@ //===================================================================== -// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal @@ -23,37 +23,50 @@ #include "BC3_Encode_kernel.h" //============================================== BC3 INTERFACES ======================================================= +#ifndef ASPM_HLSL void CompressBlockBC3_Internal(const CMP_Vec4uc srcBlockTemp[16], CMP_GLOBAL CGU_UINT32 compressedBlock[4], - CMP_GLOBAL const CMP_BC15Options *BC15options) { - CGU_UINT8 blkindex = 0; - CGU_UINT8 srcindex = 0; - CGU_UINT8 rgbaBlock[64]; - for (CGU_INT32 j = 0; j < 4; j++) { - for (CGU_INT32 i = 0; i < 4; i++) { - rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z; // B - rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // G - rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // R - rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w; // A - srcindex++; - } + CMP_GLOBAL CMP_BC15Options *BC15options) { + CGU_Vec3f rgbBlock[16]; + CGU_FLOAT alphaBlock[BLOCK_SIZE_4X4]; + + for (CGU_INT32 i = 0; i < 16; i++) { + rgbBlock[i].x = (CGU_FLOAT)(srcBlockTemp[i].x & 0xFF)/255; // R + rgbBlock[i].y = (CGU_FLOAT)(srcBlockTemp[i].y & 0xFF)/255; // G + rgbBlock[i].z = (CGU_FLOAT)(srcBlockTemp[i].z & 0xFF)/255; // B + alphaBlock[i] = (CGU_FLOAT)(srcBlockTemp[i].w) / 255.0f; } - CMP_BC15Options internalOptions = *BC15options; - CalculateColourWeightings(rgbaBlock, &internalOptions); + CMP_BC15Options internalOptions = *BC15options; - CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; - for (CGU_INT32 i = 0; i < 16; i++) - alphaBlock[i] = - (CGU_UINT8)(((CGU_INT32 *)rgbaBlock)[i] >> RGBA8888_OFFSET_A); + CGU_Vec2ui cmpBlock; + + cmpBlock = cmp_compressAlphaBlock(alphaBlock,internalOptions.m_fquality); + compressedBlock[0] = cmpBlock.x; + compressedBlock[1] = cmpBlock.y; + + for (CGU_INT32 i = 0; i < 16; i++) { + alphaBlock[i] = (CGU_FLOAT)(srcBlockTemp[i].w); + } + + internalOptions = 
CalculateColourWeightings3f(rgbBlock, internalOptions); + CGU_Vec3f channelWeights = {internalOptions.m_fChannelWeights[0],internalOptions.m_fChannelWeights[1],internalOptions.m_fChannelWeights[2]}; + + cmpBlock = CompressBlockBC1_RGBA_Internal( + rgbBlock, + alphaBlock, + channelWeights, + 0, // internalOptions.m_nAlphaThreshold, + 1, // internalOptions.m_nRefinementSteps + internalOptions.m_fquality, + FALSE); - CGU_INT err = CompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]); - if (err != 0) return; - CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], &internalOptions, - FALSE, FALSE, 0); + compressedBlock[2] = cmpBlock.x; + compressedBlock[3] = cmpBlock.y; } +#endif //============================================== USER INTERFACES ======================================================== #ifndef ASPM_GPU @@ -117,8 +130,12 @@ void DecompressBC3_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], const CMP_BC15Options *BC15options) { CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; - DecompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]); - DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],BC15options); + cmp_decompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]); + + CGU_Vec2ui compBlock; + compBlock.x = compressedBlock[DXTC_OFFSET_RGB]; + compBlock.y = compressedBlock[DXTC_OFFSET_RGB+1]; + cmp_decompressDXTRGBA_Internal(rgbaBlock, compBlock,BC15options->m_mapDecodeRGBA); for (CGU_UINT32 i = 0; i < 16; i++) ((CMP_GLOBAL CGU_UINT32 *)rgbaBlock)[i] = @@ -178,7 +195,7 @@ int CMP_CDECL DecompressBlockBC3(const unsigned char cmpBlock[16], #endif //============================================== OpenCL USER INTERFACE ==================================================== -#ifdef ASPM_GPU +#ifdef ASPM_OPENCL CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( CMP_GLOBAL const CMP_Vec4uc *ImageSource, CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo, diff --git 
a/extern/CMP_Core/shaders/BC3_Encode_kernel.h b/extern/CMP_Core/shaders/BC3_Encode_kernel.h index 9e97da1..0deb0cf 100644 --- a/extern/CMP_Core/shaders/BC3_Encode_kernel.h +++ b/extern/CMP_Core/shaders/BC3_Encode_kernel.h @@ -1,5 +1,5 @@ //===================================================================== -// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal diff --git a/extern/CMP_Core/shaders/BC3_Encode_kernel.hlsl b/extern/CMP_Core/shaders/BC3_Encode_kernel.hlsl new file mode 100644 index 0000000..03f1cff --- /dev/null +++ b/extern/CMP_Core/shaders/BC3_Encode_kernel.hlsl @@ -0,0 +1,101 @@ +//===================================================================== +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// File: BC1Encode.hlsl +//-------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +//-------------------------------------------------------------------------------------- +#ifndef ASPM_HLSL +#define ASPM_HLSL +#endif + +cbuffer cbCS : register( b0 ) +{ + uint g_tex_width; + uint g_num_block_x; + uint g_format; + uint g_mode_id; + uint g_start_block_id; + uint g_num_total_blocks; + float g_alpha_weight; + float g_quality; +}; + +#include "BCn_Common_Kernel.h" + +// Source Data +Texture2D g_Input : register( t0 ); +StructuredBuffer g_InBuff : register( t1 ); + +// Compressed Output Data +RWStructuredBuffer g_OutBuff : register( u0 ); + +// Processing multiple blocks at a time +#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block +#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4 +#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y) +#define BLOCK_SIZE_Y 4 +#define BLOCK_SIZE_X 4 + +groupshared float4 shared_temp[THREAD_GROUP_SIZE]; + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID) +{ + // we process 4 BC blocks per thread group + uint blockInGroup = GI / MAX_USED_THREAD; // what BC block this thread is on within this thread group + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + 
blockInGroup; // what global BC block this thread is on + uint pixelBase = blockInGroup * MAX_USED_THREAD; // the first id of the pixel in this BC block in this thread group + uint pixelInBlock = GI - pixelBase; // id of the pixel in this BC block + + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + + // Load up the pixels + if (pixelInBlock < 16) + { + // load pixels (0..1) + shared_temp[GI] = float4(g_Input.Load( uint3( base_x + pixelInBlock % 4, base_y + pixelInBlock / 4, 0 ) )); + } + + GroupMemoryBarrierWithGroupSync(); + + // Process and save s + if (pixelInBlock == 0) + { + float3 blockRGB[16]; + float blockA[16]; + for (int i = 0; i < 16; i++ ) + { + blockRGB[i].x = shared_temp[pixelBase + i].x; + blockRGB[i].y = shared_temp[pixelBase + i].y; + blockRGB[i].z = shared_temp[pixelBase + i].z; + blockA[i] = shared_temp[pixelBase + i].w; + } + + g_OutBuff[blockID] = CompressBlockBC3_UNORM(blockRGB,blockA, g_quality,false); + } +} diff --git a/extern/CMP_Core/shaders/BC4_Encode_kernel.cpp b/extern/CMP_Core/shaders/BC4_Encode_kernel.cpp index 6242cf8..accb374 100644 --- a/extern/CMP_Core/shaders/BC4_Encode_kernel.cpp +++ b/extern/CMP_Core/shaders/BC4_Encode_kernel.cpp @@ -1,5 +1,5 @@ //===================================================================== -// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal @@ -32,15 +32,19 @@ void CompressBlockBC4_Internal(const CMP_Vec4uc srcBlockTemp[16], } CGU_UINT8 blkindex = 0; CGU_UINT8 srcindex = 0; - CGU_UINT8 alphaBlock[16]; + CGU_FLOAT alphaBlock[16]; for (CGU_INT32 j = 0; j < 4; j++) { for (CGU_INT32 i = 0; i < 4; i++) { - alphaBlock[blkindex++] = - (CGU_UINT8)srcBlockTemp[srcindex].x; // Red channel + alphaBlock[blkindex++] = srcBlockTemp[srcindex].x / 255.0f; // Red channel srcindex++; } } - CompressAlphaBlock(alphaBlock, (CMP_GLOBAL CGU_UINT32 *)compressedBlock); + + CGU_Vec2ui cmpBlock; + + cmpBlock = cmp_compressAlphaBlock(alphaBlock,BC15options->m_fquality); + compressedBlock[0] = cmpBlock.x; + compressedBlock[1] = cmpBlock.y; } void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], @@ -48,7 +52,7 @@ void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], const CMP_BC15Options *BC15options) { if (BC15options) {} CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4]; - DecompressAlphaBlock(alphaBlock, compressedBlock); + cmp_decompressAlphaBlock(alphaBlock, compressedBlock); CGU_UINT8 blkindex = 0; CGU_UINT8 srcindex = 0; @@ -63,18 +67,27 @@ void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], } } -void CompressBlockBC4_SingleChannel(const CGU_UINT8 srcBlockTemp[16], +void CompressBlockBC4_SingleChannel(const CGU_UINT8 srcBlockTemp[BLOCK_SIZE_4X4], CMP_GLOBAL CGU_UINT32 compressedBlock[2], CMP_GLOBAL const CMP_BC15Options *BC15options) { if (BC15options) {} - CompressAlphaBlock(srcBlockTemp, (CMP_GLOBAL CGU_UINT32 *)compressedBlock); + CGU_FLOAT alphaBlock[BLOCK_SIZE_4X4]; + + for (CGU_INT32 i = 0; i < BLOCK_SIZE_4X4; i++) alphaBlock[i] = (srcBlockTemp[i] / 255.0f); + + CGU_Vec2ui cmpBlock; + cmpBlock = cmp_compressAlphaBlock(alphaBlock,BC15options->m_fquality); + compressedBlock[0] = cmpBlock.x; + compressedBlock[1] = cmpBlock.y; + + } void 
DecompressBlockBC4_SingleChannel(CGU_UINT8 srcBlockTemp[16], const CGU_UINT32 compressedBlock[2], const CMP_BC15Options *BC15options) { if (BC15options) {} - DecompressAlphaBlock(srcBlockTemp, compressedBlock); + cmp_decompressAlphaBlock(srcBlockTemp, compressedBlock); } //============================================== USER INTERFACES ======================================================== @@ -161,7 +174,7 @@ int CMP_CDECL DecompressBlockBC4(const unsigned char cmpBlock[8], #endif //============================================== OpenCL USER INTERFACE ==================================================== -#ifdef ASPM_GPU +#ifdef ASPM_OPENCL CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( CMP_GLOBAL const CMP_Vec4uc *ImageSource, CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo, diff --git a/extern/CMP_Core/shaders/BC4_Encode_kernel.h b/extern/CMP_Core/shaders/BC4_Encode_kernel.h index 65af4a7..62648cf 100644 --- a/extern/CMP_Core/shaders/BC4_Encode_kernel.h +++ b/extern/CMP_Core/shaders/BC4_Encode_kernel.h @@ -1,5 +1,5 @@ //===================================================================== -// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal diff --git a/extern/CMP_Core/shaders/BC4_Encode_kernel.hlsl b/extern/CMP_Core/shaders/BC4_Encode_kernel.hlsl new file mode 100644 index 0000000..4f40ce1 --- /dev/null +++ b/extern/CMP_Core/shaders/BC4_Encode_kernel.hlsl @@ -0,0 +1,97 @@ +//===================================================================== +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// File: BC4Encode.hlsl +//-------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+//-------------------------------------------------------------------------------------- +#ifndef ASPM_HLSL +#define ASPM_HLSL +#endif + +cbuffer cbCS : register( b0 ) +{ + uint g_tex_width; + uint g_num_block_x; + uint g_format; + uint g_mode_id; + uint g_start_block_id; + uint g_num_total_blocks; + float g_alpha_weight; + float g_quality; +}; + +#include "BCn_Common_Kernel.h" + +// Source Data +Texture2D g_Input : register( t0 ); +StructuredBuffer g_InBuff : register( t1 ); + +// Compressed Output Data +RWStructuredBuffer g_OutBuff : register( u0 ); + +// Processing multiple blocks at a time +#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block +#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4 +#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y) +#define BLOCK_SIZE_Y 4 +#define BLOCK_SIZE_X 4 + +groupshared float4 shared_temp[THREAD_GROUP_SIZE]; + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID) +{ + // we process 4 BC blocks per thread group + uint blockInGroup = GI / MAX_USED_THREAD; // what BC block this thread is on within this thread group + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on + uint pixelBase = blockInGroup * MAX_USED_THREAD; // the first id of the pixel in this BC block in this thread group + uint pixelInBlock = GI - pixelBase; // id of the pixel in this BC block + + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + + // Load up the pixels + if (pixelInBlock < 16) + { + // load pixels (0..1) + shared_temp[GI] = float4(g_Input.Load( uint3( base_x + pixelInBlock % 4, base_y + pixelInBlock / 4, 0 ) )); + } + + GroupMemoryBarrierWithGroupSync(); + + // Process and save s + if (pixelInBlock == 0) 
+ { + float block[16]; + // gather the .x (red) channel of this block's 16 pixels for BC4 encoding + for ( uint i = 0; i < 16; i ++ ) + { + block[i].x = shared_temp[pixelBase + i].x; + } + g_OutBuff[blockID] = CompressBlockBC4_UNORM(block, g_quality); + } +} diff --git a/extern/CMP_Core/shaders/BC5_Encode_kernel.cpp b/extern/CMP_Core/shaders/BC5_Encode_kernel.cpp index d4784dd..5862478 100644 --- a/extern/CMP_Core/shaders/BC5_Encode_kernel.cpp +++ b/extern/CMP_Core/shaders/BC5_Encode_kernel.cpp @@ -1,5 +1,5 @@ //===================================================================== -// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal @@ -24,36 +24,45 @@ //============================================== BC5 INTERFACES ======================================================= -void CompressBlockBC5_Internal(CMP_Vec4uc srcBlockTemp[16], +CGU_Vec4ui CompressBC5Block_Internal(CMP_IN CGU_FLOAT aBlockU[16], CMP_IN CGU_FLOAT aBlockV[16], CMP_IN CGU_FLOAT fquality) +{ + CGU_Vec4ui compBlock; + CGU_Vec2ui cmpBlock; + + cmpBlock = cmp_compressAlphaBlock(aBlockU,fquality); + compBlock.x = cmpBlock.x; + compBlock.y = cmpBlock.y; + + cmpBlock = cmp_compressAlphaBlock(aBlockV,fquality); + compBlock.z = cmpBlock.x; + compBlock.w = cmpBlock.y; + return compBlock; +} + +#ifndef ASPM_HLSL +void CompressBlockBC5_Internal(CMP_Vec4uc srcBlockTemp[16], // range 0 to 255 CMP_GLOBAL CGU_UINT32 compressedBlock[4], CMP_GLOBAL CMP_BC15Options *BC15options) { - if (BC15options->m_fquality) { - // Resreved - } - CGU_UINT8 blkindex = 0; - CGU_UINT8 srcindex = 0; - CGU_UINT8 alphaBlock[16]; - for (CGU_INT32 j = 0; j < 4; j++) { - for (CGU_INT32 i = 0; i < 4; i++) { - alphaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // Red channel - srcindex++; - } - } - 
CompressAlphaBlock(alphaBlock,&compressedBlock[0]); - - blkindex = 0; - srcindex = 0; - for (CGU_INT32 j = 0; j < 4; j++) { - for (CGU_INT32 i = 0; i < 4; i++) { - alphaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // Green channel - srcindex++; - } + CGU_Vec4ui cmpBlock; + CGU_FLOAT alphaBlockU[16]; + CGU_FLOAT alphaBlockV[16]; + CGU_UINT32 i; + + for (i = 0; i < 16; i++) { + alphaBlockU[i] = srcBlockTemp[i].x / 255.0f; + alphaBlockV[i] = srcBlockTemp[i].y / 255.0f; } - CompressAlphaBlock(alphaBlock,&compressedBlock[2]); + cmpBlock = CompressBC5Block_Internal(alphaBlockU, alphaBlockV,BC15options->m_fquality); + compressedBlock[0] = cmpBlock.x; + compressedBlock[1] = cmpBlock.y; + compressedBlock[2] = cmpBlock.z; + compressedBlock[3] = cmpBlock.w; } +#endif +#ifndef ASPM_GPU void DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], CGU_UINT32 compressedBlock[4], CMP_BC15Options *BC15options) @@ -61,8 +70,8 @@ void DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], CGU_UINT8 alphaBlockR[BLOCK_SIZE_4X4]; CGU_UINT8 alphaBlockG[BLOCK_SIZE_4X4]; - DecompressAlphaBlock(alphaBlockR, &compressedBlock[0]); - DecompressAlphaBlock(alphaBlockG, &compressedBlock[2]); + cmp_decompressAlphaBlock(alphaBlockR, &compressedBlock[0]); + cmp_decompressAlphaBlock(alphaBlockG, &compressedBlock[2]); CGU_UINT8 blkindex = 0; CGU_UINT8 srcindex = 0; @@ -94,15 +103,29 @@ void DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64], } - void CompressBlockBC5_DualChannel_Internal(const CGU_UINT8 srcBlockR[16], const CGU_UINT8 srcBlockG[16], CMP_GLOBAL CGU_UINT32 compressedBlock[4], CMP_GLOBAL const CMP_BC15Options *BC15options) { if (BC15options) {} - CompressAlphaBlock(srcBlockR,&compressedBlock[0]); - CompressAlphaBlock(srcBlockG,&compressedBlock[2]); + CGU_Vec2ui cmpBlock; + CGU_FLOAT srcAlphaRF[16]; + CGU_FLOAT srcAlphaGF[16]; + + for (CGU_INT i =0; i< 16; i++) + { + srcAlphaRF[i] = srcBlockR[i] / 255.0f; // normalize to 0..1 like the other cmp_compressAlphaBlock callers (they divide by 255.0f) + srcAlphaGF[i] = srcBlockG[i] / 255.0f; + } + + cmpBlock = 
cmp_compressAlphaBlock(srcAlphaRF,BC15options->m_fquality); + compressedBlock[0] = cmpBlock.x; + compressedBlock[1] = cmpBlock.y; + + cmpBlock = cmp_compressAlphaBlock(srcAlphaGF,BC15options->m_fquality); + compressedBlock[2] = cmpBlock.x; + compressedBlock[3] = cmpBlock.y; } void DecompressBC5_DualChannel_Internal(CMP_GLOBAL CGU_UINT8 srcBlockR[16], @@ -111,10 +134,10 @@ void DecompressBC5_DualChannel_Internal(CMP_GLOBAL CGU_UINT8 srcBlockR[16], const CMP_BC15Options *BC15options) { if (BC15options) {} - DecompressAlphaBlock(srcBlockR, &compressedBlock[0]); - DecompressAlphaBlock(srcBlockG, &compressedBlock[2]); + cmp_decompressAlphaBlock(srcBlockR, &compressedBlock[0]); + cmp_decompressAlphaBlock(srcBlockG, &compressedBlock[2]); } - +#endif //============================================== USER INTERFACES ======================================================== #ifndef ASPM_GPU @@ -224,7 +247,7 @@ int CMP_CDECL DecompressBlockBC5(const CGU_UINT8 cmpBlock[16], #endif //============================================== OpenCL USER INTERFACE ==================================================== -#ifdef ASPM_GPU +#ifdef ASPM_OPENCL CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(CMP_GLOBAL const CMP_Vec4uc* ImageSource, CMP_GLOBAL CGU_UINT8* ImageDestination, CMP_GLOBAL Source_Info* SourceInfo, diff --git a/extern/CMP_Core/shaders/BC5_Encode_kernel.h b/extern/CMP_Core/shaders/BC5_Encode_kernel.h index 89cffcc..6c604a5 100644 --- a/extern/CMP_Core/shaders/BC5_Encode_kernel.h +++ b/extern/CMP_Core/shaders/BC5_Encode_kernel.h @@ -1,5 +1,5 @@ //===================================================================== -// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal diff --git a/extern/CMP_Core/shaders/BC5_Encode_kernel.hlsl b/extern/CMP_Core/shaders/BC5_Encode_kernel.hlsl new file mode 100644 index 0000000..0719e7c --- /dev/null +++ b/extern/CMP_Core/shaders/BC5_Encode_kernel.hlsl @@ -0,0 +1,98 @@ +//===================================================================== +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// File: BC1Encode.hlsl +//-------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+//-------------------------------------------------------------------------------------- +#ifndef ASPM_HLSL +#define ASPM_HLSL +#endif + +cbuffer cbCS : register( b0 ) +{ + uint g_tex_width; + uint g_num_block_x; + uint g_format; + uint g_mode_id; + uint g_start_block_id; + uint g_num_total_blocks; + float g_alpha_weight; + float g_quality; +}; + +#include "BCn_Common_Kernel.h" + +// Source Data +Texture2D g_Input : register( t0 ); +StructuredBuffer g_InBuff : register( t1 ); + +// Compressed Output Data +RWStructuredBuffer g_OutBuff : register( u0 ); + +// Processing multiple blocks at a time +#define MAX_USED_THREAD 16 // pixels in a BC (block compressed) block +#define BLOCK_IN_GROUP 4 // the number of BC blocks a thread group processes = 64 / 16 = 4 +#define THREAD_GROUP_SIZE 64 // 4 blocks where a block is (BLOCK_SIZE_X x BLOCK_SIZE_Y) +#define BLOCK_SIZE_Y 4 +#define BLOCK_SIZE_X 4 + +groupshared float4 shared_temp[THREAD_GROUP_SIZE]; + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID) +{ + // we process 4 BC blocks per thread group + uint blockInGroup = GI / MAX_USED_THREAD; // what BC block this thread is on within this thread group + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on + uint pixelBase = blockInGroup * MAX_USED_THREAD; // the first id of the pixel in this BC block in this thread group + uint pixelInBlock = GI - pixelBase; // id of the pixel in this BC block + + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + + // Load up the pixels + if (pixelInBlock < 16) + { + // load pixels (0..1) + shared_temp[GI] = float4(g_Input.Load( uint3( base_x + pixelInBlock % 4, base_y + pixelInBlock / 4, 0 ) )); + } + + GroupMemoryBarrierWithGroupSync(); + + // Process and save s + if (pixelInBlock == 0) 
+ { + float blockU[16]; + float blockV[16]; + for ( uint i = 0; i < 16; i ++ ) + { + blockU[i] = shared_temp[pixelBase + i].x; + blockV[i] = shared_temp[pixelBase + i].y; + } + g_OutBuff[blockID] = CompressBlockBC5_UNORM(blockU,blockV,g_quality); + } +} diff --git a/extern/CMP_Core/shaders/BC6_Encode_kernel.cpp b/extern/CMP_Core/shaders/BC6_Encode_kernel.cpp index f131583..6b3ea72 100644 --- a/extern/CMP_Core/shaders/BC6_Encode_kernel.cpp +++ b/extern/CMP_Core/shaders/BC6_Encode_kernel.cpp @@ -1,5 +1,5 @@ //===================================================================== -// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal @@ -29,11 +29,11 @@ void memset(CGU_UINT8 *srcdata, CGU_UINT8 value, CGU_INT size) *srcdata++ = value; } -void memcpy(CGU_UINT8 *srcdata, CGU_UINT8 *dstdata, CGU_INT size) +void memcpy(CGU_UINT8 *dstdata, CGU_UINT8 *srcdata, CGU_INT size) { for (CGU_INT i = 0; i < size; i++) { - *srcdata = *dstdata; + *dstdata = *srcdata; srcdata++; dstdata++; } @@ -509,7 +509,7 @@ CGU_FLOAT totalError_d(CGU_FLOAT data[MAX_ENTRIES][MAX_DIMENSION_BIG], CGU_FLOAT // index, uncentered, in the range 0..k-1 // -void quant_AnD_Shell(CGU_FLOAT* v_, CGU_INT k, CGU_INT n, CGU_INT *idx) +void quant_AnD_Shell(CGU_FLOAT* v_, CGU_INT k, CGU_INT n, CGU_INT idx[MAX_ENTRIES]) { #define MAX_BLOCK MAX_ENTRIES CGU_INT i, j; @@ -2530,7 +2530,7 @@ CGU_INT Unquantize(CGU_INT comp, unsigned char uBitsPerComp, CGU_BOOL bSigned) return unq; } -CGU_INT finish_unquantizeF16(CGU_INT q, CGU_BOOL isSigned) +CGU_INT finish_unquantizef16(CGU_INT q, CGU_BOOL isSigned) { // Is it F16 Signed else F16 Unsigned if (isSigned) @@ -2565,8 +2565,8 @@ void decompress_endpoints1(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M out[0][1][i] = 
(CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false); // F16 format - outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); - outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); + outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false); + outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false); } } else @@ -2581,8 +2581,8 @@ void decompress_endpoints1(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false); // F16 format - outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); - outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); + outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false); + outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false); } } @@ -2602,8 +2602,8 @@ void decompress_endpoints1(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false); // F16 format - outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); - outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); + outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false); + outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false); } } else @@ -2618,8 +2618,8 @@ void decompress_endpoints1(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false); // F16 format - outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); - outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); + outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false); + outf[0][1][i] = 
(CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false); } } } @@ -2659,10 +2659,10 @@ void decompress_endpoints2(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, true); // F16 format - outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], true); - outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], true); - outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], true); - outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], true); + outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], true); + outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], true); + outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], true); + outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], true); } } @@ -2682,10 +2682,10 @@ void decompress_endpoints2(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false); // nbits to F16 format - outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); - outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); - outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], false); - outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], false); + outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false); + outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false); + outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], false); + outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], false); } } @@ -2713,10 +2713,10 @@ void decompress_endpoints2(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false); // nbits to F16 format - 
outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); - outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); - outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], false); - outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], false); + outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false); + outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false); + outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], false); + outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], false); } } @@ -2736,10 +2736,10 @@ void decompress_endpoints2(BC6H_Encode_local * bc6h_format, CGU_INT oEndPoints[M out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false); // nbits to F16 format - outf[0][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][0][i], false); - outf[0][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[0][1][i], false); - outf[1][0][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][0][i], false); - outf[1][1][i] = (CGU_FLOAT)finish_unquantizeF16((int)out[1][1][i], false); + outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false); + outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false); + outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], false); + outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], false); } } } @@ -3906,7 +3906,7 @@ int CMP_CDECL DecompressBlockBC6(const unsigned char cmpBlock[16], #endif // !ASPM_GPU //============================================== OpenCL USER INTERFACE ==================================================== -#ifdef ASPM_GPU +#ifdef ASPM_OPENCL CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( CMP_GLOBAL CGU_UINT8* p_source_pixels, CMP_GLOBAL CGU_UINT8* p_encoded_blocks, diff --git a/extern/CMP_Core/shaders/BC6_Encode_kernel.h b/extern/CMP_Core/shaders/BC6_Encode_kernel.h index 1a6c206..435993d 100644 
--- a/extern/CMP_Core/shaders/BC6_Encode_kernel.h +++ b/extern/CMP_Core/shaders/BC6_Encode_kernel.h @@ -1,5 +1,5 @@ //===================================================================== -// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal @@ -23,6 +23,8 @@ #ifndef BC6_ENCODE_KERNEL_H #define BC6_ENCODE_KERNEL_H +#pragma warning(disable:4505) // disable warnings on unreferenced local function has been removed + #include "Common_Def.h" #define MAX_TRACE 10 @@ -127,25 +129,25 @@ typedef struct __constant ModePartitions ModePartition[MAX_BC6H_MODES + 1] = { - 0, 0,0,0, 0, 0, 0, 0, 0, // Mode = Invaild + {0, {0,0,0}, 0, 0, 0, 0, 0}, // Mode = Invaild // Two region Partition - 10, 5,5,5, 1, 2, 3, 0x00, 31, // Mode = 1 - 7, 6,6,6, 1, 2, 3, 0x01, 248, // Mode = 2 - 11, 5,4,4, 1, 5, 3, 0x02, 15, // Mode = 3 - 11, 4,5,4, 1, 5, 3, 0x06, 15, // Mode = 4 - 11, 4,4,5, 1, 5, 3, 0x0a, 15, // Mode = 5 - 9, 5,5,5, 1, 5, 3, 0x0e, 62, // Mode = 6 - 8, 6,5,5, 1, 5, 3, 0x12, 124, // Mode = 7 - 8, 5,6,5, 1, 5, 3, 0x16, 124, // Mode = 8 - 8, 5,5,6, 1, 5, 3, 0x1a, 124, // Mode = 9 - 6, 6,6,6, 0, 5, 3, 0x1e, 496, // Mode = 10 + { 10, {5,5,5}, 1, 2, 3, 0x00, 31 }, // Mode = 1 + { 7, {6,6,6}, 1, 2, 3, 0x01, 248}, // Mode = 2 + { 11, {5,4,4}, 1, 5, 3, 0x02, 15 }, // Mode = 3 + { 11, {4,5,4}, 1, 5, 3, 0x06, 15 }, // Mode = 4 + { 11, {4,4,5}, 1, 5, 3, 0x0a, 15 }, // Mode = 5 + { 9, {5,5,5}, 1, 5, 3, 0x0e, 62 }, // Mode = 6 + { 8, {6,5,5}, 1, 5, 3, 0x12, 124}, // Mode = 7 + { 8, {5,6,5}, 1, 5, 3, 0x16, 124}, // Mode = 8 + { 8, {5,5,6}, 1, 5, 3, 0x1a, 124}, // Mode = 9 + { 6, {6,6,6}, 0, 5, 3, 0x1e, 496}, // Mode = 10 // One region Partition - 10, 10,10,10, 0, 5, 4, 0x03, 31, // Mode = 11 - 11, 9,9,9, 1, 5, 4, 0x07, 15, // Mode = 12 - 12, 
8,8,8, 1, 5, 4, 0x0b, 7, // Mode = 13 - 16, 4,4,4, 1, 5, 4, 0x0f, 1, // Mode = 14 + {10, {10,10,10}, 0, 5, 4, 0x03, 31}, // Mode = 11 + {11, {9,9,9 }, 1, 5, 4, 0x07, 15}, // Mode = 12 + {12, {8,8,8 }, 1, 5, 4, 0x0b, 7 }, // Mode = 13 + {16, {4,4,4 }, 1, 5, 4, 0x0f, 1 } // Mode = 14 }; //================================================ diff --git a/extern/CMP_Core/shaders/BC6_Encode_kernel.hlsl b/extern/CMP_Core/shaders/BC6_Encode_kernel.hlsl new file mode 100644 index 0000000..07d4117 --- /dev/null +++ b/extern/CMP_Core/shaders/BC6_Encode_kernel.hlsl @@ -0,0 +1,2572 @@ +//-------------------------------------------------------------------------------------- +// File: BC6HEncode.hlsl +// +// The Compute Shader for BC6H Encoder +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +//-------------------------------------------------------------------------------------- + +#define REF_DEVICE +#ifndef ASPM_HLSL +#define ASPM_HLSL +#endif + +#define UINTLENGTH 32 +#define NCHANNELS 3 +#define SIGNED_F16 96 +#define UNSIGNED_F16 95 +#define MAX_FLOAT asfloat(0x7F7FFFFF) +#define MIN_FLOAT asfloat(0xFF7FFFFF) +#define MAX_INT asint(0x7FFFFFFF) +#define MIN_INT asint(0x80000000) + +cbuffer cbCS : register( b0 ) +{ + uint g_tex_width; + uint g_num_block_x; + uint g_format; //either SIGNED_F16 for DXGI_FORMAT_BC6H_SF16 or UNSIGNED_F16 for DXGI_FORMAT_BC6H_UF16 + uint g_mode_id; + uint g_start_block_id; + uint g_num_total_blocks; + float g_alpha_weight; + float g_quality; +}; + +static const uint candidateModeMemory[14] = { 0x00, 0x01, 0x02, 0x06, 0x0A, 0x0E, 0x12, 0x16, 0x1A, 0x1E, 0x03, 0x07, 0x0B, 0x0F }; +static const uint candidateModeFlag[14] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; +static const bool candidateModeTransformed[14] = { true, true, true, true, true, true, true, true, true, false, false, true, true, true }; +static const uint4 candidateModePrec[14] = { uint4(10,5,5,5), uint4(7,6,6,6), + 
uint4(11,5,4,4), uint4(11,4,5,4), uint4(11,4,4,5), uint4(9,5,5,5), + uint4(8,6,5,5), uint4(8,5,6,5), uint4(8,5,5,6), uint4(6,6,6,6), + uint4(10,10,10,10), uint4(11,9,9,9), uint4(12,8,8,8), uint4(16,4,4,4) }; + +/*static const uint4x4 candidateSection[32] = +{ + {0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1}, {0,0,0,1, 0,0,0,1, 0,0,0,1, 0,0,0,1}, {0,1,1,1, 0,1,1,1, 0,1,1,1, 0,1,1,1}, {0,0,0,1, 0,0,1,1, 0,0,1,1, 0,1,1,1}, + {0,0,0,0, 0,0,0,1, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,1, 0,0,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,0,1,1, 0,1,1,1}, + {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,1,1,1}, + {0,0,0,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,0, 1,1,1,1}, + {0,0,0,0, 1,0,0,0, 1,1,1,0, 1,1,1,1}, {0,1,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,1,0}, {0,1,1,1, 0,0,1,1, 0,0,0,1, 0,0,0,0}, + {0,0,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,1,0,0, 1,1,1,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,1, 0,0,1,1, 0,0,1,1, 0,0,0,1}, + {0,0,1,1, 0,0,0,1, 0,0,0,1, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,0, 0,1,1,0, 0,1,1,0, 0,1,1,0}, {0,0,1,1, 0,1,1,0, 0,1,1,0, 1,1,0,0}, + {0,0,0,1, 0,1,1,1, 1,1,1,0, 1,0,0,0}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 0,0,0,0}, {0,1,1,1, 0,0,0,1, 1,0,0,0, 1,1,1,0}, {0,0,1,1, 1,0,0,1, 1,0,0,1, 1,1,0,0} +};*/ + +static const uint candidateSectionBit[32] = +{ + 0xCCCC, 0x8888, 0xEEEE, 0xECC8, + 0xC880, 0xFEEC, 0xFEC8, 0xEC80, + 0xC800, 0xFFEC, 0xFE80, 0xE800, + 0xFFE8, 0xFF00, 0xFFF0, 0xF000, + 0xF710, 0x008E, 0x7100, 0x08CE, + 0x008C, 0x7310, 0x3100, 0x8CCE, + 0x088C, 0x3110, 0x6666, 0x366C, + 0x17E8, 0x0FF0, 0x718E, 0x399C +}; + +static const uint candidateFixUpIndex1D[32] = +{ + 15,15,15,15, + 15,15,15,15, + 15,15,15,15, + 15,15,15,15, + 15, 2, 8, 2, + 2, 8, 8,15, + 2, 8, 2, 2, + 8, 8, 2, 2 +}; + 
+//0, 9, 18, 27, 37, 46, 55, 64 +static const uint aStep1[64] = {0,0,0,0,0,1,1,1, + 1,1,1,1,1,1,2,2, + 2,2,2,2,2,2,2,3, + 3,3,3,3,3,3,3,3, + 3,4,4,4,4,4,4,4, + 4,4,5,5,5,5,5,5, + 5,5,5,6,6,6,6,6, + 6,6,6,6,7,7,7,7}; + +//0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 +static const uint aStep2[64] = { 0, 0, 0, 1, 1, 1, 1, 2, + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 7, 7, 7, + 7, 8, 8, 8, 8, 9, 9, 9, + 9,10,10,10,10,10,11,11, + 11,11,12,12,12,12,13,13, + 13,13,14,14,14,14,15,15}; + +static const float3 RGB2LUM = float3(0.2126f, 0.7152f, 0.0722f); + +#define THREAD_GROUP_SIZE 64 +#define BLOCK_SIZE_Y 4 +#define BLOCK_SIZE_X 4 +#define BLOCK_SIZE (BLOCK_SIZE_Y * BLOCK_SIZE_X) + + +//Forward declaration +uint3 float2half( float3 pixel_f ); +int3 start_quantize( uint3 pixel_h ); +void quantize( inout int2x3 endPoint, uint prec ); +void finish_quantize_0( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed ); +void finish_quantize_1( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed ); +void finish_quantize( out bool bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed ); + +void start_unquantize( inout int2x3 endPoint[2], uint4 prec, bool transformed ); +void start_unquantize( inout int2x3 endPoint, uint4 prec, bool transformed ); +void unquantize( inout int2x3 color, uint prec ); +uint3 finish_unquantize( int3 color ); +void generate_palette_unquantized8( out uint3 palette, int3 low, int3 high, int i ); +void generate_palette_unquantized16( out uint3 palette, int3 low, int3 high, int i ); +float3 half2float(uint3 color_h ); + +void block_package( inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index ); +void block_package( inout uint4 block, int2x3 endPoint, uint mode_type ); + +void swap(inout int3 lhs, inout int3 rhs) +{ + int3 tmp = lhs; + lhs = rhs; + rhs = tmp; +} + +Texture2D g_Input : register( t0 ); +StructuredBuffer g_InBuff : register( 
t1 ); + +RWStructuredBuffer g_OutBuff : register( u0 ); + +struct SharedData +{ + float3 pixel; + int3 pixel_ph; + float3 pixel_hr; + float pixel_lum; + float error; + uint best_mode; + uint best_partition; + int3 endPoint_low; + int3 endPoint_high; + float endPoint_lum_low; + float endPoint_lum_high; +}; + +groupshared SharedData shared_temp[THREAD_GROUP_SIZE]; + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void TryModeG10CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) +{ + const uint MAX_USED_THREAD = 16; + uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; + uint blockInGroup = GI / MAX_USED_THREAD; + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; + uint threadBase = blockInGroup * MAX_USED_THREAD; + uint threadInBlock = GI - threadBase; + +#ifndef REF_DEVICE + if (blockID >= g_num_total_blocks) + { + return; + } +#endif + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + if (threadInBlock < 16) + { + shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb; + uint3 pixel_h = float2half( shared_temp[GI].pixel ); + shared_temp[GI].pixel_hr = half2float(pixel_h); + shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel_hr, RGB2LUM); + shared_temp[GI].pixel_ph = start_quantize( pixel_h ); + + shared_temp[GI].endPoint_low = shared_temp[GI].pixel_ph; + shared_temp[GI].endPoint_high = shared_temp[GI].pixel_ph; + shared_temp[GI].endPoint_lum_low = shared_temp[GI].pixel_lum; + shared_temp[GI].endPoint_lum_high = shared_temp[GI].pixel_lum; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 8) + { + if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 8].endPoint_lum_low) + { + shared_temp[GI].endPoint_low = shared_temp[GI + 8].endPoint_low; + shared_temp[GI].endPoint_lum_low = shared_temp[GI + 
8].endPoint_lum_low; + } + if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 8].endPoint_lum_high) + { + shared_temp[GI].endPoint_high = shared_temp[GI + 8].endPoint_high; + shared_temp[GI].endPoint_lum_high = shared_temp[GI + 8].endPoint_lum_high; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 4) + { + if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 4].endPoint_lum_low) + { + shared_temp[GI].endPoint_low = shared_temp[GI + 4].endPoint_low; + shared_temp[GI].endPoint_lum_low = shared_temp[GI + 4].endPoint_lum_low; + } + if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 4].endPoint_lum_high) + { + shared_temp[GI].endPoint_high = shared_temp[GI + 4].endPoint_high; + shared_temp[GI].endPoint_lum_high = shared_temp[GI + 4].endPoint_lum_high; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 2) + { + if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 2].endPoint_lum_low) + { + shared_temp[GI].endPoint_low = shared_temp[GI + 2].endPoint_low; + shared_temp[GI].endPoint_lum_low = shared_temp[GI + 2].endPoint_lum_low; + } + if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 2].endPoint_lum_high) + { + shared_temp[GI].endPoint_high = shared_temp[GI + 2].endPoint_high; + shared_temp[GI].endPoint_lum_high = shared_temp[GI + 2].endPoint_lum_high; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) + { + if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 1].endPoint_lum_low) + { + shared_temp[GI].endPoint_low = shared_temp[GI + 1].endPoint_low; + shared_temp[GI].endPoint_lum_low = shared_temp[GI + 1].endPoint_lum_low; + } + if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 1].endPoint_lum_high) + { + shared_temp[GI].endPoint_high = shared_temp[GI + 1].endPoint_high; + shared_temp[GI].endPoint_lum_high = shared_temp[GI + 1].endPoint_lum_high; + } + } +#ifdef REF_DEVICE + 
GroupMemoryBarrierWithGroupSync(); +#endif + + //ergod mode_type 11:14 + if ( threadInBlock == 0 ) + { + int2x3 endPoint; + // find_axis + endPoint[0] = shared_temp[threadBase + 0].endPoint_low; + endPoint[1] = shared_temp[threadBase + 0].endPoint_high; + + //compute_index + float3 span = endPoint[1] - endPoint[0];// fixed a bug in v0.2 + float span_norm_sqr = dot( span, span );// fixed a bug in v0.2 + float dotProduct = dot( span, shared_temp[threadBase + 0].pixel_ph - endPoint[0] );// fixed a bug in v0.2 + if ( span_norm_sqr > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr ) > 32 ) + { + swap(endPoint[0], endPoint[1]); + + shared_temp[GI].endPoint_low = endPoint[0]; + shared_temp[GI].endPoint_high = endPoint[1]; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 4) + { + int2x3 endPoint; + endPoint[0] = shared_temp[threadBase + 0].endPoint_low; + endPoint[1] = shared_temp[threadBase + 0].endPoint_high; + + float3 span = endPoint[1] - endPoint[0]; + float span_norm_sqr = dot( span, span ); + + uint4 prec = candidateModePrec[threadInBlock + 10]; + int2x3 endPoint_q = endPoint; + quantize( endPoint_q, prec.x ); + + bool transformed = candidateModeTransformed[threadInBlock + 10]; + if (transformed) + { + endPoint_q[1] -= endPoint_q[0]; + } + + bool bBadQuantize; + finish_quantize( bBadQuantize, endPoint_q, prec, transformed ); + + start_unquantize( endPoint_q, prec, transformed ); + + unquantize( endPoint_q, prec.x ); + + float error = 0; + [loop]for ( uint j = 0; j < 16; j ++ ) + { + float dotProduct = dot( span, shared_temp[threadBase + j].pixel_ph - endPoint[0] );// fixed a bug in v0.2 + uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr ) ? 
aStep2[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep2[63] ); + + uint3 pixel_rh; + generate_palette_unquantized16( pixel_rh, endPoint_q[0], endPoint_q[1], index ); + float3 pixel_r = half2float( pixel_rh ); + pixel_r -= shared_temp[threadBase + j].pixel_hr; + error += dot(pixel_r, pixel_r); + } + if ( bBadQuantize ) + error = 1e20f; + + shared_temp[GI].error = error; + shared_temp[GI].best_mode = candidateModeFlag[threadInBlock + 10]; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 2) + { + if ( shared_temp[GI].error > shared_temp[GI + 2].error ) + { + shared_temp[GI].error = shared_temp[GI + 2].error; + shared_temp[GI].best_mode = shared_temp[GI + 2].best_mode; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) + { + if ( shared_temp[GI].error > shared_temp[GI + 1].error ) + { + shared_temp[GI].error = shared_temp[GI + 1].error; + shared_temp[GI].best_mode = shared_temp[GI + 1].best_mode; + } + + g_OutBuff[blockID] = uint4(asuint(shared_temp[GI].error), shared_temp[GI].best_mode, 0, 0); + } +} + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void TryModeLE10CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) +{ + const uint MAX_USED_THREAD = 32; + uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; + uint blockInGroup = GI / MAX_USED_THREAD; + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; + uint threadBase = blockInGroup * MAX_USED_THREAD; + uint threadInBlock = GI - threadBase; + +#ifndef REF_DEVICE + if (blockID >= g_num_total_blocks) + { + return; + } + + if (asfloat(g_InBuff[blockID].x) < 1e-6f) + { + g_OutBuff[blockID] = g_InBuff[blockID]; + return; + } +#endif + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + if (threadInBlock < 16) + { + shared_temp[GI].pixel = g_Input.Load( uint3( base_x + 
threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb; + uint3 pixel_h = float2half( shared_temp[GI].pixel ); + shared_temp[GI].pixel_hr = half2float(pixel_h); + shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel_hr, RGB2LUM); + shared_temp[GI].pixel_ph = start_quantize( pixel_h ); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + //ergod mode_type 1:10 + if (threadInBlock < 32) + { + // find_axis + int2x3 endPoint[2]; + endPoint[0][0] = MAX_INT; + endPoint[0][1] = MIN_INT; + endPoint[1][0] = MAX_INT; + endPoint[1][1] = MIN_INT; + + float2 endPoint_lum[2]; + endPoint_lum[0][0] = MAX_FLOAT; + endPoint_lum[0][1] = MIN_FLOAT; + endPoint_lum[1][0] = MAX_FLOAT; + endPoint_lum[1][1] = MIN_FLOAT; + + uint bit = candidateSectionBit[threadInBlock]; + for ( uint i = 0; i < 16; i ++ ) + { + int3 pixel_ph = shared_temp[threadBase + i].pixel_ph; + float pixel_lum = shared_temp[threadBase + i].pixel_lum; + if ( (bit >> i) & 1 ) //It gets error when using "candidateSection" as "endPoint_ph" index + { + if (endPoint_lum[1][0] > pixel_lum) + { + endPoint[1][0] = pixel_ph; + endPoint_lum[1][0] = pixel_lum; + } + if (endPoint_lum[1][1] < pixel_lum) + { + endPoint[1][1] = pixel_ph; + endPoint_lum[1][1] = pixel_lum; + } + } + else + { + if (endPoint_lum[0][0] > pixel_lum) + { + endPoint[0][0] = pixel_ph; + endPoint_lum[0][0] = pixel_lum; + } + if (endPoint_lum[0][1] < pixel_lum) + { + endPoint[0][1] = pixel_ph; + endPoint_lum[0][1] = pixel_lum; + } + } + } + + //compute_index + float3 span[2];// fixed a bug in v0.2 + float span_norm_sqr[2];// fixed a bug in v0.2 + [unroll] + for (uint p = 0; p < 2; ++ p) + { + span[p] = endPoint[p][1] - endPoint[p][0]; + span_norm_sqr[p] = dot( span[p], span[p] ); + + float dotProduct = dot( span[p], shared_temp[threadBase + (0 == p ? 
0 : candidateFixUpIndex1D[threadInBlock])].pixel_ph - endPoint[p][0] );// fixed a bug in v0.2 + if ( span_norm_sqr[p] > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr[p] ) > 32 ) + { + span[p] = -span[p]; + swap(endPoint[p][0], endPoint[p][1]); + } + } + + uint4 prec = candidateModePrec[g_mode_id]; + int2x3 endPoint_q[2] = endPoint; + quantize( endPoint_q[0], prec.x ); + quantize( endPoint_q[1], prec.x ); + + bool transformed = candidateModeTransformed[g_mode_id]; + if (transformed) + { + endPoint_q[0][1] -= endPoint_q[0][0]; + endPoint_q[1][0] -= endPoint_q[0][0]; + endPoint_q[1][1] -= endPoint_q[0][0]; + } + + int bBadQuantize = 0; + finish_quantize_0( bBadQuantize, endPoint_q[0], prec, transformed ); + finish_quantize_1( bBadQuantize, endPoint_q[1], prec, transformed ); + + start_unquantize( endPoint_q, prec, transformed ); + + unquantize( endPoint_q[0], prec.x ); + unquantize( endPoint_q[1], prec.x ); + + float error = 0; + for ( uint j = 0; j < 16; j ++ ) + { + uint3 pixel_rh; + if ((bit >> j) & 1) + { + float dotProduct = dot( span[1], shared_temp[threadBase + j].pixel_ph - endPoint[1][0] );// fixed a bug in v0.2 + uint index = ( span_norm_sqr[1] <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr[1] ) ? aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr[1] ) ] : aStep1[63] ); + generate_palette_unquantized8( pixel_rh, endPoint_q[1][0], endPoint_q[1][1], index ); + } + else + { + float dotProduct = dot( span[0], shared_temp[threadBase + j].pixel_ph - endPoint[0][0] );// fixed a bug in v0.2 + uint index = ( span_norm_sqr[0] <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr[0] ) ? 
aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr[0] ) ] : aStep1[63] ); + generate_palette_unquantized8( pixel_rh, endPoint_q[0][0], endPoint_q[0][1], index ); + } + + float3 pixel_r = half2float( pixel_rh ); + pixel_r -= shared_temp[threadBase + j].pixel_hr; + error += dot(pixel_r, pixel_r); + } + if ( bBadQuantize ) + error = 1e20f; + + shared_temp[GI].error = error; + shared_temp[GI].best_mode = candidateModeFlag[g_mode_id]; + shared_temp[GI].best_partition = threadInBlock; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 16) + { + if ( shared_temp[GI].error > shared_temp[GI + 16].error ) + { + shared_temp[GI].error = shared_temp[GI + 16].error; + shared_temp[GI].best_mode = shared_temp[GI + 16].best_mode; + shared_temp[GI].best_partition = shared_temp[GI + 16].best_partition; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 8) + { + if ( shared_temp[GI].error > shared_temp[GI + 8].error ) + { + shared_temp[GI].error = shared_temp[GI + 8].error; + shared_temp[GI].best_mode = shared_temp[GI + 8].best_mode; + shared_temp[GI].best_partition = shared_temp[GI + 8].best_partition; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 4) + { + if ( shared_temp[GI].error > shared_temp[GI + 4].error ) + { + shared_temp[GI].error = shared_temp[GI + 4].error; + shared_temp[GI].best_mode = shared_temp[GI + 4].best_mode; + shared_temp[GI].best_partition = shared_temp[GI + 4].best_partition; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 2) + { + if ( shared_temp[GI].error > shared_temp[GI + 2].error ) + { + shared_temp[GI].error = shared_temp[GI + 2].error; + shared_temp[GI].best_mode = shared_temp[GI + 2].best_mode; + shared_temp[GI].best_partition = shared_temp[GI + 2].best_partition; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) + { + if ( 
shared_temp[GI].error > shared_temp[GI + 1].error ) + { + shared_temp[GI].error = shared_temp[GI + 1].error; + shared_temp[GI].best_mode = shared_temp[GI + 1].best_mode; + shared_temp[GI].best_partition = shared_temp[GI + 1].best_partition; + } + + if (asfloat(g_InBuff[blockID].x) > shared_temp[GI].error) + { + g_OutBuff[blockID] = uint4(asuint(shared_temp[GI].error), shared_temp[GI].best_mode, shared_temp[GI].best_partition, 0); + } + else + { + g_OutBuff[blockID] = g_InBuff[blockID]; + } + } +} + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID) +{ + const uint MAX_USED_THREAD = 32; + uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; + uint blockInGroup = GI / MAX_USED_THREAD; + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; + uint threadBase = blockInGroup * MAX_USED_THREAD; + uint threadInBlock = GI - threadBase; + +#ifndef REF_DEVICE + if (blockID >= g_num_total_blocks) + { + return; + } +#endif + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + if (threadInBlock < 16) + { + shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb; + shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel, RGB2LUM); + uint3 pixel_h = float2half( shared_temp[GI].pixel ); + shared_temp[GI].pixel_ph = start_quantize( pixel_h ); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + uint best_mode = g_InBuff[blockID].y; + uint best_partition = g_InBuff[blockID].z; + + uint4 block = 0; + + if (threadInBlock < 32) + { + int2x3 endPoint; + endPoint[0] = MAX_INT; + endPoint[1] = MIN_INT; + + float2 endPoint_lum; + endPoint_lum[0] = MAX_FLOAT; + endPoint_lum[1] = MIN_FLOAT; + + int2 endPoint_lum_index; + endPoint_lum_index[0] = -1; + endPoint_lum_index[1] = -1; + + int3 pixel_ph = 
shared_temp[threadBase + (threadInBlock & 0xF)].pixel_ph; + float pixel_lum = shared_temp[threadBase + (threadInBlock & 0xF)].pixel_lum; + if (threadInBlock < 16) + { + if (best_mode > 10) + { + endPoint[0] = endPoint[1] = pixel_ph; + endPoint_lum[0] = endPoint_lum[1] = pixel_lum; + } + else + { + uint bits = candidateSectionBit[best_partition]; + if (0 == ((bits >> threadInBlock) & 1)) + { + endPoint[0] = endPoint[1] = pixel_ph; + endPoint_lum[0] = endPoint_lum[1] = pixel_lum; + } + } + } + else + { + if (best_mode <= 10) + { + uint bits = candidateSectionBit[best_partition]; + if (1 == ((bits >> (threadInBlock & 0xF)) & 1)) + { + endPoint[0] = endPoint[1] = pixel_ph; + endPoint_lum[0] = endPoint_lum[1] = pixel_lum; + } + } + } + + shared_temp[GI].endPoint_low = endPoint[0]; + shared_temp[GI].endPoint_high = endPoint[1]; + + shared_temp[GI].endPoint_lum_low = endPoint_lum[0]; + shared_temp[GI].endPoint_lum_high = endPoint_lum[1]; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if ((threadInBlock & 0xF) < 8) + { + if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 8].endPoint_lum_low) + { + shared_temp[GI].endPoint_low = shared_temp[GI + 8].endPoint_low; + shared_temp[GI].endPoint_lum_low = shared_temp[GI + 8].endPoint_lum_low; + } + if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 8].endPoint_lum_high) + { + shared_temp[GI].endPoint_high = shared_temp[GI + 8].endPoint_high; + shared_temp[GI].endPoint_lum_high = shared_temp[GI + 8].endPoint_lum_high; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if ((threadInBlock & 0xF) < 4) + { + if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 4].endPoint_lum_low) + { + shared_temp[GI].endPoint_low = shared_temp[GI + 4].endPoint_low; + shared_temp[GI].endPoint_lum_low = shared_temp[GI + 4].endPoint_lum_low; + } + if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 4].endPoint_lum_high) + { + shared_temp[GI].endPoint_high = shared_temp[GI + 
4].endPoint_high; + shared_temp[GI].endPoint_lum_high = shared_temp[GI + 4].endPoint_lum_high; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if ((threadInBlock & 0xF) < 2) + { + if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 2].endPoint_lum_low) + { + shared_temp[GI].endPoint_low = shared_temp[GI + 2].endPoint_low; + shared_temp[GI].endPoint_lum_low = shared_temp[GI + 2].endPoint_lum_low; + } + if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 2].endPoint_lum_high) + { + shared_temp[GI].endPoint_high = shared_temp[GI + 2].endPoint_high; + shared_temp[GI].endPoint_lum_high = shared_temp[GI + 2].endPoint_lum_high; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if ((threadInBlock & 0xF) < 1) + { + if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 1].endPoint_lum_low) + { + shared_temp[GI].endPoint_low = shared_temp[GI + 1].endPoint_low; + shared_temp[GI].endPoint_lum_low = shared_temp[GI + 1].endPoint_lum_low; + } + if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 1].endPoint_lum_high) + { + shared_temp[GI].endPoint_high = shared_temp[GI + 1].endPoint_high; + shared_temp[GI].endPoint_lum_high = shared_temp[GI + 1].endPoint_lum_high; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 2) + { + // find_axis + int2x3 endPoint; + endPoint[0] = shared_temp[threadBase + threadInBlock * 16].endPoint_low; + endPoint[1] = shared_temp[threadBase + threadInBlock * 16].endPoint_high; + + uint fixup = 0; + if ((1 == threadInBlock) && (best_mode <= 10)) + { + fixup = candidateFixUpIndex1D[best_partition]; + } + + float3 span = endPoint[1] - endPoint[0]; + float span_norm_sqr = dot( span, span ); + float dotProduct = dot( span, shared_temp[threadBase + fixup].pixel_ph - endPoint[0] ); + if ( span_norm_sqr > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr ) > 32 ) + { + swap(endPoint[0], endPoint[1]); + } + + shared_temp[GI].endPoint_low 
= endPoint[0]; + shared_temp[GI].endPoint_high = endPoint[1]; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 16) + { + uint bits; + if (best_mode > 10) + { + bits = 0; + } + else + { + bits = candidateSectionBit[best_partition]; + } + + float3 span; + float dotProduct; + if ((bits >> threadInBlock) & 1) + { + span = shared_temp[threadBase + 1].endPoint_high - shared_temp[threadBase + 1].endPoint_low; + dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel_ph - shared_temp[threadBase + 1].endPoint_low ); + } + else + { + span = shared_temp[threadBase + 0].endPoint_high - shared_temp[threadBase + 0].endPoint_low; + dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel_ph - shared_temp[threadBase + 0].endPoint_low ); + } + float span_norm_sqr = dot( span, span ); + + if (best_mode > 10) + { + uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr ) ? aStep2[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep2[63] ); + if (threadInBlock == 0) + { + block.z |= index << 1; + } + else if (threadInBlock < 8) + { + block.z |= index << (threadInBlock * 4); + } + else + { + block.w |= index << ((threadInBlock - 8) * 4); + } + } + else + { + uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr ) ? 
aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep1[63] ); + + uint fixup = candidateFixUpIndex1D[best_partition]; + int2 offset = int2((fixup != 2), (fixup == 15)); + + if (threadInBlock == 0) + { + block.z |= index << 18; + } + else if (threadInBlock < 3) + { + block.z |= index << (20 + (threadInBlock - 1) * 3); + } + else if (threadInBlock < 5) + { + block.z |= index << (25 + (threadInBlock - 3) * 3 + offset.x); + } + else if (threadInBlock == 5) + { + block.w |= index >> !offset.x; + if (!offset.x) + { + block.z |= index << 31; + } + } + else if (threadInBlock < 9) + { + block.w |= index << (2 + (threadInBlock - 6) * 3 + offset.x); + } + else + { + block.w |= index << (11 + (threadInBlock - 9) * 3 + offset.y); + } + } + + shared_temp[GI].pixel_hr.xy = asfloat(block.zw); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 8) + { + shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 8].pixel_hr.xy)); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 4) + { + shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 4].pixel_hr.xy)); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 2) + { + shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 2].pixel_hr.xy)); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) + { + shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 1].pixel_hr.xy)); + + block.zw = asuint(shared_temp[GI].pixel_hr.xy); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + bool transformed = candidateModeTransformed[best_mode - 1]; + uint4 prec = candidateModePrec[best_mode - 1]; + if (threadInBlock == 2) + { + int2x3 endPoint_q; + endPoint_q[0] = shared_temp[threadBase + 
0].endPoint_low; + endPoint_q[1] = shared_temp[threadBase + 0].endPoint_high; + + quantize( endPoint_q, prec.x ); + if (transformed) + { + endPoint_q[1] -= endPoint_q[0]; + } + + shared_temp[GI].endPoint_low = endPoint_q[0]; + shared_temp[GI].endPoint_high = endPoint_q[1]; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock == 3) + { + int3 ep0 = shared_temp[threadBase + 2].endPoint_low; + int2x3 endPoint_q; + endPoint_q[0] = shared_temp[threadBase + 1].endPoint_low; + endPoint_q[1] = shared_temp[threadBase + 1].endPoint_high; + + if (best_mode <= 10) + { + quantize( endPoint_q, prec.x ); + if (transformed) + { + endPoint_q[0] -= ep0; + endPoint_q[1] -= ep0; + } + + shared_temp[GI].endPoint_low = endPoint_q[0]; + shared_temp[GI].endPoint_high = endPoint_q[1]; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 2) + { + int2x3 endPoint_q; + endPoint_q[0] = shared_temp[threadBase + threadInBlock + 2].endPoint_low; + endPoint_q[1] = shared_temp[threadBase + threadInBlock + 2].endPoint_high; + + int bBadQuantize = 0; + if (threadInBlock == 0) + { + if (best_mode > 10) + { + finish_quantize( bBadQuantize, endPoint_q, prec, transformed ); + } + else + { + finish_quantize_0( bBadQuantize, endPoint_q, prec, transformed ); + } + } + else // if (threadInBlock == 1) + { + if (best_mode <= 10) + { + finish_quantize_1( bBadQuantize, endPoint_q, prec, transformed ); + } + } + + shared_temp[GI].endPoint_low = endPoint_q[0]; + shared_temp[GI].endPoint_high = endPoint_q[1]; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if ( threadInBlock == 0 ) + { + int2x3 endPoint_q[2]; + endPoint_q[0][0] = shared_temp[threadBase + 0].endPoint_low; + endPoint_q[0][1] = shared_temp[threadBase + 0].endPoint_high; + endPoint_q[1][0] = shared_temp[threadBase + 1].endPoint_low; + endPoint_q[1][1] = shared_temp[threadBase + 1].endPoint_high; + + if ( best_mode > 10 ) + { + block_package( block, 
endPoint_q[0], best_mode );
+        }
+        else
+        {
+            block_package( block, endPoint_q, best_mode, best_partition );
+        }
+
+        g_OutBuff[blockID] = block;
+    }
+}
+
+// Converts one 32-bit float to a 16-bit half bit pattern, returned in the low
+// 16 bits of a uint (sign in bit 15, magnitude in bits 0-14).
+//   f       : value to convert.
+//   returns : the half bit pattern. Inputs whose magnitude bits exceed
+//             0x47FFEFFF yield the magnitude pattern 0x7FFF.
+uint float2half1( float f )
+{
+    uint Result;
+
+    uint IValue = asuint(f);
+    uint Sign = (IValue & 0x80000000U) >> 16U; // float sign bit moved to half bit 15
+    IValue = IValue & 0x7FFFFFFFU;             // continue with the magnitude only
+
+    if (IValue > 0x47FFEFFFU)
+    {
+        // The number is too large to be represented as a half. Saturate to infinity.
+        Result = 0x7FFFU;
+    }
+    else
+    {
+        if (IValue < 0x38800000U)
+        {
+            // The number is too small to be represented as a normalized half.
+            // Convert it to a denormalized value.
+            // The shift count is clamped to 24: integer shifts only use the low
+            // 5 bits of the count, so an unclamped count of 32 or more (floats
+            // below about 2^-45) would wrap around and yield a non-zero half
+            // instead of flushing to zero. A shift of 24 already clears the
+            // whole 24-bit significand, so smaller counts are unaffected.
+            uint Shift = min( 113U - (IValue >> 23U), 24U );
+            IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift;
+        }
+        else
+        {
+            // Rebias the exponent to represent the value as a normalized half.
+            IValue += 0xC8000000U;
+        }
+
+        // Drop the 13 low mantissa bits, rounding to nearest (ties to even).
+        Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U)&0x7FFFU;
+    }
+    return (Result|Sign);
+}
+
+// Per-channel float -> half conversion; applies float2half1 to x, y and z.
+// The commented-out expression below is the earlier vectorized implementation,
+// kept for reference.
+uint3 float2half( float3 endPoint_f )
+{
+    //uint3 sign = asuint(endPoint_f) & 0x80000000;
+    //uint3 expo = asuint(endPoint_f) & 0x7F800000;
+    //uint3 base = asuint(endPoint_f) & 0x007FFFFF;
+    //return ( expo < 0x33800000 ) ? 0
+    //    //0x33800000 indicating 2^-24, which is minimal denormalized number that half can present
+    //    : ( ( expo < 0x38800000 ) ? ( sign >> 16 ) | ( ( base + 0x00800000 ) >> ( 23 - ( ( expo - 0x33800000 ) >> 23 ) ) )//fixed a bug in v0.2
+    //    //0x38800000 indicating 2^-14, which is minimal normalized number that half can present, so need to use denormalized half presentation
+    //    : ( ( expo == 0x7F800000 || expo > 0x47000000 ) ? ( ( sign >> 16 ) | 0x7bff )
+    //    // treat NaN as INF, treat INF (including NaN) as the maximum/minimum number that half can present
+    //    // 0x47000000 indicating 2^15, which is maximum exponent that half can present, so cut to 0x7bff which is the maximum half number
+    //    : ( ( sign >> 16 ) | ( ( ( expo - 0x38000000 ) | base ) >> 13 ) ) ) );
+
+
+    return uint3( float2half1( endPoint_f.x ), float2half1( endPoint_f.y ), float2half1( endPoint_f.z ) );
+}
+// Rescales half bit patterns to the integer range used before endpoint
+// quantization.
+//   pixel_h : per-channel half bit patterns.
+//   returns : UNSIGNED_F16: ( h << 6 ) / 31.
+//             otherwise: the sign-magnitude half becomes a two's complement
+//             integer of magnitude ( |h| << 5 ) / 31, with the largest finite
+//             half magnitude (0x7bff) pinned to +0x7fff / -0x7fff.
+int3 start_quantize( uint3 pixel_h )
+{
+    if ( g_format == UNSIGNED_F16 )
+    {
+        return asint( ( pixel_h << 6 ) / 31 );
+    }
+    else
+    {
+        // In the negative branch the sign bit is set, so the largest finite
+        // magnitude has to be tested as 0xfbff; a test against 0x7bff could
+        // never match there and left -65504 one step short of -0x7fff.
+        return ( pixel_h < 0x8000 ) ? ( ( pixel_h == 0x7bff ) ? 0x7fff : asint( ( pixel_h << 5 ) / 31 ) )// fixed a bug in v0.2
+                                    : ( ( pixel_h == 0xfbff ) ? 0xffff8001 : -asint( ( ( 0x00007fff & pixel_h ) << 5 ) / 31 ) );// fixed a bug in v0.2
+    }
+}
+// Reduces both endpoints (rows of endPoint) to `prec` bits, in place.
+//   UNSIGNED_F16: values are left unchanged when prec >= 15 or the value is 0;
+//                 0xFFFF maps to the all-ones prec-bit value; everything else
+//                 becomes ( v << prec ) >> 16.
+//   otherwise   : same scheme on the magnitude with prec - 1 bits and a shift
+//                 of 15 (threshold prec >= 16, saturation value 0x7FFF); the
+//                 sign is re-applied afterwards.
+void quantize( inout int2x3 endPoint, uint prec )
+{
+    int iprec = asint( prec );
+    if ( g_format == UNSIGNED_F16 )
+    {
+        endPoint = ( ( iprec >= 15 ) | ( endPoint == 0 ) ) ? endPoint
+                 : ( ( endPoint == asint(0xFFFF) ) ? ( ( 1 << iprec ) - 1 )
+                 : ( ( ( endPoint << iprec ) + asint(0x0000) ) >> 16 ) );
+    }
+    else
+    {
+        endPoint = ( ( iprec >= 16 ) | ( endPoint == 0 ) ) ? endPoint
+                 : ( ( endPoint >= 0 ) ? ( ( endPoint == asint(0x7FFF) ) ? ( ( 1 << ( iprec - 1 ) ) - 1 ) : ( ( ( endPoint << ( iprec - 1 ) ) + asint(0x0000) ) >> 15 ) )
+                 : ( ( -endPoint == asint(0x7FFF) ) ? -( ( 1 << ( iprec - 1 ) ) - 1 ) : -( ( ( -endPoint << ( iprec - 1 ) ) + asint(0x0000) ) >> 15 ) ) );
+    }
+}
+// Range-checks and bit-masks one endpoint pair, in place.
+//   bBadQuantize : OR-ed with 1 when a component of endPoint[1] does not fit
+//                  the signed range of prec.yzw bits (transformed case only).
+//   endPoint     : transformed: row 0 is masked to prec.x bits, row 1 is
+//                  clamped to that signed range and masked to prec.yzw bits.
+//                  not transformed: both rows are masked to prec.x bits.
+void finish_quantize_0( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed )
+{
+    if ( transformed )
+    {
+        bool3 bBadComponent = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) )
+                                                   : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) );
+        bBadQuantize |= any(bBadComponent);
+
+        endPoint[0] = endPoint[0] & ( ( 1 << prec.x ) - 1 );
+        endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ?
( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] )
+                                           : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) );
+    }
+    else
+    {
+        endPoint &= ( ( 1 << prec.x ) - 1 );
+    }
+}
+// Range-checks and bit-masks an endpoint pair in which both rows are deltas.
+//   bBadQuantize : OR-ed with 1 when any component of either row falls outside
+//                  the signed range of prec.yzw bits (transformed case only).
+//   endPoint     : transformed: both rows are clamped to that signed range and
+//                  masked to prec.yzw bits.
+//                  not transformed: both rows are masked to prec.x bits.
+void finish_quantize_1( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed )
+{
+    if ( !transformed )
+    {
+        // Absolute endpoints: just keep the low prec.x bits.
+        endPoint &= ( ( 1 << prec.x ) - 1 );
+    }
+    else
+    {
+        // Flag every delta component that does not fit its signed bit budget.
+        bool2x3 outOfRange;
+        outOfRange[0] = ( endPoint[0] >= 0 ) ? ( endPoint[0] >= ( 1 << ( prec.yzw - 1 ) ) )
+                                             : ( -endPoint[0] > ( 1 << ( prec.yzw - 1 ) ) );
+        outOfRange[1] = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) )
+                                             : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) );
+        bBadQuantize |= any(outOfRange);
+
+        // Clamp to the representable range, then keep only prec.yzw bits.
+        endPoint[0] = ( endPoint[0] >= 0 ) ? ( ( endPoint[0] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[0] )
+                                           : ( ( -endPoint[0] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[0] & ( ( 1 << prec.yzw ) - 1 ) ) );
+        endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] )
+                                           : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) );
+    }
+}
+// Same processing as finish_quantize_0, but bBadQuantize is a pure output:
+// it is overwritten (not OR-ed) with the range-check result, and set to 0 when
+// the endpoints are not transformed.
+void finish_quantize( out bool bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed )
+{
+    if ( !transformed )
+    {
+        endPoint &= ( ( 1 << prec.x ) - 1 );
+
+        bBadQuantize = 0;
+    }
+    else
+    {
+        bool3 outOfRange = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) )
+                                                : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) );
+        bBadQuantize = any( outOfRange );
+
+        endPoint[0] = endPoint[0] & ( ( 1 << prec.x ) - 1 );
+        endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] )
+                                           : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) );
+    }
+}
+
+// Sign-extends each channel of `color` from a `prec`-bit two's complement
+// field: when the top field bit is set, the field's range is subtracted.
+void SIGN_EXTEND( uint3 prec, inout int3 color )
+{
+    uint3 signBit = 1 << (prec - 1);
+    color = (color & signBit) ? (color & (signBit - 1)) - signBit : color;
+}
+
+// Sign-extends one endpoint pair: row 0 from prec.x bits when the format is
+// signed, row 1 from prec.yzw bits when the format is signed or the endpoints
+// are transformed.
+void sign_extend( bool transformed, uint4 prec, inout int2x3 endPoint )
+{
+    if ( g_format == SIGNED_F16 )
+    {
+        SIGN_EXTEND( prec.x, endPoint[0] );
+    }
+    if ( g_format == SIGNED_F16 || transformed )
+    {
+        SIGN_EXTEND( prec.yzw, endPoint[1] );
+    }
+}
+
+// Two-pair overload: the first endpoint uses prec.x bits (signed format only),
+// the remaining three use prec.yzw bits (signed format or transformed).
+void sign_extend( bool transformed, uint4 prec, inout int2x3 endPoint[2] )
+{
+    if ( g_format == SIGNED_F16 )
+    {
+        SIGN_EXTEND( prec.x, endPoint[0][0] );
+    }
+    if ( g_format == SIGNED_F16 || transformed )
+    {
+        SIGN_EXTEND( prec.yzw, endPoint[0][1] );
+        SIGN_EXTEND( prec.yzw, endPoint[1][0] );
+        SIGN_EXTEND( prec.yzw, endPoint[1][1] );
+    }
+}
+// Sign-extends two endpoint pairs and, for transformed endpoints, turns the
+// three deltas back into absolute values by adding the first endpoint.
+void start_unquantize( inout int2x3 endPoint[2], uint4 prec, bool transformed )
+{
+    sign_extend( transformed, prec, endPoint );
+    if ( transformed )
+    {
+        // The base endpoint itself is not modified by the additions below.
+        const int3 base = endPoint[0][0];
+        endPoint[0][1] += base;
+        endPoint[1][0] += base;
+        endPoint[1][1] += base;
+    }
+}
+// Single-pair overload: row 1 is a delta relative to row 0 when transformed.
+void start_unquantize( inout int2x3 endPoint, uint4 prec, bool transformed )
+{
+    sign_extend( transformed, prec, endPoint );
+    if ( transformed )
+    {
+        endPoint[1] += endPoint[0];
+    }
+}
+// Expands `prec`-bit endpoints back to the full integer range, in place.
+//   UNSIGNED_F16, prec < 15: 0 stays 0, the all-ones value maps to 0xFFFF,
+//                            other values become ( ( v << 16 ) + 0x8000 ) >> prec.
+//   otherwise,   prec < 16: the same scheme on the magnitude with prec - 1 bits
+//                            (saturation 0x7FFF, bias 0x4000); the sign is
+//                            re-applied afterwards.
+//   Values are left untouched for larger precisions.
+void unquantize( inout int2x3 color, uint prec )
+{
+    int iprec = asint( prec );
+    const bool isUnsigned = ( g_format == UNSIGNED_F16 );
+
+    if ( isUnsigned && prec < 15 )
+    {
+        color = (color != 0) ? (color == ((1 << iprec) - 1) ? 0xFFFF : (((color << 16) + 0x8000) >> iprec)) : color;
+    }
+    else if ( !isUnsigned && prec < 16 )
+    {
+        // Remember which components were negative, work on magnitudes.
+        uint2x3 wasNegative = color >= 0 ? 0 : 1;
+        color = abs(color);
+        color = (color != 0) ? (color >= ((1 << (iprec - 1)) - 1) ? 0x7FFF : (((color << 15) + 0x4000) >> (iprec - 1))) : color;
+        color = wasNegative > 0 ? -color : color;
+    }
+}
+// Converts an interpolated integer color to half bit patterns.
+//   UNSIGNED_F16: ( c * 31 ) >> 6.
+//   otherwise   : the magnitude is scaled by 31 / 32 and negative results are
+//                 stored as magnitude | 0x8000.
+uint3 finish_unquantize( int3 color )
+{
+    if ( g_format == UNSIGNED_F16 )
+        color = ( color * 31 ) >> 6;
+    else
+    {
+        color = ( color < 0 ) ?
-( ( -color * 31 ) >> 5 ) : ( color * 31 ) >> 5;
+        color = ( color < 0 ) ? ( ( -color ) | 0x8000 ) : color;
+    }
+    return asuint(color);
+}
+// Writes palette entry `i` of an 8-entry palette: a weighted blend of the
+// `low` and `high` endpoints (weights out of 64, rounded), passed through
+// finish_unquantize.
+void generate_palette_unquantized8( out uint3 palette, int3 low, int3 high, int i )
+{
+    static const int aWeight3[] = {0, 9, 18, 27, 37, 46, 55, 64};
+
+    const int weightHigh = aWeight3[i];
+    const int weightLow = 64 - weightHigh;
+    int3 blended = ( low * weightLow + high * weightHigh + 32 ) >> 6;
+    palette = finish_unquantize( blended );
+}
+// Writes palette entry `i` of a 16-entry palette; same blend as the 8-entry
+// variant with the 16-entry weight table.
+void generate_palette_unquantized16( out uint3 palette, int3 low, int3 high, int i )
+{
+    static const int aWeight4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};
+
+    const int weightHigh = aWeight4[i];
+    const int weightLow = 64 - weightHigh;
+    int3 blended = ( low * weightLow + high * weightHigh + 32 ) >> 6;
+    palette = finish_unquantize( blended );
+}
+
+// Converts a 16-bit half bit pattern (low 16 bits of Value) to a float.
+// Half denormals are renormalized; the exponent field is rebiased by 112.
+float half2float1( uint Value )
+{
+    uint Mantissa = (uint)(Value & 0x03FF);
+    uint Exponent;
+
+    if ((Value & 0x7C00) != 0)
+    {
+        // The value is normalized
+        Exponent = (uint)((Value >> 10) & 0x1F);
+    }
+    else if (Mantissa == 0)
+    {
+        // The value is zero: pick the exponent that cancels the +112 rebias
+        Exponent = (uint)(-112);
+    }
+    else
+    {
+        // The value is denormalized
+        // Normalize the value in the resulting float
+        Exponent = 1;
+
+        do
+        {
+            Exponent--;
+            Mantissa <<= 1;
+        } while ((Mantissa & 0x0400) == 0);
+
+        Mantissa &= 0x03FF;
+    }
+
+    const uint signBits = (Value & 0x8000) << 16;        // Sign
+    const uint exponentBits = (Exponent + 112) << 23;    // Exponent
+    const uint mantissaBits = Mantissa << 13;            // Mantissa
+
+    return asfloat(signBits | exponentBits | mantissaBits);
+}
+
+// Per-channel half -> float conversion; applies half2float1 to x, y and z.
+// The commented-out expression below is the earlier vectorized implementation,
+// kept for reference.
+float3 half2float(uint3 color_h )
+{
+    //uint3 sign = color_h & 0x8000;
+    //uint3 expo = color_h & 0x7C00;
+    //uint3 base = color_h & 0x03FF;
+    //return ( expo == 0 ) ? asfloat( ( sign << 16 ) | asuint( float3(base) / 16777216 ) ) //16777216 = 2^24
+    //    : asfloat( ( sign << 16 ) | ( ( ( expo + 0x1C000 ) | base ) << 13 ) ); //0x1C000 = 0x1FC00 - 0x3C00
+
+    return float3( half2float1( color_h.x ), half2float1( color_h.y ), half2float1( color_h.z ) );
+}
+
+void block_package( inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index ) // for mode 1 - 10
+{
+    block.xy = 0;
+    block.z &= 0xFFFC0000;
+
+    //block.z |= (partition_index & 0x1f) << 13;
+
+    if ( mode_type == candidateModeFlag[0])
+    {
+        /*block.x = candidateModeMemory[0];
+        block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
+        block.x |= ( endPoint[1][0].g >> 2 ) & 0x00000004;
+        block.x |= ( endPoint[1][0].b >> 1 ) & 0x00000008;
+        block.x |= endPoint[1][1].b & 0x00000010;
+        block.y |= ( ( endPoint[0][0].b >> 7 ) & 0x00000007 );
+        block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 );
+        block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E);
+        block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
+        block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 );
+        block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80);
+        block.yz |= ( ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000) ) | ( ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040) );
+        block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/
+
+        block.x |= ((candidateModeMemory[0] >> 0) & 1) << 0;
+        block.x |= ((candidateModeMemory[0] >> 1) & 1) << 1;
+        block.x |= ((endPoint[1][0].g >> 4) & 1) << 2;
+        block.x |= ((endPoint[1][0].b >> 4) & 1) << 3;
+        block.x |= ((endPoint[1][1].b >> 4) & 1) << 4;
+        block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
+        block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
+        block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
+
block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0][0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0][0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0][0].b >> 9) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= 
((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[1]) + { + /*block.x = candidateModeMemory[1]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00000FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x003F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 
) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 ); + block.x |= ( ( endPoint[1][0].g >> 3 ) & 0x00000004 ) | ( ( endPoint[1][0].g << 20 ) & 0x01000000 ); + block.x |= ( endPoint[1][1].g >> 1 ) & 0x00000018; + block.x |= ( ( endPoint[1][1].b << 21 ) & 0x00800000 ) | ( ( endPoint[1][1].b << 12 ) & 0x00003000 ); + block.x |= ( ( endPoint[1][0].b << 17 ) & 0x00400000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80); + block.y |= ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ) | ( ( endPoint[1][1].b >> 3 ) & 0x00000001 ); + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[1] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[1] >> 1) & 1) << 1; + block.x |= ((endPoint[1][0].g >> 5) & 1) << 2; + block.x |= ((endPoint[1][1].g >> 4) & 1) << 3; + block.x |= ((endPoint[1][1].g >> 5) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[1][1].b >> 0) & 1) << 12; + block.x |= ((endPoint[1][1].b >> 1) & 1) << 13; + block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[1][0].b >> 5) & 1) 
<< 22; + block.x |= ((endPoint[1][1].b >> 2) & 1) << 23; + block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[1][1].b >> 3) & 1) << 0; + block.y |= ((endPoint[1][1].b >> 5) & 1) << 1; + block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[0][1].r >> 5) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[0][1].g >> 5) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[0][1].b >> 5) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + 
block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][0].r >> 5) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].r >> 5) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[2]) + { + /*block.x = candidateModeMemory[2]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0][0].r >> 2 ) & 0x00000100; + block.y |= ( endPoint[0][0].g << 7 ) & 0x00020000; + block.y |= ( ( endPoint[0][0].b << 17 ) & 0x08000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 ); + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x07800000 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); + block.yz |= ( ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000) ) | ( ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040) ); + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ + + block.x |= 
((candidateModeMemory[2] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[2] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[2] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[2] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[2] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0][0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0][0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0][0].b >> 9) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; 
+ block.y |= ((endPoint[0][0].r >> 10) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][0].g >> 10) & 1) << 17; + block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][0].b >> 10) & 1) << 27; + block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z 
|= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[3]) + { + /*block.x = candidateModeMemory[3]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0][0].r >> 3 ) & 0x00000080; + block.y |= ( endPoint[0][0].g << 8 ) & 0x00040000; + block.y |= ( ( endPoint[0][0].b << 17 ) & 0x08000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 ); + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x00000078 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x07800000 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000001E); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ); + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000780); + block.yz |= ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000); + block.z |= ( ( endPoint[1][0].g << 7 ) & 0x00000800 ); + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; + block.z |= ( endPoint[1][1].b << 4 ) & 0x00000040; + block.z |= ( endPoint[1][1].b << 5 ) & 0x00000020;*/ + + block.x |= ((candidateModeMemory[3] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[3] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[3] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[3] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[3] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; + 
block.x |= ((endPoint[0][0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0][0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0][0].b >> 9) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][0].r >> 10) & 1) << 7; + block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[0][0].g >> 10) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y 
|= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][0].b >> 10) & 1) << 27; + block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][1].b >> 0) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][0].g >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[4]) + { + /*block.x = candidateModeMemory[4]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0][0].r >> 3 ) & 0x00000080; + block.y |= ( endPoint[0][0].g << 7 ) & 0x00020000; + block.y |= ( ( endPoint[0][0].b << 18 ) & 0x10000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 ); + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x00000078 ) | ( ( endPoint[0][1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); + block.y |= ( ( 
endPoint[1][0].g << 9 ) & 0x00001E00 ) | ( ( endPoint[1][0].b << 4 ) & 0x00000100 ); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000780); + block.yz |= ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000060); + block.z |= ( endPoint[1][0].r << 1 ) & 0x0000001E; + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; + block.z |= ( ( endPoint[1][1].b << 7 ) & 0x00000800 ) | ( ( endPoint[1][1].b << 9 ) & 0x00001000 );*/ + + block.x |= ((candidateModeMemory[4] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[4] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[4] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[4] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[4] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0][0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0][0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x 
|= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0][0].b >> 9) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][0].r >> 10) & 1) << 7; + block.y |= ((endPoint[1][0].b >> 4) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][0].g >> 10) & 1) << 17; + block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[0][0].b >> 10) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= 
((endPoint[1][1].b >> 1) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].b >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[5]) + { + /*block.x = candidateModeMemory[5]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00003FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x00FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000); + block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000003; + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); + block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ); + block.y |= ( ( endPoint[1][1].b << 27 ) & 0x10000000 ); + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); + block.yz |= ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040); + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; + block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 );*/ + + block.x |= ((candidateModeMemory[5] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[5] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[5] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[5] >> 3) & 1) << 3; + block.x |= 
((candidateModeMemory[5] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; + block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; + block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; + block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= 
((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[6]) + { + /*block.x = candidateModeMemory[6]; + block.x |= ( ( endPoint[0][0].r 
<< 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001; + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); + block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000); + block.x |= ( ( endPoint[1][1].g << 9 ) & 0x00002000 ) | ( ( endPoint[1][1].b << 21 ) & 0x00800000); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80); + block.y |= ( ( endPoint[1][1].b >> 2 ) & 0x00000006 ); + block.y |= ( ( endPoint[1][1].b << 27 ) & 0x10000000 ) | ( ( endPoint[1][1].b << 18 ) & 0x00040000 ); + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[6] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[6] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[6] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[6] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[6] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[1][1].g >> 4) & 1) << 13; + block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= 
((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[1][1].b >> 2) & 1) << 23; + block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[1][1].b >> 3) & 1) << 1; + block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[0][1].r >> 5) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= 
((endPoint[1][1].b >> 1) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][0].r >> 5) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].r >> 5) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[7]) + { + /*block.x = candidateModeMemory[7]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001; + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); + block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); + block.x |= ( ( endPoint[1][0].g << 18 ) & 0x00800000 ); + block.x |= ( ( endPoint[1][1].b << 13 ) & 0x00002000 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.y |= ( ( endPoint[1][1].g >> 4 ) 
& 0x00000002 ) | ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ); + block.y |= ( endPoint[1][1].b << 27 ) & 0x10000000; + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; + block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 ) | ( ( endPoint[1][1].b << 4 ) & 0x00000040 );*/ + + block.x |= ((candidateModeMemory[7] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[7] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[7] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[7] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[7] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[1][1].b >> 0) & 1) << 13; + block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[1][0].g >> 5) & 1) << 23; + block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y 
|= ((endPoint[1][1].g >> 5) & 1) << 1; + block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[0][1].g >> 5) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= 
((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[8]) + { + /*block.x = candidateModeMemory[8]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001; + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 ); + block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); + block.x |= ( ( endPoint[1][0].b << 18 ) & 0x00800000 ); + block.x |= ( endPoint[1][1].b << 12 ) & 0x00002000; + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); + block.y |= ( endPoint[1][1].b << 18 ) & 0x00040000; + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; + block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 ) | ( ( endPoint[1][1].b << 4 ) & 0x00000040 );*/ + + block.x |= ((candidateModeMemory[8] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[8] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[8] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[8] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[8] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + 
block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[1][1].b >> 1) & 1) << 13; + block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[1][0].b >> 5) & 1) << 23; + block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[1][1].b >> 5) & 1) << 1; + block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= 
((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[0][1].b >> 5) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[9]) + { + /*block.x = candidateModeMemory[9]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x000007E0 ) | ( ( endPoint[0][0].g << 15 ) & 0x001F8000 ) | ( ( endPoint[0][0].b << 25 ) & 
0x7E000000 ); + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 ); + block.x |= ( ( endPoint[1][0].g << 16 ) & 0x00200000 ) | ( ( endPoint[1][0].g << 20 ) & 0x01000000 ); + block.x |= ( ( endPoint[1][0].b << 17 ) & 0x00400000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); + block.x |= ( ( endPoint[1][1].b << 21 ) & 0x00800000 ) | ( ( endPoint[1][1].b << 12 ) & 0x00003000 ); + block.x |= ( ( endPoint[1][1].g << 26 ) & 0x80000000 ) | ( ( endPoint[1][1].g << 7 ) & 0x00000800 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E); + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.y |= ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ) | ( ( endPoint[1][1].b >> 3 ) & 0x00000001 ); + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[9] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[9] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[9] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[9] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[9] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[1][1].g >> 4) & 1) << 11; + block.x |= ((endPoint[1][1].b >> 0) & 1) << 12; + block.x |= ((endPoint[1][1].b >> 1) & 1) << 13; + block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= 
((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[1][0].g >> 5) & 1) << 21; + block.x |= ((endPoint[1][0].b >> 5) & 1) << 22; + block.x |= ((endPoint[1][1].b >> 2) & 1) << 23; + block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[1][1].g >> 5) & 1) << 31; + block.y |= ((endPoint[1][1].b >> 3) & 1) << 0; + block.y |= ((endPoint[1][1].b >> 5) & 1) << 1; + block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[0][1].r >> 5) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[0][1].g >> 5) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= 
((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[0][1].b >> 5) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][0].r >> 5) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].r >> 5) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } +} +void block_package( inout uint4 block, int2x3 endPoint, uint mode_type ) // for mode 11 - 14 +{ + /*block.x = ( ( endPoint[0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0].b >> 7 ) & 0x00000007;*/ + + block.xy = 0; + block.z &= 0xFFFFFFFE; + + + if ( mode_type == candidateModeFlag[10]) + { + /* block.x |= candidateModeMemory[10]; + block.y |= ( ( endPoint[1].r << 3 ) & 0x00001FF8 ) | ( ( endPoint[1].g << 13 ) & 0x007FE000 ) | ( ( endPoint[1].b << 23 ) & 0xFF800000 ); + block.z |= ( endPoint[1].b >> 9 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[10] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[10] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[10] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[10] >> 3) & 1) << 3; + block.x |= 
((candidateModeMemory[10] >> 4) & 1) << 4; + block.x |= ((endPoint[0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0].b >> 9) & 1) << 2; + block.y |= ((endPoint[1].r >> 0) & 1) << 3; + block.y |= ((endPoint[1].r >> 1) & 1) << 4; + block.y |= ((endPoint[1].r >> 2) & 1) << 5; + block.y |= ((endPoint[1].r >> 3) & 1) << 6; + block.y |= ((endPoint[1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1].r >> 5) & 1) << 8; + block.y |= ((endPoint[1].r >> 6) & 1) << 9; + block.y |= ((endPoint[1].r >> 7) & 1) << 10; + block.y |= ((endPoint[1].r >> 8) & 1) << 11; + block.y |= ((endPoint[1].r >> 9) & 1) << 12; + block.y |= ((endPoint[1].g >> 0) & 1) << 13; + block.y |= ((endPoint[1].g >> 1) & 1) << 
14; + block.y |= ((endPoint[1].g >> 2) & 1) << 15; + block.y |= ((endPoint[1].g >> 3) & 1) << 16; + block.y |= ((endPoint[1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1].g >> 5) & 1) << 18; + block.y |= ((endPoint[1].g >> 6) & 1) << 19; + block.y |= ((endPoint[1].g >> 7) & 1) << 20; + block.y |= ((endPoint[1].g >> 8) & 1) << 21; + block.y |= ((endPoint[1].g >> 9) & 1) << 22; + block.y |= ((endPoint[1].b >> 0) & 1) << 23; + block.y |= ((endPoint[1].b >> 1) & 1) << 24; + block.y |= ((endPoint[1].b >> 2) & 1) << 25; + block.y |= ((endPoint[1].b >> 3) & 1) << 26; + block.y |= ((endPoint[1].b >> 4) & 1) << 27; + block.y |= ((endPoint[1].b >> 5) & 1) << 28; + block.y |= ((endPoint[1].b >> 6) & 1) << 29; + block.y |= ((endPoint[1].b >> 7) & 1) << 30; + block.y |= ((endPoint[1].b >> 8) & 1) << 31; + block.z |= ((endPoint[1].b >> 9) & 1) << 0; + } + else if (mode_type == candidateModeFlag[11]) + { + /*block.x |= candidateModeMemory[11]; + block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 ) | ( ( endPoint[0].g << 12 ) & 0x00400000 ); + block.y |= ( ( endPoint[1].r << 3 ) & 0x00000FF8 ) | ( ( endPoint[1].g << 13 ) & 0x003FE000 ) | ( ( endPoint[1].b << 23 ) & 0xFF800000 ); + block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[11] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[11] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[11] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[11] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[11] >> 4) & 1) << 4; + block.x |= ((endPoint[0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0].r >> 9) & 1) << 14; + block.x |= 
((endPoint[0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0].b >> 9) & 1) << 2; + block.y |= ((endPoint[1].r >> 0) & 1) << 3; + block.y |= ((endPoint[1].r >> 1) & 1) << 4; + block.y |= ((endPoint[1].r >> 2) & 1) << 5; + block.y |= ((endPoint[1].r >> 3) & 1) << 6; + block.y |= ((endPoint[1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1].r >> 5) & 1) << 8; + block.y |= ((endPoint[1].r >> 6) & 1) << 9; + block.y |= ((endPoint[1].r >> 7) & 1) << 10; + block.y |= ((endPoint[1].r >> 8) & 1) << 11; + block.y |= ((endPoint[0].r >> 10) & 1) << 12; + block.y |= ((endPoint[1].g >> 0) & 1) << 13; + block.y |= ((endPoint[1].g >> 1) & 1) << 14; + block.y |= ((endPoint[1].g >> 2) & 1) << 15; + block.y |= ((endPoint[1].g >> 3) & 1) << 16; + block.y |= ((endPoint[1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1].g >> 5) & 1) << 18; + block.y |= ((endPoint[1].g >> 6) & 1) << 19; + block.y |= ((endPoint[1].g >> 7) & 1) << 20; + block.y |= ((endPoint[1].g >> 8) & 1) << 21; + block.y |= ((endPoint[0].g >> 10) & 1) << 22; + block.y |= ((endPoint[1].b >> 0) & 1) << 23; + block.y |= ((endPoint[1].b >> 1) & 1) << 24; + block.y |= ((endPoint[1].b >> 2) & 1) << 25; 
+ block.y |= ((endPoint[1].b >> 3) & 1) << 26; + block.y |= ((endPoint[1].b >> 4) & 1) << 27; + block.y |= ((endPoint[1].b >> 5) & 1) << 28; + block.y |= ((endPoint[1].b >> 6) & 1) << 29; + block.y |= ((endPoint[1].b >> 7) & 1) << 30; + block.y |= ((endPoint[1].b >> 8) & 1) << 31; + block.z |= ((endPoint[0].b >> 10) & 1) << 0; + } + else if (mode_type == candidateModeFlag[12])// violate the spec in [0].low + { + /*block.x |= candidateModeMemory[12]; + block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 ) | ( ( endPoint[0].g << 12 ) & 0x00400000 ); + block.y |= ( ( endPoint[0].r << 0 ) & 0x00000800 ) | ( ( endPoint[0].g << 10 ) & 0x00200000 ); + block.y |= ( endPoint[0].b << 20 ) & 0x80000000; + block.y |= ( ( endPoint[1].r << 3 ) & 0x000007F8 ) | ( ( endPoint[1].g << 13 ) & 0x001FE000 ) | ( ( endPoint[1].b << 23 ) & 0x7F800000 ); + block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[12] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[12] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[12] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[12] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[12] >> 4) & 1) << 4; + block.x |= ((endPoint[0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0].g >> 
7) & 1) << 22; + block.x |= ((endPoint[0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0].b >> 9) & 1) << 2; + block.y |= ((endPoint[1].r >> 0) & 1) << 3; + block.y |= ((endPoint[1].r >> 1) & 1) << 4; + block.y |= ((endPoint[1].r >> 2) & 1) << 5; + block.y |= ((endPoint[1].r >> 3) & 1) << 6; + block.y |= ((endPoint[1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1].r >> 5) & 1) << 8; + block.y |= ((endPoint[1].r >> 6) & 1) << 9; + block.y |= ((endPoint[1].r >> 7) & 1) << 10; + block.y |= ((endPoint[0].r >> 11) & 1) << 11; + block.y |= ((endPoint[0].r >> 10) & 1) << 12; + block.y |= ((endPoint[1].g >> 0) & 1) << 13; + block.y |= ((endPoint[1].g >> 1) & 1) << 14; + block.y |= ((endPoint[1].g >> 2) & 1) << 15; + block.y |= ((endPoint[1].g >> 3) & 1) << 16; + block.y |= ((endPoint[1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1].g >> 5) & 1) << 18; + block.y |= ((endPoint[1].g >> 6) & 1) << 19; + block.y |= ((endPoint[1].g >> 7) & 1) << 20; + block.y |= ((endPoint[0].g >> 11) & 1) << 21; + block.y |= ((endPoint[0].g >> 10) & 1) << 22; + block.y |= ((endPoint[1].b >> 0) & 1) << 23; + block.y |= ((endPoint[1].b >> 1) & 1) << 24; + block.y |= ((endPoint[1].b >> 2) & 1) << 25; + block.y |= ((endPoint[1].b >> 3) & 1) << 26; + block.y |= ((endPoint[1].b >> 4) & 1) << 27; + block.y |= ((endPoint[1].b >> 5) & 1) << 28; + block.y |= ((endPoint[1].b >> 6) & 1) << 29; + block.y |= ((endPoint[1].b >> 7) & 1) << 30; + block.y |= ((endPoint[0].b >> 11) & 1) << 31; + block.z |= ((endPoint[0].b >> 10) & 1) << 0; + } + else if 
(mode_type == candidateModeFlag[13]) + { + /*block.x |= candidateModeMemory[13]; + block.y |= ( ( endPoint[0].r >> 8 ) & 0x00000080 ); + block.y |= ( ( endPoint[0].r >> 6 ) & 0x00000100 ); + block.y |= ( ( endPoint[0].r >> 4 ) & 0x00000200 ); + block.y |= ( ( endPoint[0].r >> 2 ) & 0x00000400 ); + block.y |= ( ( endPoint[0].r >> 0 ) & 0x00000800 ); + block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 ); + block.y |= ( ( endPoint[0].g << 2 ) & 0x00020000 ); + block.y |= ( ( endPoint[0].g << 4 ) & 0x00040000 ); + block.y |= ( ( endPoint[0].g << 6 ) & 0x00080000 ); + block.y |= ( ( endPoint[0].g << 8 ) & 0x00100000 ); + block.y |= ( ( endPoint[0].g << 10 ) & 0x00200000 ); + block.y |= ( ( endPoint[0].g << 12 ) & 0x00400000 ); + block.y |= ( ( endPoint[0].b << 12 ) & 0x08000000 ); + block.y |= ( ( endPoint[0].b << 14 ) & 0x10000000 ); + block.y |= ( ( endPoint[0].b << 16 ) & 0x20000000 ); + block.y |= ( ( endPoint[0].b << 18 ) & 0x40000000 ); + block.y |= ( ( endPoint[0].b << 20 ) & 0x80000000 ); + block.y |= ( ( endPoint[1].r << 3 ) & 0x00000078 ) | ( ( endPoint[1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[1].b << 23 ) & 0x07800000 ); + block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[13] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[13] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[13] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[13] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[13] >> 4) & 1) << 4; + block.x |= ((endPoint[0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0].g >> 0) & 1) << 15; + 
block.x |= ((endPoint[0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0].b >> 9) & 1) << 2; + block.y |= ((endPoint[1].r >> 0) & 1) << 3; + block.y |= ((endPoint[1].r >> 1) & 1) << 4; + block.y |= ((endPoint[1].r >> 2) & 1) << 5; + block.y |= ((endPoint[1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0].r >> 15) & 1) << 7; + block.y |= ((endPoint[0].r >> 14) & 1) << 8; + block.y |= ((endPoint[0].r >> 13) & 1) << 9; + block.y |= ((endPoint[0].r >> 12) & 1) << 10; + block.y |= ((endPoint[0].r >> 11) & 1) << 11; + block.y |= ((endPoint[0].r >> 10) & 1) << 12; + block.y |= ((endPoint[1].g >> 0) & 1) << 13; + block.y |= ((endPoint[1].g >> 1) & 1) << 14; + block.y |= ((endPoint[1].g >> 2) & 1) << 15; + block.y |= ((endPoint[1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0].g >> 15) & 1) << 17; + block.y |= ((endPoint[0].g >> 14) & 1) << 18; + block.y |= ((endPoint[0].g >> 13) & 1) << 19; + block.y |= ((endPoint[0].g >> 12) & 1) << 20; + block.y |= ((endPoint[0].g >> 11) & 1) << 21; + block.y |= ((endPoint[0].g >> 10) & 1) << 22; + block.y |= ((endPoint[1].b >> 0) & 1) << 23; + block.y |= ((endPoint[1].b >> 1) & 1) << 24; + block.y |= ((endPoint[1].b >> 2) & 1) << 25; + block.y |= 
((endPoint[1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0].b >> 15) & 1) << 27; + block.y |= ((endPoint[0].b >> 14) & 1) << 28; + block.y |= ((endPoint[0].b >> 13) & 1) << 29; + block.y |= ((endPoint[0].b >> 12) & 1) << 30; + block.y |= ((endPoint[0].b >> 11) & 1) << 31; + block.z |= ((endPoint[0].b >> 10) & 1) << 0; + } +} diff --git a/extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp b/extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp index ef6b1cb..b7fad18 100644 --- a/extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp +++ b/extern/CMP_Core/shaders/BC7_Encode_Kernel.cpp @@ -1,6 +1,6 @@ //===================================================================== -// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights @@ -106,6 +106,7 @@ CGU_INT expandbits(CGU_INT bits, CGU_INT v) } CMP_EXPORT CGU_INT bc7_isa() { +#ifndef ASPM_GPU #if defined(ISPC_TARGET_SSE2) ASPM_PRINT(("SSE2")); return 0; @@ -120,8 +121,9 @@ CMP_EXPORT CGU_INT bc7_isa() { return 3; #else ASPM_PRINT(("CPU")); - return -1; #endif +#endif + return -1; } CMP_EXPORT void init_BC7ramps() @@ -528,139 +530,6 @@ void GetProjecedImage( INLINE CGV_UINT8 get_partition_subset(CGV_INT part_id, CGU_INT maxSubsets, CGV_INT index) { - CMP_STATIC uniform CMP_CONSTANT CGU_UINT32 subset_mask_table[] = { - // 2 subset region patterns - 0x0000CCCCu, // 0 1100 1100 1100 1100 (MSB..LSB) - 0x00008888u, // 1 1000 1000 1000 1000 - 0x0000EEEEu, // 2 1110 1110 1110 1110 - 0x0000ECC8u, // 3 1110 1100 1100 1000 - 0x0000C880u, // 4 1100 1000 1000 0000 - 0x0000FEECu, // 5 1111 1110 1110 1100 - 0x0000FEC8u, // 6 1111 1110 1100 1000 - 0x0000EC80u, // 7 1110 1100 1000 0000 - 0x0000C800u, // 8 1100 1000 0000 0000 - 
0x0000FFECu, // 9 1111 1111 1110 1100 - 0x0000FE80u, // 10 1111 1110 1000 0000 - 0x0000E800u, // 11 1110 1000 0000 0000 - 0x0000FFE8u, // 12 1111 1111 1110 1000 - 0x0000FF00u, // 13 1111 1111 0000 0000 - 0x0000FFF0u, // 14 1111 1111 1111 0000 - 0x0000F000u, // 15 1111 0000 0000 0000 - 0x0000F710u, // 16 1111 0111 0001 0000 - 0x0000008Eu, // 17 0000 0000 1000 1110 - 0x00007100u, // 18 0111 0001 0000 0000 - 0x000008CEu, // 19 0000 1000 1100 1110 - 0x0000008Cu, // 20 0000 0000 1000 1100 - 0x00007310u, // 21 0111 0011 0001 0000 - 0x00003100u, // 22 0011 0001 0000 0000 - 0x00008CCEu, // 23 1000 1100 1100 1110 - 0x0000088Cu, // 24 0000 1000 1000 1100 - 0x00003110u, // 25 0011 0001 0001 0000 - 0x00006666u, // 26 0110 0110 0110 0110 - 0x0000366Cu, // 27 0011 0110 0110 1100 - 0x000017E8u, // 28 0001 0111 1110 1000 - 0x00000FF0u, // 29 0000 1111 1111 0000 - 0x0000718Eu, // 30 0111 0001 1000 1110 - 0x0000399Cu, // 31 0011 1001 1001 1100 - 0x0000AAAAu, // 32 1010 1010 1010 1010 - 0x0000F0F0u, // 33 1111 0000 1111 0000 - 0x00005A5Au, // 34 0101 1010 0101 1010 - 0x000033CCu, // 35 0011 0011 1100 1100 - 0x00003C3Cu, // 36 0011 1100 0011 1100 - 0x000055AAu, // 37 0101 0101 1010 1010 - 0x00009696u, // 38 1001 0110 1001 0110 - 0x0000A55Au, // 39 1010 0101 0101 1010 - 0x000073CEu, // 40 0111 0011 1100 1110 - 0x000013C8u, // 41 0001 0011 1100 1000 - 0x0000324Cu, // 42 0011 0010 0100 1100 - 0x00003BDCu, // 43 0011 1011 1101 1100 - 0x00006996u, // 44 0110 1001 1001 0110 - 0x0000C33Cu, // 45 1100 0011 0011 1100 - 0x00009966u, // 46 1001 1001 0110 0110 - 0x00000660u, // 47 0000 0110 0110 0000 - 0x00000272u, // 48 0000 0010 0111 0010 - 0x000004E4u, // 49 0000 0100 1110 0100 - 0x00004E40u, // 50 0100 1110 0100 0000 - 0x00002720u, // 51 0010 0111 0010 0000 - 0x0000C936u, // 52 1100 1001 0011 0110 - 0x0000936Cu, // 53 1001 0011 0110 1100 - 0x000039C6u, // 54 0011 1001 1100 0110 - 0x0000639Cu, // 55 0110 0011 1001 1100 - 0x00009336u, // 56 1001 0011 0011 0110 - 0x00009CC6u, // 57 1001 1100 
1100 0110 - 0x0000817Eu, // 58 1000 0001 0111 1110 - 0x0000E718u, // 59 1110 0111 0001 1000 - 0x0000CCF0u, // 60 1100 1100 1111 0000 - 0x00000FCCu, // 61 0000 1111 1100 1100 - 0x00007744u, // 62 0111 0111 0100 0100 - 0x0000EE22u, // 63 1110 1110 0010 0010 - // 3 Subset region patterns - 0xF60008CCu,// 0 1111 0110 0000 0000 : 0000 1000 1100 1100 = 2222122011001100 (MSB...LSB) - 0x73008CC8u,// 1 0111 0011 0000 0000 : 1000 1100 1100 1000 = 1222112211001000 - 0x3310CC80u,// 2 0011 0011 0001 0000 : 1100 1100 1000 0000 = 1122112210020000 - 0x00CEEC00u,// 3 0000 0000 1100 1110 : 1110 1100 0000 0000 = 1110110022002220 - 0xCC003300u,// 4 1100 1100 0000 0000 : 0011 0011 0000 0000 = 2211221100000000 - 0xCC0000CCu,// 5 1100 1100 0000 0000 : 0000 0000 1100 1100 = 2200220011001100 - 0x00CCFF00u,// 6 0000 0000 1100 1100 : 1111 1111 0000 0000 = 1111111122002200 - 0x3300CCCCu,// 7 0011 0011 0000 0000 : 1100 1100 1100 1100 = 1122112211001100 - 0xF0000F00u,// 8 1111 0000 0000 0000 : 0000 1111 0000 0000 = 2222111100000000 - 0xF0000FF0u,// 9 1111 0000 0000 0000 : 0000 1111 1111 0000 = 2222111111110000 - 0xFF0000F0u,// 10 1111 1111 0000 0000 : 0000 0000 1111 0000 = 2222222211110000 - 0x88884444u,// 11 1000 1000 1000 1000 : 0100 0100 0100 0100 = 2100210021002100 - 0x88886666u,// 12 1000 1000 1000 1000 : 0110 0110 0110 0110 = 2110211021102110 - 0xCCCC2222u,// 13 1100 1100 1100 1100 : 0010 0010 0010 0010 = 2210221022102210 - 0xEC80136Cu,// 14 1110 1100 1000 0000 : 0001 0011 0110 1100 = 2221221121101100 - 0x7310008Cu,// 15 0111 0011 0001 0000 : 0000 0000 1000 1100 = 0222002210021100 - 0xC80036C8u,// 16 1100 1000 0000 0000 : 0011 0110 1100 1000 = 2211211011001000 - 0x310008CEu,// 17 0011 0001 0000 0000 : 0000 1000 1100 1110 = 0022100211001110 - 0xCCC03330u,// 18 1100 1100 1100 0000 : 0011 0011 0011 0000 = 2211221122110000 - 0x0CCCF000u,// 19 0000 1100 1100 1100 : 1111 0000 0000 0000 = 1111220022002200 - 0xEE0000EEu,// 20 1110 1110 0000 0000 : 0000 0000 1110 1110 = 2220222011101110 - 
0x77008888u,// 21 0111 0111 0000 0000 : 1000 1000 1000 1000 = 1222122210001000 - 0xCC0022C0u,// 22 1100 1100 0000 0000 : 0010 0010 1100 0000 = 2210221011000000 - 0x33004430u,// 23 0011 0011 0000 0000 : 0100 0100 0011 0000 = 0122012200110000 - 0x00CC0C22u,// 24 0000 0000 1100 1100 : 0000 1100 0010 0010 = 0000110022102210 - 0xFC880344u,// 25 1111 1100 1000 1000 : 0000 0011 0100 0100 = 2222221121002100 - 0x06606996u,// 26 0000 0110 0110 0000 : 0110 1001 1001 0110 = 0110122112210110 - 0x66009960u,// 27 0110 0110 0000 0000 : 1001 1001 0110 0000 = 1221122101100000 - 0xC88C0330u,// 28 1100 1000 1000 1100 : 0000 0011 0011 0000 = 2200201120112200 - 0xF9000066u,// 29 1111 1001 0000 0000 : 0000 0000 0110 0110 = 2222200201100110 - 0x0CC0C22Cu,// 30 0000 1100 1100 0000 : 1100 0010 0010 1100 = 1100221022101100 - 0x73108C00u,// 31 0111 0011 0001 0000 : 1000 1100 0000 0000 = 1222112200020000 - 0xEC801300u,// 32 1110 1100 1000 0000 : 0001 0011 0000 0000 = 2221221120000000 - 0x08CEC400u,// 33 0000 1000 1100 1110 : 1100 0100 0000 0000 = 1100210022002220 - 0xEC80004Cu,// 34 1110 1100 1000 0000 : 0000 0000 0100 1100 = 2220220021001100 - 0x44442222u,// 35 0100 0100 0100 0100 : 0010 0010 0010 0010 = 0210021002100210 - 0x0F0000F0u,// 36 0000 1111 0000 0000 : 0000 0000 1111 0000 = 0000222211110000 - 0x49242492u,// 37 0100 1001 0010 0100 : 0010 0100 1001 0010 = 0210210210210210 - 0x42942942u,// 38 0100 0010 1001 0100 : 0010 1001 0100 0010 = 0210102121020210 - 0x0C30C30Cu,// 39 0000 1100 0011 0000 : 1100 0011 0000 1100 = 1100221100221100 - 0x03C0C03Cu,// 40 0000 0011 1100 0000 : 1100 0000 0011 1100 = 1100002222111100 - 0xFF0000AAu,// 41 1111 1111 0000 0000 : 0000 0000 1010 1010 = 2222222210101010 - 0x5500AA00u,// 42 0101 0101 0000 0000 : 1010 1010 0000 0000 = 1212121200000000 - 0xCCCC3030u,// 43 1100 1100 1100 1100 : 0011 0000 0011 0000 = 2211220022112200 - 0x0C0CC0C0u,// 44 0000 1100 0000 1100 : 1100 0000 1100 0000 = 1100220011002200 - 0x66669090u,// 45 0110 0110 0110 0110 : 1001 0000 1001 
0000 = 1221022012210220 - 0x0FF0A00Au,// 46 0000 1111 1111 0000 : 1010 0000 0000 1010 = 1010222222221010 - 0x5550AAA0u,// 47 0101 0101 0101 0000 : 1010 1010 1010 0000 = 1212121212120000 - 0xF0000AAAu,// 48 1111 0000 0000 0000 : 0000 1010 1010 1010 = 2222101010101010 - 0x0E0EE0E0u,// 49 0000 1110 0000 1110 : 1110 0000 1110 0000 = 1110222011102220 - 0x88887070u,// 50 1000 1000 1000 1000 : 0111 0000 0111 0000 = 2111200021112000 - 0x99906660u,// 51 1001 1001 1001 0000 : 0110 0110 0110 0000 = 2112211221120000 - 0xE00E0EE0u,// 52 1110 0000 0000 1110 : 0000 1110 1110 0000 = 2220111011102220 - 0x88880770u,// 53 1000 1000 1000 1000 : 0000 0111 0111 0000 = 2000211121112000 - 0xF0000666u,// 54 1111 0000 0000 0000 : 0000 0110 0110 0110 = 2222011001100110 - 0x99006600u,// 55 1001 1001 0000 0000 : 0110 0110 0000 0000 = 2112211200000000 - 0xFF000066u,// 56 1111 1111 0000 0000 : 0000 0000 0110 0110 = 2222222201100110 - 0xC00C0CC0u,// 57 1100 0000 0000 1100 : 0000 1100 1100 0000 = 2200110011002200 - 0xCCCC0330u,// 58 1100 1100 1100 1100 : 0000 0011 0011 0000 = 2200221122112200 - 0x90006000u,// 59 1001 0000 0000 0000 : 0110 0000 0000 0000 = 2112000000000000 - 0x08088080u,// 60 0000 1000 0000 1000 : 1000 0000 1000 0000 = 1000200010002000 - 0xEEEE1010u,// 61 1110 1110 1110 1110 : 0001 0000 0001 0000 = 2221222022212220 - 0xFFF0000Au,// 62 1111 1111 1111 0000 : 0000 0000 0000 1010 = 2222222222221010 - 0x731008CEu,// 63 0111 0011 0001 0000 : 0000 1000 1100 1110 = 0222102211021110 - }; - if (maxSubsets == 2) { CGV_UINT32 mask_packed = subset_mask_table[part_id]; @@ -1029,14 +898,6 @@ INLINE CGV_EPOCODE ep_find_floor( { #ifdef ASPM_GPU // GPU Code CGV_FLOAT rampf = 0.0F; - CMP_CONSTANT CGV_EPOCODE rampI[5*SOURCE_BLOCK_SIZE] = { - 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 0 bit index - 0 ,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 1 bit index - 0 ,21,43,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 2 bit index - 0 ,9 ,18,27,37,46,55,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 3 bit index - 
0 ,4 ,9 ,13,17,21,26,30,34,38,43,47,51,55,60,64 // 4 bit index - }; - CGV_EPOCODE e1 = expand_epocode(p1, bits); CGV_EPOCODE e2 = expand_epocode(p2,bits); CGV_FLOAT ramp = gather_epocode(rampI,clogBC7*16+index)/64.0F; @@ -1077,21 +938,6 @@ INLINE CGV_EPOCODE ep_find_floor( INLINE void get_fixuptable(CGV_FIXUPINDEX fixup[3], CGV_PARTID part_id) { - // same as CMP SDK v3.1 BC7_FIXUPINDEX1 & BC7_FIXUPINDEX2 for each partition range 0..63 - // The data is saved as a packed INT = (BC7_FIXUPINDEX1 << 4 + BC7_FIXUPINDEX2) - CMP_STATIC uniform __constant CGV_FIXUPINDEX FIXUPINDEX[] = { - // 2 subset partitions 0..63 - 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, - 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0x20u, 0x20u, - 0xf0u, 0xf0u, 0x60u, 0x80u, 0x20u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0x60u, - 0x60u, 0x20u, 0x60u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u, - // 3 subset partitions 64..128 - 0x3fu, 0x38u, 0xf8u, 0xf3u, 0x8fu, 0x3fu, 0xf3u, 0xf8u, 0x8fu, 0x8fu, 0x6fu, 0x6fu, 0x6fu, 0x5fu, 0x3fu, 0x38u, - 0x3fu, 0x38u, 0x8fu, 0xf3u, 0x3fu, 0x38u, 0x6fu, 0xa8u, 0x53u, 0x8fu, 0x86u, 0x6au, 0x8fu, 0x5fu, 0xfau, 0xf8u, - 0x8fu, 0xf3u, 0x3fu, 0x5au, 0x6au, 0xa8u, 0x89u, 0xfau, 0xf6u, 0x3fu, 0xf8u, 0x5fu, 0xf3u, 0xf6u, 0xf6u, 0xf8u, - 0x3fu, 0xf3u, 0x5fu, 0x5fu, 0x5fu, 0x8fu, 0x5fu, 0xafu, 0x5fu, 0xafu, 0x8fu, 0xdfu, 0xf3u, 0xcfu, 0x3fu, 0x38u - }; - CGV_FIXUPINDEX skip_packed = FIXUPINDEX[part_id];// gather_int2(FIXUPINDEX, part_id); fixup[0] = 0; fixup[1] = skip_packed>>4; @@ -1472,27 +1318,29 @@ CGV_ERROR quant_solid_color( if (error_t < error_0) { + // We have a solid color: Use image src if on GPU image_log = iclogBC7; image_idx = image_log; - CGU_BOOL srcIsWhite = FALSE; - if ((image_src[0] == 255.0f)&&(image_src[1] == 255.0f)&&(image_src[2] == 255.0f)) srcIsWhite = 
TRUE; +#ifdef ASPM_GPU // This needs improving + CGV_IMAGE MinC[4] = {255,255,255,255}; + CGV_IMAGE MaxC[4] = {0,0,0,0}; + // get min max colors + for (CGU_CHANNEL ch=0;ch MaxC[ch] ) MaxC[ch] = image_src[k+ch*SOURCE_BLOCK_SIZE]; + } + for (CGU_CHANNEL ch = 0; chsp_idx,index+1)&0xFF; } else { - epo_0[ch] = 0; + epo_0[ch] = 0; epo_0[4 + ch] = 0; } #else epo_0[ ch] = 0; epo_0[4+ch] = 0; -#endif #endif } +#endif error_0 = error_t; } //if (error_0 == 0) @@ -1980,7 +1828,11 @@ INLINE void cmp_encode_swap(CGV_EPOCODE endpoint[], CGU_INT channels, CGV_INDEX { cmp_swap_epo(&endpoint[0], &endpoint[channels], channels); for (CGU_INT k=0; k0) q = (levels-1)-q; - if (k1==0 && k2==0) cmp_Write8Bit(data, pPos, bits - 1, static_cast (q)); - else cmp_Write8Bit(data, pPos, bits, static_cast(q)); + if (k1==0 && k2==0) cmp_Write8Bit(data, pPos, bits-1, CMP_STATIC_CAST(CGV_BYTE,q)); + else cmp_Write8Bit(data, pPos, bits , CMP_STATIC_CAST(CGV_BYTE,q)); qbits_shifted >>= 4; flips_shifted >>= 1; } @@ -2236,10 +2089,10 @@ void Encode_mode4( CGV_CMPOUT cmp_out[COMPRESSED_BLOCK_SIZE], cmp_Write8Bit(cmp_out,&bitPosition,1,1); // rotation 2 bits - cmp_Write8Bit(cmp_out, &bitPosition, 2, static_cast (params->rotated_channel)); + cmp_Write8Bit(cmp_out,&bitPosition,2, CMP_STATIC_CAST(CGV_BYTE,params->rotated_channel)); // idxMode 1 bit - cmp_Write8Bit(cmp_out, &bitPosition, 1, static_cast (params->idxMode)); + cmp_Write8Bit(cmp_out, &bitPosition, 1,CMP_STATIC_CAST(CGV_BYTE,params->idxMode)); CGU_INT idxBits[2] = {2,3}; @@ -2264,14 +2117,14 @@ void Encode_mode4( CGV_CMPOUT cmp_out[COMPRESSED_BLOCK_SIZE], // B0 : B1 for (CGU_INT component=0; component < 3; component++) { - cmp_Write8Bit(cmp_out, &bitPosition, 5, static_cast (params->color_qendpoint[component])); - cmp_Write8Bit(cmp_out, &bitPosition, 5, static_cast (params->color_qendpoint[4 + component])); + cmp_Write8Bit(cmp_out,&bitPosition,5,CMP_STATIC_CAST(CGV_BYTE,params->color_qendpoint[component])); + 
cmp_Write8Bit(cmp_out,&bitPosition,5,CMP_STATIC_CAST(CGV_BYTE,params->color_qendpoint[4 + component])); } // alpha endpoints (6 bits each) // A0 : A1 - cmp_Write8Bit(cmp_out, &bitPosition, 6, static_cast (params->alpha_qendpoint[0])); - cmp_Write8Bit(cmp_out, &bitPosition, 6, static_cast (params->alpha_qendpoint[4])); + cmp_Write8Bit(cmp_out,&bitPosition,6,CMP_STATIC_CAST(CGV_BYTE,params->alpha_qendpoint[0])); + cmp_Write8Bit(cmp_out,&bitPosition,6,CMP_STATIC_CAST(CGV_BYTE,params->alpha_qendpoint[4])); // index 2 bits each (31 bits total) cmp_encode_index(cmp_out, &bitPosition, params->color_index, 2); @@ -2289,7 +2142,7 @@ void Encode_mode5( CGV_CMPOUT cmp_out[COMPRESSED_BLOCK_SIZE], cmp_Write8Bit(cmp_out,&bitPosition,1,1); // Write 2 bit rotation - cmp_Write8Bit(cmp_out, &bitPosition, 2, static_cast (params->rotated_channel)); + cmp_Write8Bit(cmp_out,&bitPosition,2, CMP_STATIC_CAST(CGV_BYTE,params->rotated_channel)); cmp_encode_swap(params->color_qendpoint, 4, params->color_index,2); cmp_encode_swap(params->alpha_qendpoint, 4, params->alpha_index,2); @@ -2300,14 +2153,14 @@ void Encode_mode5( CGV_CMPOUT cmp_out[COMPRESSED_BLOCK_SIZE], // B0 : B1 for (CGU_INT component=0; component < 3; component++) { - cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast (params->color_qendpoint[component])); - cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast (params->color_qendpoint[4 + component])); + cmp_Write8Bit(cmp_out,&bitPosition,7,CMP_STATIC_CAST(CGV_BYTE,params->color_qendpoint[component])); + cmp_Write8Bit(cmp_out,&bitPosition,7,CMP_STATIC_CAST(CGV_BYTE,params->color_qendpoint[4 + component])); } // alpha endpoints (8 bits each) // A0 : A1 - cmp_Write8Bit(cmp_out, &bitPosition, 8, static_cast (params->alpha_qendpoint[0])); - cmp_Write8Bit(cmp_out, &bitPosition, 8, static_cast (params->alpha_qendpoint[4])); + cmp_Write8Bit(cmp_out,&bitPosition,8,CMP_STATIC_CAST(CGV_BYTE,params->alpha_qendpoint[0])); + 
cmp_Write8Bit(cmp_out,&bitPosition,8,CMP_STATIC_CAST(CGV_BYTE,params->alpha_qendpoint[4])); // color index 2 bits each (31 bits total) @@ -2332,8 +2185,8 @@ void Encode_mode6( // endpoints for (CGU_INT p=0; p<4; p++) { - cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast (epo_code[0 + p] >> 1)); - cmp_Write8Bit(cmp_out, &bitPosition, 7, static_cast (epo_code[4 + p] >> 1)); + cmp_Write8Bit(cmp_out, &bitPosition, 7, CMP_STATIC_CAST(CGV_BYTE,epo_code[0 + p] >> 1)); + cmp_Write8Bit(cmp_out, &bitPosition, 7, CMP_STATIC_CAST(CGV_BYTE,epo_code[4 + p] >> 1)); } // p bits @@ -2348,7 +2201,7 @@ void Encode_mode6( void Compress_mode01237( CGU_INT blockMode, BC7_EncodeState EncodeState[], -uniform CMP_GLOBAL BC7_Encode u_BC7Encode[]) +uniform CMP_GLOBAL BC7_Encode u_BC7Encode[]) { CGV_INDEX storedBestindex[MAX_PARTITIONS][MAX_SUBSETS][MAX_SUBSET_SIZE]; CGV_ERROR storedError[MAX_PARTITIONS]; @@ -2417,7 +2270,7 @@ uniform CMP_GLOBAL BC7_Encode u_BC7Encode[]) GetPartitionSubSet_mode01237( image_subsets, subset_entryCount, - static_cast(mode_blockPartition), + CMP_STATIC_CAST(CGV_UINT8,mode_blockPartition), EncodeState->image_src, blockMode, EncodeState->channels3or4); @@ -2526,7 +2379,7 @@ uniform CMP_GLOBAL BC7_Encode u_BC7Encode[]) tmp_epo_code, src_image_block, numEntries, - static_cast(EncodeState->clusters), // Mi_ + CMP_STATIC_CAST(CGU_INT8,EncodeState->clusters), // Mi_ EncodeState->bits, EncodeState->channels3or4, u_BC7Encode); @@ -2735,7 +2588,7 @@ uniform CMP_GLOBAL BC7_Encode u_BC7Encode[]) src_color_Block, SOURCE_BLOCK_SIZE, EncodeState->numClusters0[idxMode], - static_cast(EncodeState->modeBits[0]), + CMP_STATIC_CAST(CGU_UINT8,EncodeState->modeBits[0]), 3, u_BC7Encode); @@ -2746,7 +2599,7 @@ uniform CMP_GLOBAL BC7_Encode u_BC7Encode[]) src_alpha_Block, SOURCE_BLOCK_SIZE, EncodeState->numClusters1[idxMode], - static_cast(EncodeState->modeBits[1]), + CMP_STATIC_CAST(CGU_UINT8,EncodeState->modeBits[1]), 3, u_BC7Encode) / 3.0f; @@ -4574,6 +4427,7 @@ uniform CMP_GLOBAL 
BC7_Encode u_BC7Encode[]) CGU_INT Mode = 0x0001 << blockMode; if (!(u_BC7Encode->validModeMask & Mode)) continue; + switch (blockMode) { // image processing with no alpha @@ -4802,8 +4656,8 @@ void GetBC7Ramp(CGU_UINT32 endpoint[][MAX_DIMENSION_BIG], ep[0][i] += (CGU_UINT32)(ep[0][i] >> componentBits[i]); ep[1][i] += (CGU_UINT32)(ep[1][i] >> componentBits[i]); - ep[0][i] = min8(255, max8(0, static_cast(ep[0][i]))); - ep[1][i] = min8(255, max8(0, static_cast(ep[1][i]))); + ep[0][i] = min8(255, max8(0,CMP_STATIC_CAST(CGU_UINT8,ep[0][i]))); + ep[1][i] = min8(255, max8(0,CMP_STATIC_CAST(CGU_UINT8,ep[1][i]))); } } @@ -4926,7 +4780,7 @@ void DecompressDualIndexBlock( // If this is a fixup index then clear the implicit bit if(j==0) { - blockIndices[i][j] &= ~(1 << (bti[m_blockMode].indexBits[i]-1)); + blockIndices[i][j] &= ~(1 << (bti[m_blockMode].indexBits[i]-1U)); for(k=0;k(bti[m_blockMode].indexBits[i] - 1); k++) { blockIndices[i][j] |= (CGU_UINT32)ReadBit(in,m_bitPosition) << k; @@ -5377,7 +5231,7 @@ int CMP_CDECL CompressBlockBC7( const unsigned char *srcBlock, EncodeState.best_err = CMP_FLOAT_MAX; EncodeState.validModeMask = u_BC7Encode->validModeMask; EncodeState.part_count = u_BC7Encode->part_count; - EncodeState.channels = static_cast(u_BC7Encode->channels); + EncodeState.channels = CMP_STATIC_CAST(CGU_CHANNEL,u_BC7Encode->channels); CGU_UINT8 offsetR = 0; CGU_UINT8 offsetG = 16; @@ -5410,6 +5264,7 @@ int CMP_CDECL CompressBlockBC7( const unsigned char *srcBlock, return CGU_CORE_OK; } + int CMP_CDECL DecompressBlockBC7(const unsigned char cmpBlock[16], unsigned char srcBlock[64], const void *options = NULL) { @@ -5429,7 +5284,7 @@ int CMP_CDECL DecompressBlockBC7(const unsigned char cmpBlock[16], #endif //============================================== OpenCL USER INTERFACE ==================================================== -#ifdef ASPM_GPU +#ifdef ASPM_OPENCL CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const CGU_Vec4uc ImageSource[], CMP_GLOBAL 
CGV_CMPOUT ImageDestination[], uniform CMP_GLOBAL Source_Info SourceInfo[], @@ -5438,21 +5293,21 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const CGU_Vec4uc CGU_INT xID=0; CGU_INT yID=0; - xID = get_global_id(0); // ToDo: Define a size_t 32 bit and 64 bit basd on clGetDeviceInfo + xID = get_global_id(0); // ToDo: Define a size_t 32 bit and 64 bit based on clGetDeviceInfo yID = get_global_id(1); - CGU_INT srcWidth = SourceInfo->m_src_width; CGU_INT srcHeight = SourceInfo->m_src_height; if (xID >= (srcWidth / BlockX)) return; if (yID >= (srcHeight / BlockY)) return; + //ASPM_PRINT(("[ASPM_OCL] %d %d size %d\n",xID,yID,sizeof(BC7_Encode))); + CGU_INT destI = (xID*COMPRESSED_BLOCK_SIZE) + (yID*(srcWidth / BlockX)*COMPRESSED_BLOCK_SIZE); CGU_INT srcindex = 4 * (yID * srcWidth + xID); CGU_INT blkindex = 0; BC7_EncodeState EncodeState; - varying BC7_EncodeState* uniform state = &EncodeState; - - copy_BC7_Encode_settings(state, BC7Encode); + cmp_memsetBC7(&EncodeState,0,sizeof(EncodeState)); + copy_BC7_Encode_settings(&EncodeState, BC7Encode); //Check if it is a complete 4X4 block if (((xID + 1)*BlockX <= srcWidth) && ((yID + 1)*BlockY <= srcHeight)) @@ -5460,10 +5315,10 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const CGU_Vec4uc srcWidth = srcWidth - 4; for (CGU_INT j = 0; j < 4; j++) { for (CGU_INT i = 0; i < 4; i++) { - state->image_src[blkindex+0*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].x; - state->image_src[blkindex+1*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].y; - state->image_src[blkindex+2*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].z; - state->image_src[blkindex+3*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].w; + EncodeState.image_src[blkindex+0*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].x; + EncodeState.image_src[blkindex+1*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].y; + EncodeState.image_src[blkindex+2*SOURCE_BLOCK_SIZE] = ImageSource[srcindex].z; + EncodeState.image_src[blkindex+3*SOURCE_BLOCK_SIZE] = 
ImageSource[srcindex].w; blkindex++; srcindex++; } @@ -5471,13 +5326,21 @@ CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(uniform CMP_GLOBAL const CGU_Vec4uc srcindex += srcWidth; } - copy_BC7_Encode_settings(state, BC7Encode); - BC7_CompressBlock(&EncodeState, BC7Encode); + // printf("CMP %x %x %x %x %x %x %x", + // state->cmp_out[0], + // state->cmp_out[1], + // state->cmp_out[2], + // state->cmp_out[3], + // state->cmp_out[4], + // state->cmp_out[5], + // state->cmp_out[6] + // ); + for (CGU_INT i=0; icmp_out[i]; + ImageDestination[destI+i] = EncodeState.cmp_out[i]; } } diff --git a/extern/CMP_Core/shaders/BC7_Encode_Kernel.h b/extern/CMP_Core/shaders/BC7_Encode_Kernel.h index 1a812b9..58e115a 100644 --- a/extern/CMP_Core/shaders/BC7_Encode_Kernel.h +++ b/extern/CMP_Core/shaders/BC7_Encode_Kernel.h @@ -1,5 +1,5 @@ //===================================================================== -// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal @@ -23,6 +23,8 @@ #ifndef BC7_ENCODE_KERNEL_H #define BC7_ENCODE_KERNEL_H +#pragma warning(disable:4505) // disable warning C4505: unreferenced local function has been removed + #if defined(ISPC)||defined(ASPM) //#include "..\..\Common\Common_Def.h" #include "Common_Def.h" @@ -252,7 +254,6 @@ BC7_Encode CGU_INT refineIterations; CGU_INT part_count; CGU_INT channels; - } #ifndef ASPM BC7_Encode @@ -569,6 +570,163 @@ CMP_CONSTANT CGU_UINT8 par_vectors_nd[2][8][64][2][4] = { }, }; +CMP_CONSTANT CGU_UINT32 subset_mask_table[] = { + // 2 subset region patterns + 0x0000CCCCu, // 0 1100 1100 1100 1100 (MSB..LSB) + 0x00008888u, // 1 1000 1000 1000 1000 + 0x0000EEEEu, // 2 1110 1110 1110 1110 + 0x0000ECC8u, // 3 1110 1100 1100 1000 + 0x0000C880u, // 4 1100 1000 1000 0000 + 0x0000FEECu, // 5 1111 1110 1110 1100 + 0x0000FEC8u, // 6 1111 1110 1100 1000 + 0x0000EC80u, // 7 1110 1100 1000 0000 + 0x0000C800u, // 8 1100 1000 0000 0000 + 0x0000FFECu, // 9 1111 1111 1110 1100 + 0x0000FE80u, // 10 1111 1110 1000 0000 + 0x0000E800u, // 11 1110 1000 0000 0000 + 0x0000FFE8u, // 12 1111 1111 1110 1000 + 0x0000FF00u, // 13 1111 1111 0000 0000 + 0x0000FFF0u, // 14 1111 1111 1111 0000 + 0x0000F000u, // 15 1111 0000 0000 0000 + 0x0000F710u, // 16 1111 0111 0001 0000 + 0x0000008Eu, // 17 0000 0000 1000 1110 + 0x00007100u, // 18 0111 0001 0000 0000 + 0x000008CEu, // 19 0000 1000 1100 1110 + 0x0000008Cu, // 20 0000 0000 1000 1100 + 0x00007310u, // 21 0111 0011 0001 0000 + 0x00003100u, // 22 0011 0001 0000 0000 + 0x00008CCEu, // 23 1000 1100 1100 1110 + 0x0000088Cu, // 24 0000 1000 1000 1100 + 0x00003110u, // 25 0011 0001 0001 0000 + 0x00006666u, // 26 0110 0110 0110 0110 + 0x0000366Cu, // 27 0011 0110 0110 1100 + 0x000017E8u, // 28 0001 0111 1110 1000 + 0x00000FF0u, // 29 0000 1111 1111 0000 + 0x0000718Eu, // 30 0111 0001 1000 1110 +
0x0000399Cu, // 31 0011 1001 1001 1100 + 0x0000AAAAu, // 32 1010 1010 1010 1010 + 0x0000F0F0u, // 33 1111 0000 1111 0000 + 0x00005A5Au, // 34 0101 1010 0101 1010 + 0x000033CCu, // 35 0011 0011 1100 1100 + 0x00003C3Cu, // 36 0011 1100 0011 1100 + 0x000055AAu, // 37 0101 0101 1010 1010 + 0x00009696u, // 38 1001 0110 1001 0110 + 0x0000A55Au, // 39 1010 0101 0101 1010 + 0x000073CEu, // 40 0111 0011 1100 1110 + 0x000013C8u, // 41 0001 0011 1100 1000 + 0x0000324Cu, // 42 0011 0010 0100 1100 + 0x00003BDCu, // 43 0011 1011 1101 1100 + 0x00006996u, // 44 0110 1001 1001 0110 + 0x0000C33Cu, // 45 1100 0011 0011 1100 + 0x00009966u, // 46 1001 1001 0110 0110 + 0x00000660u, // 47 0000 0110 0110 0000 + 0x00000272u, // 48 0000 0010 0111 0010 + 0x000004E4u, // 49 0000 0100 1110 0100 + 0x00004E40u, // 50 0100 1110 0100 0000 + 0x00002720u, // 51 0010 0111 0010 0000 + 0x0000C936u, // 52 1100 1001 0011 0110 + 0x0000936Cu, // 53 1001 0011 0110 1100 + 0x000039C6u, // 54 0011 1001 1100 0110 + 0x0000639Cu, // 55 0110 0011 1001 1100 + 0x00009336u, // 56 1001 0011 0011 0110 + 0x00009CC6u, // 57 1001 1100 1100 0110 + 0x0000817Eu, // 58 1000 0001 0111 1110 + 0x0000E718u, // 59 1110 0111 0001 1000 + 0x0000CCF0u, // 60 1100 1100 1111 0000 + 0x00000FCCu, // 61 0000 1111 1100 1100 + 0x00007744u, // 62 0111 0111 0100 0100 + 0x0000EE22u, // 63 1110 1110 0010 0010 + // 3 Subset region patterns + 0xF60008CCu,// 0 1111 0110 0000 0000 : 0000 1000 1100 1100 = 2222122011001100 (MSB...LSB) + 0x73008CC8u,// 1 0111 0011 0000 0000 : 1000 1100 1100 1000 = 1222112211001000 + 0x3310CC80u,// 2 0011 0011 0001 0000 : 1100 1100 1000 0000 = 1122112210020000 + 0x00CEEC00u,// 3 0000 0000 1100 1110 : 1110 1100 0000 0000 = 1110110022002220 + 0xCC003300u,// 4 1100 1100 0000 0000 : 0011 0011 0000 0000 = 2211221100000000 + 0xCC0000CCu,// 5 1100 1100 0000 0000 : 0000 0000 1100 1100 = 2200220011001100 + 0x00CCFF00u,// 6 0000 0000 1100 1100 : 1111 1111 0000 0000 = 1111111122002200 + 0x3300CCCCu,// 7 0011 0011 0000 0000 : 1100 
1100 1100 1100 = 1122112211001100 + 0xF0000F00u,// 8 1111 0000 0000 0000 : 0000 1111 0000 0000 = 2222111100000000 + 0xF0000FF0u,// 9 1111 0000 0000 0000 : 0000 1111 1111 0000 = 2222111111110000 + 0xFF0000F0u,// 10 1111 1111 0000 0000 : 0000 0000 1111 0000 = 2222222211110000 + 0x88884444u,// 11 1000 1000 1000 1000 : 0100 0100 0100 0100 = 2100210021002100 + 0x88886666u,// 12 1000 1000 1000 1000 : 0110 0110 0110 0110 = 2110211021102110 + 0xCCCC2222u,// 13 1100 1100 1100 1100 : 0010 0010 0010 0010 = 2210221022102210 + 0xEC80136Cu,// 14 1110 1100 1000 0000 : 0001 0011 0110 1100 = 2221221121101100 + 0x7310008Cu,// 15 0111 0011 0001 0000 : 0000 0000 1000 1100 = 0222002210021100 + 0xC80036C8u,// 16 1100 1000 0000 0000 : 0011 0110 1100 1000 = 2211211011001000 + 0x310008CEu,// 17 0011 0001 0000 0000 : 0000 1000 1100 1110 = 0022100211001110 + 0xCCC03330u,// 18 1100 1100 1100 0000 : 0011 0011 0011 0000 = 2211221122110000 + 0x0CCCF000u,// 19 0000 1100 1100 1100 : 1111 0000 0000 0000 = 1111220022002200 + 0xEE0000EEu,// 20 1110 1110 0000 0000 : 0000 0000 1110 1110 = 2220222011101110 + 0x77008888u,// 21 0111 0111 0000 0000 : 1000 1000 1000 1000 = 1222122210001000 + 0xCC0022C0u,// 22 1100 1100 0000 0000 : 0010 0010 1100 0000 = 2210221011000000 + 0x33004430u,// 23 0011 0011 0000 0000 : 0100 0100 0011 0000 = 0122012200110000 + 0x00CC0C22u,// 24 0000 0000 1100 1100 : 0000 1100 0010 0010 = 0000110022102210 + 0xFC880344u,// 25 1111 1100 1000 1000 : 0000 0011 0100 0100 = 2222221121002100 + 0x06606996u,// 26 0000 0110 0110 0000 : 0110 1001 1001 0110 = 0110122112210110 + 0x66009960u,// 27 0110 0110 0000 0000 : 1001 1001 0110 0000 = 1221122101100000 + 0xC88C0330u,// 28 1100 1000 1000 1100 : 0000 0011 0011 0000 = 2200201120112200 + 0xF9000066u,// 29 1111 1001 0000 0000 : 0000 0000 0110 0110 = 2222200201100110 + 0x0CC0C22Cu,// 30 0000 1100 1100 0000 : 1100 0010 0010 1100 = 1100221022101100 + 0x73108C00u,// 31 0111 0011 0001 0000 : 1000 1100 0000 0000 = 1222112200020000 + 0xEC801300u,// 32 
1110 1100 1000 0000 : 0001 0011 0000 0000 = 2221221120000000 + 0x08CEC400u,// 33 0000 1000 1100 1110 : 1100 0100 0000 0000 = 1100210022002220 + 0xEC80004Cu,// 34 1110 1100 1000 0000 : 0000 0000 0100 1100 = 2220220021001100 + 0x44442222u,// 35 0100 0100 0100 0100 : 0010 0010 0010 0010 = 0210021002100210 + 0x0F0000F0u,// 36 0000 1111 0000 0000 : 0000 0000 1111 0000 = 0000222211110000 + 0x49242492u,// 37 0100 1001 0010 0100 : 0010 0100 1001 0010 = 0210210210210210 + 0x42942942u,// 38 0100 0010 1001 0100 : 0010 1001 0100 0010 = 0210102121020210 + 0x0C30C30Cu,// 39 0000 1100 0011 0000 : 1100 0011 0000 1100 = 1100221100221100 + 0x03C0C03Cu,// 40 0000 0011 1100 0000 : 1100 0000 0011 1100 = 1100002222111100 + 0xFF0000AAu,// 41 1111 1111 0000 0000 : 0000 0000 1010 1010 = 2222222210101010 + 0x5500AA00u,// 42 0101 0101 0000 0000 : 1010 1010 0000 0000 = 1212121200000000 + 0xCCCC3030u,// 43 1100 1100 1100 1100 : 0011 0000 0011 0000 = 2211220022112200 + 0x0C0CC0C0u,// 44 0000 1100 0000 1100 : 1100 0000 1100 0000 = 1100220011002200 + 0x66669090u,// 45 0110 0110 0110 0110 : 1001 0000 1001 0000 = 1221022012210220 + 0x0FF0A00Au,// 46 0000 1111 1111 0000 : 1010 0000 0000 1010 = 1010222222221010 + 0x5550AAA0u,// 47 0101 0101 0101 0000 : 1010 1010 1010 0000 = 1212121212120000 + 0xF0000AAAu,// 48 1111 0000 0000 0000 : 0000 1010 1010 1010 = 2222101010101010 + 0x0E0EE0E0u,// 49 0000 1110 0000 1110 : 1110 0000 1110 0000 = 1110222011102220 + 0x88887070u,// 50 1000 1000 1000 1000 : 0111 0000 0111 0000 = 2111200021112000 + 0x99906660u,// 51 1001 1001 1001 0000 : 0110 0110 0110 0000 = 2112211221120000 + 0xE00E0EE0u,// 52 1110 0000 0000 1110 : 0000 1110 1110 0000 = 2220111011102220 + 0x88880770u,// 53 1000 1000 1000 1000 : 0000 0111 0111 0000 = 2000211121112000 + 0xF0000666u,// 54 1111 0000 0000 0000 : 0000 0110 0110 0110 = 2222011001100110 + 0x99006600u,// 55 1001 1001 0000 0000 : 0110 0110 0000 0000 = 2112211200000000 + 0xFF000066u,// 56 1111 1111 0000 0000 : 0000 0000 0110 0110 = 
2222222201100110 + 0xC00C0CC0u,// 57 1100 0000 0000 1100 : 0000 1100 1100 0000 = 2200110011002200 + 0xCCCC0330u,// 58 1100 1100 1100 1100 : 0000 0011 0011 0000 = 2200221122112200 + 0x90006000u,// 59 1001 0000 0000 0000 : 0110 0000 0000 0000 = 2112000000000000 + 0x08088080u,// 60 0000 1000 0000 1000 : 1000 0000 1000 0000 = 1000200010002000 + 0xEEEE1010u,// 61 1110 1110 1110 1110 : 0001 0000 0001 0000 = 2221222022212220 + 0xFFF0000Au,// 62 1111 1111 1111 0000 : 0000 0000 0000 1010 = 2222222222221010 + 0x731008CEu,// 63 0111 0011 0001 0000 : 0000 1000 1100 1110 = 0222102211021110 + }; + + CMP_CONSTANT CGV_EPOCODE rampI[5*SOURCE_BLOCK_SIZE] = { + 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 0 bit index + 0 ,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 1 bit index + 0 ,21,43,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 2 bit index + 0 ,9 ,18,27,37,46,55,64,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , // 3 bit index + 0 ,4 ,9 ,13,17,21,26,30,34,38,43,47,51,55,60,64 // 4 bit index + }; + + // same as CMP SDK v3.1 BC7_FIXUPINDEX1 & BC7_FIXUPINDEX2 for each partition range 0..63 + // The data is saved as a packed INT = ((BC7_FIXUPINDEX1 << 4) + BC7_FIXUPINDEX2) + CMP_CONSTANT CGV_FIXUPINDEX FIXUPINDEX[] = { + // 2 subset partitions 0..63 + 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, + 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x80u, 0x80u, 0x20u, 0x20u, + 0xf0u, 0xf0u, 0x60u, 0x80u, 0x20u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x80u, 0x20u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0x60u, + 0x60u, 0x20u, 0x60u, 0x80u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0xf0u, 0x20u, 0x20u, 0xf0u, + // 3 subset partitions 64..127 + 0x3fu, 0x38u, 0xf8u, 0xf3u, 0x8fu, 0x3fu, 0xf3u, 0xf8u, 0x8fu, 0x8fu, 0x6fu, 0x6fu, 0x6fu, 0x5fu, 0x3fu, 0x38u, + 0x3fu, 0x38u, 0x8fu, 0xf3u, 0x3fu, 0x38u, 0x6fu, 0xa8u, 0x53u, 0x8fu, 0x86u, 0x6au, 0x8fu, 0x5fu, 0xfau, 0xf8u, + 0x8fu, 0xf3u, 0x3fu, 0x5au, 0x6au, 0xa8u,
0x89u, 0xfau, 0xf6u, 0x3fu, 0xf8u, 0x5fu, 0xf3u, 0xf6u, 0xf6u, 0xf8u, + 0x3fu, 0xf3u, 0x5fu, 0x5fu, 0x5fu, 0x8fu, 0x5fu, 0xafu, 0x5fu, 0xafu, 0x8fu, 0xdfu, 0xf3u, 0xcfu, 0x3fu, 0x38u + }; + + #ifndef ASPM_GPU // =============================== USED BY DECODER THIS CODE NEEDS TO BE UPDATED ========================================= CMP_CONSTANT CGU_UINT32 BC7_FIXUPINDICES_LOCAL[MAX_SUBSETS][MAX_PARTITIONS][3] = diff --git a/extern/CMP_Core/shaders/BC7_Encode_kernel.hlsl b/extern/CMP_Core/shaders/BC7_Encode_kernel.hlsl new file mode 100644 index 0000000..216b021 --- /dev/null +++ b/extern/CMP_Core/shaders/BC7_Encode_kernel.hlsl @@ -0,0 +1,1936 @@ +//-------------------------------------------------------------------------------------- +// File: BC7Encode.hlsl +// +// The Compute Shader for BC7 Encoder +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +//-------------------------------------------------------------------------------------- + +#define REF_DEVICE +#ifndef ASPM_HLSL +#define ASPM_HLSL +#endif + +#define CHAR_LENGTH 8 +#define NCHANNELS 4 +#define BC7_UNORM 98 +#define MAX_UINT 0xFFFFFFFF +#define MIN_UINT 0 + +static const uint candidateSectionBit[64] = //Associated to partition 0-63 +{ + 0xCCCC, 0x8888, 0xEEEE, 0xECC8, + 0xC880, 0xFEEC, 0xFEC8, 0xEC80, + 0xC800, 0xFFEC, 0xFE80, 0xE800, + 0xFFE8, 0xFF00, 0xFFF0, 0xF000, + 0xF710, 0x008E, 0x7100, 0x08CE, + 0x008C, 0x7310, 0x3100, 0x8CCE, + 0x088C, 0x3110, 0x6666, 0x366C, + 0x17E8, 0x0FF0, 0x718E, 0x399C, + 0xaaaa, 0xf0f0, 0x5a5a, 0x33cc, + 0x3c3c, 0x55aa, 0x9696, 0xa55a, + 0x73ce, 0x13c8, 0x324c, 0x3bdc, + 0x6996, 0xc33c, 0x9966, 0x660, + 0x272, 0x4e4, 0x4e40, 0x2720, + 0xc936, 0x936c, 0x39c6, 0x639c, + 0x9336, 0x9cc6, 0x817e, 0xe718, + 0xccf0, 0xfcc, 0x7744, 0xee22, +}; +static const uint candidateSectionBit2[64] = //Associated to partition 64-127 +{ + 0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8, + 0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050, + 
0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090, + 0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250, + 0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0, + 0xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500, + 0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400, + 0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200, + 0xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424, + 0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50, + 0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0, + 0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600, + 0xaa444444, 0x54a854a8, 0x95809580, 0x96969600, + 0xa85454a8, 0x80959580, 0xaa141414, 0x96960000, + 0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000, + 0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254, +}; +static const uint2 candidateFixUpIndex1D[128] = +{ + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{ 2, 0},{ 8, 0},{ 2, 0}, + { 2, 0},{ 8, 0},{ 8, 0},{15, 0}, + { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0}, + { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0}, + + {15, 0},{15, 0},{ 6, 0},{ 8, 0}, + { 2, 0},{ 8, 0},{15, 0},{15, 0}, + { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0}, + { 2, 0},{15, 0},{15, 0},{ 6, 0}, + { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0}, + {15, 0},{15, 0},{ 2, 0},{ 2, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{ 2, 0},{ 2, 0},{15, 0}, + //candidateFixUpIndex1D[i][1], i < 64 should not be used + + { 3,15},{ 3, 8},{15, 8},{15, 3}, + { 8,15},{ 3,15},{15, 3},{15, 8}, + { 8,15},{ 8,15},{ 6,15},{ 6,15}, + { 6,15},{ 5,15},{ 3,15},{ 3, 8}, + { 3,15},{ 3, 8},{ 8,15},{15, 3}, + { 3,15},{ 3, 8},{ 6,15},{10, 8}, + { 5, 3},{ 8,15},{ 8, 6},{ 6,10}, + { 8,15},{ 5,15},{15,10},{15, 8}, + + { 8,15},{15, 3},{ 3,15},{ 5,10}, + { 6,10},{10, 8},{ 8, 9},{15,10}, + {15, 6},{ 3,15},{15, 8},{ 5,15}, + {15, 3},{15, 6},{15, 6},{15, 8}, //The Spec doesn't mark the first fixed up index in this row, so I apply 15 for them, and seems correct + { 3,15},{15, 3},{ 5,15},{ 5,15}, + { 5,15},{ 8,15},{ 5,15},{10,15}, + { 5,15},{10,15},{ 
8,15},{13,15}, + {15, 3},{12,15},{ 3,15},{ 3, 8}, +}; +static const uint2 candidateFixUpIndex1DOrdered[128] = //Same with candidateFixUpIndex1D but order the result when i >= 64 +{ + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{ 2, 0},{ 8, 0},{ 2, 0}, + { 2, 0},{ 8, 0},{ 8, 0},{15, 0}, + { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0}, + { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0}, + + {15, 0},{15, 0},{ 6, 0},{ 8, 0}, + { 2, 0},{ 8, 0},{15, 0},{15, 0}, + { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0}, + { 2, 0},{15, 0},{15, 0},{ 6, 0}, + { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0}, + {15, 0},{15, 0},{ 2, 0},{ 2, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{ 2, 0},{ 2, 0},{15, 0}, + //candidateFixUpIndex1DOrdered[i][1], i < 64 should not be used + + { 3,15},{ 3, 8},{ 8,15},{ 3,15}, + { 8,15},{ 3,15},{ 3,15},{ 8,15}, + { 8,15},{ 8,15},{ 6,15},{ 6,15}, + { 6,15},{ 5,15},{ 3,15},{ 3, 8}, + { 3,15},{ 3, 8},{ 8,15},{ 3,15}, + { 3,15},{ 3, 8},{ 6,15},{ 8,10}, + { 3, 5},{ 8,15},{ 6, 8},{ 6,10}, + { 8,15},{ 5,15},{10,15},{ 8,15}, + + { 8,15},{ 3,15},{ 3,15},{ 5,10}, + { 6,10},{ 8,10},{ 8, 9},{10,15}, + { 6,15},{ 3,15},{ 8,15},{ 5,15}, + { 3,15},{ 6,15},{ 6,15},{ 8,15}, //The Spec doesn't mark the first fixed up index in this row, so I apply 15 for them, and seems correct + { 3,15},{ 3,15},{ 5,15},{ 5,15}, + { 5,15},{ 8,15},{ 5,15},{10,15}, + { 5,15},{10,15},{ 8,15},{13,15}, + { 3,15},{12,15},{ 3,15},{ 3, 8}, +}; +//static const uint4x4 candidateRotation[4] = +//{ +// {1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}, +// {0,0,0,1},{0,1,0,0},{0,0,1,0},{1,0,0,0}, +// {1,0,0,0},{0,0,0,1},{0,0,1,0},{0,1,0,0}, +// {1,0,0,0},{0,1,0,0},{0,0,0,1},{0,0,1,0} +//}; +//static const uint2 candidateIndexPrec[8] = {{3,0},{3,0},{2,0},{2,0}, +// {2,3}, //color index and alpha index can exchange +// {2,2},{4,4},{2,2}}; + +static const uint aWeight[3][16] = { {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}, + {0, 9, 18, 27, 37, 46, 55, 
64, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 21, 43, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }; + + //4 bit index: 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 +static const uint aStep[3][64] = { { 0, 0, 0, 1, 1, 1, 1, 2, + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 7, 7, 7, + 7, 8, 8, 8, 8, 9, 9, 9, + 9,10,10,10,10,10,11,11, + 11,11,12,12,12,12,13,13, + 13,13,14,14,14,14,15,15 }, + //3 bit index: 0, 9, 18, 27, 37, 46, 55, 64 + { 0,0,0,0,0,1,1,1, + 1,1,1,1,1,1,2,2, + 2,2,2,2,2,2,2,3, + 3,3,3,3,3,3,3,3, + 3,4,4,4,4,4,4,4, + 4,4,5,5,5,5,5,5, + 5,5,5,6,6,6,6,6, + 6,6,6,6,7,7,7,7 }, + //2 bit index: 0, 21, 43, 64 + { 0,0,0,0,0,0,0,0, + 0,0,0,1,1,1,1,1, + 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, + 1,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,3,3, + 3,3,3,3,3,3,3,3 } }; + +cbuffer cbCS : register( b0 ) +{ + uint g_tex_width; + uint g_num_block_x; + uint g_format; + uint g_mode_id; + uint g_start_block_id; + uint g_num_total_blocks; + float g_alpha_weight; + float g_quality; +}; + +//Forward declaration +uint2x4 compress_endpoints0( inout uint2x4 endPoint, uint2 P ); //Mode = 0 +uint2x4 compress_endpoints1( inout uint2x4 endPoint, uint2 P ); //Mode = 1 +uint2x4 compress_endpoints2( inout uint2x4 endPoint ); //Mode = 2 +uint2x4 compress_endpoints3( inout uint2x4 endPoint, uint2 P ); //Mode = 3 +uint2x4 compress_endpoints7( inout uint2x4 endPoint, uint2 P ); //Mode = 7 +uint2x4 compress_endpoints6( inout uint2x4 endPoint, uint2 P ); //Mode = 6 +uint2x4 compress_endpoints4( inout uint2x4 endPoint ); //Mode = 4 +uint2x4 compress_endpoints5( inout uint2x4 endPoint ); //Mode = 5 + +void block_package0( out uint4 block, uint partition, uint threadBase ); //Mode0 +void block_package1( out uint4 block, uint partition, uint threadBase ); //Mode1 +void block_package2( out uint4 block, uint partition, uint threadBase ); //Mode2 +void block_package3( out uint4 block, uint partition, uint threadBase ); //Mode3 +void block_package4( out uint4 block, uint 
rotation, uint index_selector, uint threadBase ); //Mode4 +void block_package5( out uint4 block, uint rotation, uint threadBase ); //Mode5 +void block_package6( out uint4 block, uint threadBase ); //Mode6 +void block_package7( out uint4 block, uint partition, uint threadBase ); //Mode7 + + +void swap(inout uint4 lhs, inout uint4 rhs) +{ + uint4 tmp = lhs; + lhs = rhs; + rhs = tmp; +} +void swap(inout uint3 lhs, inout uint3 rhs) +{ + uint3 tmp = lhs; + lhs = rhs; + rhs = tmp; +} +void swap(inout uint lhs, inout uint rhs) +{ + uint tmp = lhs; + lhs = rhs; + rhs = tmp; +} + +uint ComputeError(in uint4 a, in uint4 b) +{ + return dot(a.rgb, b.rgb) + g_alpha_weight * a.a*b.a; +} + +void Ensure_A_Is_Larger( inout uint4 a, inout uint4 b ) +{ + if ( a.x < b.x ) + swap( a.x, b.x ); + if ( a.y < b.y ) + swap( a.y, b.y ); + if ( a.z < b.z ) + swap( a.z, b.z ); + if ( a.w < b.w ) + swap( a.w, b.w ); +} + + +Texture2D g_Input : register( t0 ); +StructuredBuffer g_InBuff : register( t1 ); + +RWStructuredBuffer g_OutBuff : register( u0 ); + +#define THREAD_GROUP_SIZE 64 +#define BLOCK_SIZE_Y 4 +#define BLOCK_SIZE_X 4 +#define BLOCK_SIZE (BLOCK_SIZE_Y * BLOCK_SIZE_X) + +struct BufferShared +{ + uint4 pixel; + uint error; + uint mode; + uint partition; + uint index_selector; + uint rotation; + uint4 endPoint_low; + uint4 endPoint_high; + uint4 endPoint_low_quantized; + uint4 endPoint_high_quantized; +}; +groupshared BufferShared shared_temp[THREAD_GROUP_SIZE]; + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void TryMode456CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 4 5 6 all have 1 subset per block, and fix-up index is always index 0 +{ + // we process 4 BC blocks per thread group + const uint MAX_USED_THREAD = 16; // pixels in a BC (block compressed) block + uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; // the number of BC blocks a thread group processes = 64 / 16 = 4 + uint blockInGroup = GI / MAX_USED_THREAD; // what BC block this thread is on 
within this thread group + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on + uint threadBase = blockInGroup * MAX_USED_THREAD; // the first id of the pixel in this BC block in this thread group + uint threadInBlock = GI - threadBase; // id of the pixel in this BC block + +#ifndef REF_DEVICE + if (blockID >= g_num_total_blocks) + { + return; + } +#endif + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + if (threadInBlock < 16) + { + shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255); + + shared_temp[GI].endPoint_low = shared_temp[GI].pixel; + shared_temp[GI].endPoint_high = shared_temp[GI].pixel; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 8) + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 8].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 8].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 4) + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 4].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 4].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 2) + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 2].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 2].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 1].endPoint_low); 
+ shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 1].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + uint2x4 endPoint; + endPoint[0] = shared_temp[threadBase].endPoint_low; + endPoint[1] = shared_temp[threadBase].endPoint_high; + + uint error = 0xFFFFFFFF; + uint mode = 0; + uint index_selector = 0; + uint rotation = 0; + + uint2 indexPrec; + if (threadInBlock < 8) // all threads of threadInBlock < 8 will be working on trying out mode 4, since only mode 4 has index selector bit + { + if (0 == (threadInBlock & 1)) // thread 0, 2, 4, 6 + { + //2 represents 2bit index precision; 1 represents 3bit index precision + index_selector = 0; + indexPrec = uint2( 2, 1 ); + } + else // thread 1, 3, 5, 7 + { + //2 represents 2bit index precision; 1 represents 3bit index precision + index_selector = 1; + indexPrec = uint2( 1, 2 ); + } + } + else + { + //2 represents 2bit index precision + indexPrec = uint2( 2, 2 ); + } + + uint4 pixel_r; + uint color_index; + uint alpha_index; + int4 span; + int2 span_norm_sqr; + int2 dotProduct; + if (threadInBlock < 12) // Try mode 4 5 in threads 0..11 + { + // mode 4 5 have component rotation + if ((threadInBlock < 2) || (8 == threadInBlock)) // rotation = 0 in thread 0, 1 + { + rotation = 0; + } + else if ((threadInBlock < 4) || (9 == threadInBlock)) // rotation = 1 in thread 2, 3 + { + endPoint[0].ra = endPoint[0].ar; + endPoint[1].ra = endPoint[1].ar; + + rotation = 1; + } + else if ((threadInBlock < 6) || (10 == threadInBlock)) // rotation = 2 in thread 4, 5 + { + endPoint[0].ga = endPoint[0].ag; + endPoint[1].ga = endPoint[1].ag; + + rotation = 2; + } + else if ((threadInBlock < 8) || (11 == threadInBlock)) // rotation = 3 in thread 6, 7 + { + endPoint[0].ba = endPoint[0].ab; + endPoint[1].ba = endPoint[1].ab; + + rotation = 3; + } + + if (threadInBlock < 8) // try mode 4 in threads 0..7 + { + // mode 4 thread distribution + // Thread 0 1 2 3 4 5 6 7 + // Rotation 
0 0 1 1 2 2 3 3 + // Index selector 0 1 0 1 0 1 0 1 + + mode = 4; + compress_endpoints4( endPoint ); + } + else // try mode 5 in threads 8..11 + { + // mode 5 thread distribution + // Thread 8 9 10 11 + // Rotation 0 1 2 3 + + mode = 5; + compress_endpoints5( endPoint ); + } + + uint4 pixel = shared_temp[threadBase + 0].pixel; + if (1 == rotation) + { + pixel.ra = pixel.ar; + } + else if (2 == rotation) + { + pixel.ga = pixel.ag; + } + else if (3 == rotation) + { + pixel.ba = pixel.ab; + } + + span = endPoint[1] - endPoint[0]; + span_norm_sqr = uint2( dot( span.rgb, span.rgb ), span.a * span.a ); + + // in mode 4 5 6, end point 0 must be closer to pixel 0 than end point 1, because of the fix-up index is always index 0 + // TODO: this shouldn't be necessary here in error calculation + /* + dotProduct = int2( dot( span.rgb, pixel.rgb - endPoint[0].rgb ), span.a * ( pixel.a - endPoint[0].a ) ); + if ( span_norm_sqr.x > 0 && dotProduct.x > 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) ) + { + span.rgb = -span.rgb; + swap(endPoint[0].rgb, endPoint[1].rgb); + } + if ( span_norm_sqr.y > 0 && dotProduct.y > 0 && uint( dotProduct.y * 63.49999 ) > uint( 32 * span_norm_sqr.y ) ) + { + span.a = -span.a; + swap(endPoint[0].a, endPoint[1].a); + } + */ + + // should be the same as above + dotProduct = int2( dot( pixel.rgb - endPoint[0].rgb, pixel.rgb - endPoint[0].rgb ), dot( pixel.rgb - endPoint[1].rgb, pixel.rgb - endPoint[1].rgb ) ); + if ( dotProduct.x > dotProduct.y ) + { + span.rgb = -span.rgb; + swap(endPoint[0].rgb, endPoint[1].rgb); + } + dotProduct = int2( dot( pixel.a - endPoint[0].a, pixel.a - endPoint[0].a ), dot( pixel.a - endPoint[1].a, pixel.a - endPoint[1].a ) ); + if ( dotProduct.x > dotProduct.y ) + { + span.a = -span.a; + swap(endPoint[0].a, endPoint[1].a); + } + + error = 0; + for ( uint i = 0; i < 16; i ++ ) + { + pixel = shared_temp[threadBase + i].pixel; + if (1 == rotation) + { + pixel.ra = pixel.ar; + } + else if (2 == rotation) + { 
+ pixel.ga = pixel.ag; + } + else if (3 == rotation) + { + pixel.ba = pixel.ab; + } + + dotProduct.x = dot( span.rgb, pixel.rgb - endPoint[0].rgb ); + color_index = ( span_norm_sqr.x <= 0 /*endPoint[0] == endPoint[1]*/ || dotProduct.x <= 0 /*pixel == endPoint[0]*/ ) ? 0 + : ( ( dotProduct.x < span_norm_sqr.x ) ? aStep[indexPrec.x][ uint( dotProduct.x * 63.49999 / span_norm_sqr.x ) ] : aStep[indexPrec.x][63] ); + dotProduct.y = dot( span.a, pixel.a - endPoint[0].a ); + alpha_index = ( span_norm_sqr.y <= 0 || dotProduct.y <= 0 ) ? 0 + : ( ( dotProduct.y < span_norm_sqr.y ) ? aStep[indexPrec.y][ uint( dotProduct.y * 63.49999 / span_norm_sqr.y ) ] : aStep[indexPrec.y][63] ); + + // the same color_index and alpha_index should be used for reconstruction, so this should be left commented out + /*if (index_selector) + { + swap(color_index, alpha_index); + }*/ + + pixel_r.rgb = ( ( 64 - aWeight[indexPrec.x][color_index] ) * endPoint[0].rgb + + aWeight[indexPrec.x][color_index] * endPoint[1].rgb + + 32 ) >> 6; + pixel_r.a = ( ( 64 - aWeight[indexPrec.y][alpha_index] ) * endPoint[0].a + + aWeight[indexPrec.y][alpha_index] * endPoint[1].a + + 32 ) >> 6; + + Ensure_A_Is_Larger( pixel_r, pixel ); + pixel_r -= pixel; + if (1 == rotation) + { + pixel_r.ra = pixel_r.ar; + } + else if (2 == rotation) + { + pixel_r.ga = pixel_r.ag; + } + else if (3 == rotation) + { + pixel_r.ba = pixel_r.ab; + } + error += ComputeError(pixel_r, pixel_r); + } + } + else if (threadInBlock < 16) // Try mode 6 in threads 12..15, since in mode 4 5 6, only mode 6 has p bit + { + uint p = threadInBlock - 12; + + compress_endpoints6( endPoint, uint2(p >> 0, p >> 1) & 1 ); + + uint4 pixel = shared_temp[threadBase + 0].pixel; + + span = endPoint[1] - endPoint[0]; + span_norm_sqr = dot( span, span ); + dotProduct = dot( span, pixel - endPoint[0] ); + if ( span_norm_sqr.x > 0 && dotProduct.x >= 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) ) + { + span = -span; + swap(endPoint[0], 
endPoint[1]); + } + + error = 0; + for ( uint i = 0; i < 16; i ++ ) + { + pixel = shared_temp[threadBase + i].pixel; + + dotProduct.x = dot( span, pixel - endPoint[0] ); + color_index = ( span_norm_sqr.x <= 0 || dotProduct.x <= 0 ) ? 0 + : ( ( dotProduct.x < span_norm_sqr.x ) ? aStep[0][ uint( dotProduct.x * 63.49999 / span_norm_sqr.x ) ] : aStep[0][63] ); + + pixel_r = ( ( 64 - aWeight[0][color_index] ) * endPoint[0] + + aWeight[0][color_index] * endPoint[1] + 32 ) >> 6; + + Ensure_A_Is_Larger( pixel_r, pixel ); + pixel_r -= pixel; + error += ComputeError(pixel_r, pixel_r); + } + + mode = 6; + rotation = p; // Borrow rotation for p + } + + shared_temp[GI].error = error; + shared_temp[GI].mode = mode; + shared_temp[GI].index_selector = index_selector; + shared_temp[GI].rotation = rotation; + +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 8) + { + if ( shared_temp[GI].error > shared_temp[GI + 8].error ) + { + shared_temp[GI].error = shared_temp[GI + 8].error; + shared_temp[GI].mode = shared_temp[GI + 8].mode; + shared_temp[GI].index_selector = shared_temp[GI + 8].index_selector; + shared_temp[GI].rotation = shared_temp[GI + 8].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 4) + { + if ( shared_temp[GI].error > shared_temp[GI + 4].error ) + { + shared_temp[GI].error = shared_temp[GI + 4].error; + shared_temp[GI].mode = shared_temp[GI + 4].mode; + shared_temp[GI].index_selector = shared_temp[GI + 4].index_selector; + shared_temp[GI].rotation = shared_temp[GI + 4].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 2) + { + if ( shared_temp[GI].error > shared_temp[GI + 2].error ) + { + shared_temp[GI].error = shared_temp[GI + 2].error; + shared_temp[GI].mode = shared_temp[GI + 2].mode; + shared_temp[GI].index_selector = shared_temp[GI + 2].index_selector; + shared_temp[GI].rotation = shared_temp[GI + 2].rotation; + } + } 
+#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) + { + if ( shared_temp[GI].error > shared_temp[GI + 1].error ) + { + shared_temp[GI].error = shared_temp[GI + 1].error; + shared_temp[GI].mode = shared_temp[GI + 1].mode; + shared_temp[GI].index_selector = shared_temp[GI + 1].index_selector; + shared_temp[GI].rotation = shared_temp[GI + 1].rotation; + } + + g_OutBuff[blockID] = uint4(shared_temp[GI].error, (shared_temp[GI].index_selector << 31) | shared_temp[GI].mode, + 0, shared_temp[GI].rotation); // rotation is indeed rotation for mode 4 5. for mode 6, rotation is p bit + } +} + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 1 3 7 all have 2 subsets per block +{ + const uint MAX_USED_THREAD = 64; + uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; + uint blockInGroup = GI / MAX_USED_THREAD; + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; + uint threadBase = blockInGroup * MAX_USED_THREAD; + uint threadInBlock = GI - threadBase; + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + if (threadInBlock < 16) + { + shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255); + } + GroupMemoryBarrierWithGroupSync(); + + shared_temp[GI].error = 0xFFFFFFFF; + + uint4 pixel_r; + uint2x4 endPoint[2]; // endPoint[0..1 for subset id][0..1 for low and high in the subset] + uint2x4 endPointBackup[2]; + uint color_index; + if (threadInBlock < 64) + { + uint partition = threadInBlock; + + endPoint[0][0] = MAX_UINT; + endPoint[0][1] = MIN_UINT; + endPoint[1][0] = MAX_UINT; + endPoint[1][1] = MIN_UINT; + uint bits = candidateSectionBit[partition]; + for ( uint i = 0; i < 16; i ++ ) + { + uint4 pixel = shared_temp[threadBase + 
i].pixel; + if ( (( bits >> i ) & 0x01) == 1 ) + { + endPoint[1][0] = min( endPoint[1][0], pixel ); + endPoint[1][1] = max( endPoint[1][1], pixel ); + } + else + { + endPoint[0][0] = min( endPoint[0][0], pixel ); + endPoint[0][1] = max( endPoint[0][1], pixel ); + } + } + + endPointBackup[0] = endPoint[0]; + endPointBackup[1] = endPoint[1]; + + uint max_p; + if (1 == g_mode_id) + { + // in mode 1, there is only one p bit per subset + max_p = 2; + } + else + { + // in mode 3 7, there are two p bits per subset, one for each end point + max_p = 4; + } + + uint final_p[2] = { 0, 0 }; + uint error[2] = { MAX_UINT, MAX_UINT }; + for ( uint p = 0; p < max_p; p ++ ) + { + endPoint[0] = endPointBackup[0]; + endPoint[1] = endPointBackup[1]; + + for ( i = 0; i < 2; i ++ ) // loop through 2 subsets + { + if (g_mode_id == 1) + { + compress_endpoints1( endPoint[i], p ); + } + else if (g_mode_id == 3) + { + compress_endpoints3( endPoint[i], uint2(p, p >> 1) & 1 ); + } + else if (g_mode_id == 7) + { + compress_endpoints7( endPoint[i], uint2(p, p >> 1) & 1 ); + } + } + + int4 span[2]; + span[0] = endPoint[0][1] - endPoint[0][0]; + span[1] = endPoint[1][1] - endPoint[1][0]; + + if (g_mode_id != 7) + { + span[0].w = span[1].w = 0; + } + + int span_norm_sqr[2]; + span_norm_sqr[0] = dot( span[0], span[0] ); + span_norm_sqr[1] = dot( span[1], span[1] ); + + // TODO: again, this shouldn't be necessary here in error calculation + int dotProduct = dot( span[0], shared_temp[threadBase + 0].pixel - endPoint[0][0] ); + if ( span_norm_sqr[0] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[0] ) ) + { + span[0] = -span[0]; + swap(endPoint[0][0], endPoint[0][1]); + } + dotProduct = dot( span[1], shared_temp[threadBase + candidateFixUpIndex1D[partition].x].pixel - endPoint[1][0] ); + if ( span_norm_sqr[1] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[1] ) ) + { + span[1] = -span[1]; + swap(endPoint[1][0], endPoint[1][1]); + } + + 
uint step_selector; + if (g_mode_id != 1) + { + step_selector = 2; // mode 3 7 have 2 bit index + } + else + { + step_selector = 1; // mode 1 has 3 bit index + } + + uint p_error[2] = { 0, 0 }; + for ( i = 0; i < 16; i ++ ) + { + uint subset_index = (bits >> i) & 0x01; + + if (subset_index == 1) + { + dotProduct = dot( span[1], shared_temp[threadBase + i].pixel - endPoint[1][0] ); + color_index = (span_norm_sqr[1] <= 0 || dotProduct <= 0) ? 0 + : ((dotProduct < span_norm_sqr[1]) ? aStep[step_selector][uint(dotProduct * 63.49999 / span_norm_sqr[1])] : aStep[step_selector][63]); + } + else + { + dotProduct = dot( span[0], shared_temp[threadBase + i].pixel - endPoint[0][0] ); + color_index = (span_norm_sqr[0] <= 0 || dotProduct <= 0) ? 0 + : ((dotProduct < span_norm_sqr[0]) ? aStep[step_selector][uint(dotProduct * 63.49999 / span_norm_sqr[0])] : aStep[step_selector][63]); + } + + pixel_r = ((64 - aWeight[step_selector][color_index]) * endPoint[subset_index][0] + + aWeight[step_selector][color_index] * endPoint[subset_index][1] + 32) >> 6; + if (g_mode_id != 7) + { + pixel_r.a = 255; + } + + uint4 pixel = shared_temp[threadBase + i].pixel; + Ensure_A_Is_Larger( pixel_r, pixel ); + pixel_r -= pixel; + uint pixel_error = ComputeError(pixel_r, pixel_r); + if ( subset_index == 1 ) + p_error[1] += pixel_error; + else + p_error[0] += pixel_error; + } + + for ( i = 0; i < 2; i++ ) + { + if (p_error[i] < error[i]) + { + error[i] = p_error[i]; + final_p[i] = p; + } + } + } + + shared_temp[GI].error = error[0] + error[1]; + shared_temp[GI].mode = g_mode_id; + shared_temp[GI].partition = partition; + + // mode 1 3 7 don't have rotation, we use rotation for p bits + if ( g_mode_id == 1 ) + shared_temp[GI].rotation = (final_p[1] << 1) | final_p[0]; + else + shared_temp[GI].rotation = (final_p[1] << 2) | final_p[0]; + } + GroupMemoryBarrierWithGroupSync(); + + if (threadInBlock < 32) + { + if ( shared_temp[GI].error > shared_temp[GI + 32].error ) + { + shared_temp[GI].error = 
shared_temp[GI + 32].error; + shared_temp[GI].mode = shared_temp[GI + 32].mode; + shared_temp[GI].partition = shared_temp[GI + 32].partition; + shared_temp[GI].rotation = shared_temp[GI + 32].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif +if (threadInBlock < 16) + { + if ( shared_temp[GI].error > shared_temp[GI + 16].error ) + { + shared_temp[GI].error = shared_temp[GI + 16].error; + shared_temp[GI].mode = shared_temp[GI + 16].mode; + shared_temp[GI].partition = shared_temp[GI + 16].partition; + shared_temp[GI].rotation = shared_temp[GI + 16].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 8) + { + if ( shared_temp[GI].error > shared_temp[GI + 8].error ) + { + shared_temp[GI].error = shared_temp[GI + 8].error; + shared_temp[GI].mode = shared_temp[GI + 8].mode; + shared_temp[GI].partition = shared_temp[GI + 8].partition; + shared_temp[GI].rotation = shared_temp[GI + 8].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 4) + { + if ( shared_temp[GI].error > shared_temp[GI + 4].error ) + { + shared_temp[GI].error = shared_temp[GI + 4].error; + shared_temp[GI].mode = shared_temp[GI + 4].mode; + shared_temp[GI].partition = shared_temp[GI + 4].partition; + shared_temp[GI].rotation = shared_temp[GI + 4].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 2) + { + if ( shared_temp[GI].error > shared_temp[GI + 2].error ) + { + shared_temp[GI].error = shared_temp[GI + 2].error; + shared_temp[GI].mode = shared_temp[GI + 2].mode; + shared_temp[GI].partition = shared_temp[GI + 2].partition; + shared_temp[GI].rotation = shared_temp[GI + 2].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) + { + if ( shared_temp[GI].error > shared_temp[GI + 1].error ) + { + shared_temp[GI].error = shared_temp[GI + 1].error; + shared_temp[GI].mode 
= shared_temp[GI + 1].mode; + shared_temp[GI].partition = shared_temp[GI + 1].partition; + shared_temp[GI].rotation = shared_temp[GI + 1].rotation; + } + + if (g_InBuff[blockID].x > shared_temp[GI].error) + { + g_OutBuff[blockID] = uint4(shared_temp[GI].error, shared_temp[GI].mode, shared_temp[GI].partition, shared_temp[GI].rotation); // mode 1 3 7 don't have rotation, we use rotation for p bits + } + else + { + g_OutBuff[blockID] = g_InBuff[blockID]; + } + } +} + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 0 2 have 3 subsets per block +{ + const uint MAX_USED_THREAD = 64; + uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; + uint blockInGroup = GI / MAX_USED_THREAD; + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; + uint threadBase = blockInGroup * MAX_USED_THREAD; + uint threadInBlock = GI - threadBase; + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + if (threadInBlock < 16) + { + shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255); + } + GroupMemoryBarrierWithGroupSync(); + + shared_temp[GI].error = 0xFFFFFFFF; + + uint num_partitions; + if (0 == g_mode_id) + { + num_partitions = 16; + } + else + { + num_partitions = 64; + } + + uint4 pixel_r; + uint2x4 endPoint[3]; // endPoint[0..1 for subset id][0..1 for low and high in the subset] + uint2x4 endPointBackup[3]; + uint color_index[16]; + if (threadInBlock < num_partitions) + { + uint partition = threadInBlock + 64; + + endPoint[0][0] = MAX_UINT; + endPoint[0][1] = MIN_UINT; + endPoint[1][0] = MAX_UINT; + endPoint[1][1] = MIN_UINT; + endPoint[2][0] = MAX_UINT; + endPoint[2][1] = MIN_UINT; + uint bits2 = candidateSectionBit2[partition - 64]; + for ( uint i = 0; i < 16; i ++ ) + { + uint4 
pixel = shared_temp[threadBase + i].pixel; + uint subset_index = ( bits2 >> ( i * 2 ) ) & 0x03; + if ( subset_index == 2 ) + { + endPoint[2][0] = min( endPoint[2][0], pixel ); + endPoint[2][1] = max( endPoint[2][1], pixel ); + } + else if ( subset_index == 1 ) + { + endPoint[1][0] = min( endPoint[1][0], pixel ); + endPoint[1][1] = max( endPoint[1][1], pixel ); + } + else + { + endPoint[0][0] = min( endPoint[0][0], pixel ); + endPoint[0][1] = max( endPoint[0][1], pixel ); + } + } + + endPointBackup[0] = endPoint[0]; + endPointBackup[1] = endPoint[1]; + endPointBackup[2] = endPoint[2]; + + uint max_p; + if (0 == g_mode_id) + { + max_p = 4; + } + else + { + max_p = 1; + } + + uint final_p[3] = { 0, 0, 0 }; + uint error[3] = { MAX_UINT, MAX_UINT, MAX_UINT }; + for ( uint p = 0; p < max_p; p ++ ) + { + endPoint[0] = endPointBackup[0]; + endPoint[1] = endPointBackup[1]; + endPoint[2] = endPointBackup[2]; + + for ( i = 0; i < 3; i ++ ) + { + if (0 == g_mode_id) + { + compress_endpoints0( endPoint[i], uint2(p, p >> 1) & 1 ); + } + else + { + compress_endpoints2( endPoint[i] ); + } + } + + uint step_selector = 1 + (2 == g_mode_id); + + int4 span[3]; + span[0] = endPoint[0][1] - endPoint[0][0]; + span[1] = endPoint[1][1] - endPoint[1][0]; + span[2] = endPoint[2][1] - endPoint[2][0]; + span[0].w = span[1].w = span[2].w = 0; + int span_norm_sqr[3]; + span_norm_sqr[0] = dot( span[0], span[0] ); + span_norm_sqr[1] = dot( span[1], span[1] ); + span_norm_sqr[2] = dot( span[2], span[2] ); + + // TODO: again, this shouldn't be necessary here in error calculation + uint ci[3] = { 0, candidateFixUpIndex1D[partition].x, candidateFixUpIndex1D[partition].y }; + for (i = 0; i < 3; i ++) + { + int dotProduct = dot( span[i], shared_temp[threadBase + ci[i]].pixel - endPoint[i][0] ); + if ( span_norm_sqr[i] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[i] ) ) + { + span[i] = -span[i]; + swap(endPoint[i][0], endPoint[i][1]); + } + } + + uint p_error[3] = { 
0, 0, 0 }; + for ( i = 0; i < 16; i ++ ) + { + uint subset_index = ( bits2 >> ( i * 2 ) ) & 0x03; + if ( subset_index == 2 ) + { + int dotProduct = dot( span[2], shared_temp[threadBase + i].pixel - endPoint[2][0] ); + color_index[i] = ( span_norm_sqr[2] <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr[2] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[2] ) ] : aStep[step_selector][63] ); + } + else if ( subset_index == 1 ) + { + int dotProduct = dot( span[1], shared_temp[threadBase + i].pixel - endPoint[1][0] ); + color_index[i] = ( span_norm_sqr[1] <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr[1] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[1] ) ] : aStep[step_selector][63] ); + } + else + { + int dotProduct = dot( span[0], shared_temp[threadBase + i].pixel - endPoint[0][0] ); + color_index[i] = ( span_norm_sqr[0] <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr[0] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[0] ) ] : aStep[step_selector][63] ); + } + + pixel_r = ( ( 64 - aWeight[step_selector][color_index[i]] ) * endPoint[subset_index][0] + + aWeight[step_selector][color_index[i]] * endPoint[subset_index][1] + 32 ) >> 6; + pixel_r.a = 255; + + uint4 pixel = shared_temp[threadBase + i].pixel; + Ensure_A_Is_Larger( pixel_r, pixel ); + pixel_r -= pixel; + + uint pixel_error = ComputeError(pixel_r, pixel_r); + + if ( subset_index == 2 ) + p_error[2] += pixel_error; + else if ( subset_index == 1 ) + p_error[1] += pixel_error; + else + p_error[0] += pixel_error; + } + + for ( i = 0; i < 3; i++ ) + { + if (p_error[i] < error[i]) + { + error[i] = p_error[i]; + final_p[i] = p; // Borrow rotation for p + } + } + } + + shared_temp[GI].error = error[0] + error[1] + error[2]; + shared_temp[GI].partition = partition; + shared_temp[GI].rotation = (final_p[2] << 4) | (final_p[1] << 2) | final_p[0]; + } + GroupMemoryBarrierWithGroupSync(); + + if 
(threadInBlock < 32) + { + if ( shared_temp[GI].error > shared_temp[GI + 32].error ) + { + shared_temp[GI].error = shared_temp[GI + 32].error; + shared_temp[GI].partition = shared_temp[GI + 32].partition; + shared_temp[GI].rotation = shared_temp[GI + 32].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 16) + { + if ( shared_temp[GI].error > shared_temp[GI + 16].error ) + { + shared_temp[GI].error = shared_temp[GI + 16].error; + shared_temp[GI].partition = shared_temp[GI + 16].partition; + shared_temp[GI].rotation = shared_temp[GI + 16].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 8) + { + if ( shared_temp[GI].error > shared_temp[GI + 8].error ) + { + shared_temp[GI].error = shared_temp[GI + 8].error; + shared_temp[GI].partition = shared_temp[GI + 8].partition; + shared_temp[GI].rotation = shared_temp[GI + 8].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 4) + { + if ( shared_temp[GI].error > shared_temp[GI + 4].error ) + { + shared_temp[GI].error = shared_temp[GI + 4].error; + shared_temp[GI].partition = shared_temp[GI + 4].partition; + shared_temp[GI].rotation = shared_temp[GI + 4].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 2) + { + if ( shared_temp[GI].error > shared_temp[GI + 2].error ) + { + shared_temp[GI].error = shared_temp[GI + 2].error; + shared_temp[GI].partition = shared_temp[GI + 2].partition; + shared_temp[GI].rotation = shared_temp[GI + 2].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) + { + if ( shared_temp[GI].error > shared_temp[GI + 1].error ) + { + shared_temp[GI].error = shared_temp[GI + 1].error; + shared_temp[GI].partition = shared_temp[GI + 1].partition; + shared_temp[GI].rotation = shared_temp[GI + 1].rotation; + } + + if (g_InBuff[blockID].x > 
shared_temp[GI].error) + { + g_OutBuff[blockID] = uint4(shared_temp[GI].error, g_mode_id, shared_temp[GI].partition, shared_temp[GI].rotation); // rotation is actually p bit for mode 0. for mode 2, rotation is always 0 + } + else + { + g_OutBuff[blockID] = g_InBuff[blockID]; + } + } +} + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void EncodeBlocks(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID) +{ + const uint MAX_USED_THREAD = 16; + uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; + uint blockInGroup = GI / MAX_USED_THREAD; + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; + uint threadBase = blockInGroup * MAX_USED_THREAD; + uint threadInBlock = GI - threadBase; + +#ifndef REF_DEVICE + if (blockID >= g_num_total_blocks) + { + return; + } +#endif + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + uint mode = g_InBuff[blockID].y & 0x7FFFFFFF; + uint partition = g_InBuff[blockID].z; + uint index_selector = (g_InBuff[blockID].y >> 31) & 1; + uint rotation = g_InBuff[blockID].w; + + if (threadInBlock < 16) + { + uint4 pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255); + + if ((4 == mode) || (5 == mode)) + { + if (1 == rotation) + { + pixel.ra = pixel.ar; + } + else if (2 == rotation) + { + pixel.ga = pixel.ag; + } + else if (3 == rotation) + { + pixel.ba = pixel.ab; + } + } + + shared_temp[GI].pixel = pixel; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + uint bits = candidateSectionBit[partition]; + uint bits2 = candidateSectionBit2[partition - 64]; + + uint2x4 ep; + ep[0] = MAX_UINT; + ep[1] = MIN_UINT; + uint2x4 ep_quantized; + [unroll] + for (int ii = 2; ii >= 0; -- ii) + { + if (threadInBlock < 16) + { + uint2x4 ep; + ep[0] = MAX_UINT; + ep[1] = MIN_UINT; + + uint4 pixel = shared_temp[GI].pixel; + + 
uint subset_index = ( bits >> threadInBlock ) & 0x01; + uint subset_index2 = ( bits2 >> ( threadInBlock * 2 ) ) & 0x03; + if (0 == ii) + { + if ((0 == mode) || (2 == mode)) + { + if (0 == subset_index2) + { + ep[0] = ep[1] = pixel; + } + } + else if ((1 == mode) || (3 == mode) || (7 == mode)) + { + if (0 == subset_index) + { + ep[0] = ep[1] = pixel; + } + } + else if ((4 == mode) || (5 == mode) || (6 == mode)) + { + ep[0] = ep[1] = pixel; + } + } + else if (1 == ii) + { + if ((0 == mode) || (2 == mode)) + { + if (1 == subset_index2) + { + ep[0] = ep[1] = pixel; + } + } + else if ((1 == mode) || (3 == mode) || (7 == mode)) + { + if (1 == subset_index) + { + ep[0] = ep[1] = pixel; + } + } + } + else + { + if ((0 == mode) || (2 == mode)) + { + if (2 == subset_index2) + { + ep[0] = ep[1] = pixel; + } + } + } + + shared_temp[GI].endPoint_low = ep[0]; + shared_temp[GI].endPoint_high = ep[1]; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 8) + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 8].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 8].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 4) + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 4].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 4].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 2) + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 2].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 2].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 
1].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 1].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (ii == (int)threadInBlock) + { + ep[0] = shared_temp[threadBase].endPoint_low; + ep[1] = shared_temp[threadBase].endPoint_high; + } + } + + if (threadInBlock < 3) + { + uint2 P; + if (1 == mode) + { + P = (rotation >> threadInBlock) & 1; + } + else + { + P = uint2(rotation >> (threadInBlock * 2 + 0), rotation >> (threadInBlock * 2 + 1)) & 1; + } + + if (0 == mode) + { + ep_quantized = compress_endpoints0( ep, P ); + } + else if (1 == mode) + { + ep_quantized = compress_endpoints1( ep, P ); + } + else if (2 == mode) + { + ep_quantized = compress_endpoints2( ep ); + } + else if (3 == mode) + { + ep_quantized = compress_endpoints3( ep, P ); + } + else if (4 == mode) + { + ep_quantized = compress_endpoints4( ep ); + } + else if (5 == mode) + { + ep_quantized = compress_endpoints5( ep ); + } + else if (6 == mode) + { + ep_quantized = compress_endpoints6( ep, P ); + } + else //if (7 == mode) + { + ep_quantized = compress_endpoints7( ep, P ); + } + + int4 span = ep[1] - ep[0]; + if (mode < 4) + { + span.w = 0; + } + + if ((4 == mode) || (5 == mode)) + { + if (0 == threadInBlock) + { + int2 span_norm_sqr = uint2( dot( span.rgb, span.rgb ), span.a * span.a ); + int2 dotProduct = int2( dot( span.rgb, shared_temp[threadBase + 0].pixel.rgb - ep[0].rgb ), span.a * ( shared_temp[threadBase + 0].pixel.a - ep[0].a ) ); + if ( span_norm_sqr.x > 0 && dotProduct.x > 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) ) + { + swap(ep[0].rgb, ep[1].rgb); + swap(ep_quantized[0].rgb, ep_quantized[1].rgb); + } + if ( span_norm_sqr.y > 0 && dotProduct.y > 0 && uint( dotProduct.y * 63.49999 ) > uint( 32 * span_norm_sqr.y ) ) + { + swap(ep[0].a, ep[1].a); + swap(ep_quantized[0].a, ep_quantized[1].a); + } + } + } + else //if ((0 == mode) || (2 == mode) || (1 == mode) || (3 == mode) 
|| (7 == mode) || (6 == mode)) + { + int p; + if (0 == threadInBlock) + { + p = 0; + } + else if (1 == threadInBlock) + { + p = candidateFixUpIndex1D[partition].x; + } + else //if (2 == threadInBlock) + { + p = candidateFixUpIndex1D[partition].y; + } + + int span_norm_sqr = dot( span, span ); + int dotProduct = dot( span, shared_temp[threadBase + p].pixel - ep[0] ); + if ( span_norm_sqr > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr ) ) + { + swap(ep[0], ep[1]); + swap(ep_quantized[0], ep_quantized[1]); + } + } + + shared_temp[GI].endPoint_low = ep[0]; + shared_temp[GI].endPoint_high = ep[1]; + shared_temp[GI].endPoint_low_quantized = ep_quantized[0]; + shared_temp[GI].endPoint_high_quantized = ep_quantized[1]; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 16) + { + uint color_index = 0; + uint alpha_index = 0; + + uint2x4 ep; + + uint2 indexPrec; + if ((0 == mode) || (1 == mode)) + { + indexPrec = 1; + } + else if (6 == mode) + { + indexPrec = 0; + } + else if (4 == mode) + { + if (0 == index_selector) + { + indexPrec = uint2(2, 1); + } + else + { + indexPrec = uint2(1, 2); + } + } + else + { + indexPrec = 2; + } + + int subset_index; + if ((0 == mode) || (2 == mode)) + { + subset_index = (bits2 >> (threadInBlock * 2)) & 0x03; + } + else if ((1 == mode) || (3 == mode) || (7 == mode)) + { + subset_index = (bits >> threadInBlock) & 0x01; + } + else + { + subset_index = 0; + } + + ep[0] = shared_temp[threadBase + subset_index].endPoint_low; + ep[1] = shared_temp[threadBase + subset_index].endPoint_high; + + int4 span = ep[1] - ep[0]; + if (mode < 4) + { + span.w = 0; + } + + if ((4 == mode) || (5 == mode)) + { + int2 span_norm_sqr; + span_norm_sqr.x = dot( span.rgb, span.rgb ); + span_norm_sqr.y = span.a * span.a; + + int dotProduct = dot( span.rgb, shared_temp[threadBase + threadInBlock].pixel.rgb - ep[0].rgb ); + color_index = ( span_norm_sqr.x <= 0 || dotProduct <= 0 ) ? 
0 + : ( ( dotProduct < span_norm_sqr.x ) ? aStep[indexPrec.x][ uint( dotProduct * 63.49999 / span_norm_sqr.x ) ] : aStep[indexPrec.x][63] ); + dotProduct = dot( span.a, shared_temp[threadBase + threadInBlock].pixel.a - ep[0].a ); + alpha_index = ( span_norm_sqr.y <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr.y ) ? aStep[indexPrec.y][ uint( dotProduct * 63.49999 / span_norm_sqr.y ) ] : aStep[indexPrec.y][63] ); + + if (index_selector) + { + swap(color_index, alpha_index); + } + } + else + { + int span_norm_sqr = dot( span, span ); + + int dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel - ep[0] ); + color_index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr ) ? aStep[indexPrec.x][ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep[indexPrec.x][63] ); + } + + shared_temp[GI].error = color_index; + shared_temp[GI].mode = alpha_index; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (0 == threadInBlock) + { + uint4 block; + if (0 == mode) + { + block_package0( block, partition, threadBase ); + } + else if (1 == mode) + { + block_package1( block, partition, threadBase ); + } + else if (2 == mode) + { + block_package2( block, partition, threadBase ); + } + else if (3 == mode) + { + block_package3( block, partition, threadBase ); + } + else if (4 == mode) + { + block_package4( block, rotation, index_selector, threadBase ); + } + else if (5 == mode) + { + block_package5( block, rotation, threadBase ); + } + else if (6 == mode) + { + block_package6( block, threadBase ); + } + else //if (7 == mode) + { + block_package7( block, partition, threadBase ); + } + + g_OutBuff[blockID] = block; + } +} + +//uint4 truncate_and_round( uint4 color, uint bits) +//{ +// uint precisionMask = ((1 << bits) - 1) << (8 - bits); +// uint precisionHalf = (1 << (7-bits)); +// +// uint4 truncated = color & precisionMask; +// uint4 rounded = min(255, color + precisionHalf) & precisionMask; +// 
+// uint4 truncated_bak = truncated = truncated | (truncated >> bits); +// uint4 rounded_bak = rounded = rounded | (rounded >> bits); +// +// uint4 color_bak = color; +// +// Ensure_A_Is_Larger( rounded, color ); +// Ensure_A_Is_Larger( truncated, color_bak ); +// +// if (dot(rounded - color, rounded - color) < +// dot(truncated - color_bak, truncated - color_bak)) +// { +// return rounded_bak; +// } +// else +// { +// return truncated_bak; +// } +//} + +uint4 quantize( uint4 color, uint uPrec ) +{ + return (((color << 8) + color) * ((1 << uPrec) - 1) + 32768) >> 16; +} + +uint4 unquantize( uint4 color, uint uPrec ) +{ + color = color << (8 - uPrec); + return color | (color >> uPrec); +} + +uint2x4 compress_endpoints0( inout uint2x4 endPoint, uint2 P ) +{ + uint2x4 quantized; + [unroll] for ( uint j = 0; j < 2; j ++ ) + { + quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb & 0xFFFFFFFE; + quantized[j].rgb |= P[j]; + quantized[j].a = 0xFF; + + endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb; + endPoint[j].a = 0xFF; + + quantized[j] <<= 3; + } + return quantized; +} +uint2x4 compress_endpoints1( inout uint2x4 endPoint, uint2 P ) +{ + uint2x4 quantized; + [unroll] for ( uint j = 0; j < 2; j ++ ) + { + quantized[j].rgb = quantize(endPoint[j].rgbb, 7).rgb & 0xFFFFFFFE; + quantized[j].rgb |= P[j]; + quantized[j].a = 0xFF; + + endPoint[j].rgb = unquantize(quantized[j].rgbb, 7).rgb; + endPoint[j].a = 0xFF; + + quantized[j] <<= 1; + } + return quantized; +} +uint2x4 compress_endpoints2( inout uint2x4 endPoint ) +{ + uint2x4 quantized; + [unroll] for ( uint j = 0; j < 2; j ++ ) + { + quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb; + quantized[j].a = 0xFF; + + endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb; + endPoint[j].a = 0xFF; + + quantized[j] <<= 3; + } + return quantized; +} +uint2x4 compress_endpoints3( inout uint2x4 endPoint, uint2 P ) +{ + uint2x4 quantized; + for ( uint j = 0; j < 2; j ++ ) + { + quantized[j].rgb = endPoint[j].rgb & 
0xFFFFFFFE; + quantized[j].rgb |= P[j]; + quantized[j].a = 0xFF; + + endPoint[j].rgb = quantized[j].rgb; + endPoint[j].a = 0xFF; + } + return quantized; +} +uint2x4 compress_endpoints4( inout uint2x4 endPoint ) +{ + uint2x4 quantized; + [unroll] for ( uint j = 0; j < 2; j ++ ) + { + quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb; + quantized[j].a = quantize(endPoint[j].a, 6).r; + + endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb; + endPoint[j].a = unquantize(quantized[j].a, 6).r; + + quantized[j].rgb <<= 3; + quantized[j].a <<= 2; + } + return quantized; +} +uint2x4 compress_endpoints5( inout uint2x4 endPoint ) +{ + uint2x4 quantized; + [unroll] for ( uint j = 0; j < 2; j ++ ) + { + quantized[j].rgb = quantize(endPoint[j].rgbb, 7).rgb; + quantized[j].a = endPoint[j].a; + + endPoint[j].rgb = unquantize(quantized[j].rgbb, 7).rgb; + // endPoint[j].a Alpha is full precision + + quantized[j].rgb <<= 1; + } + return quantized; +} +uint2x4 compress_endpoints6( inout uint2x4 endPoint, uint2 P ) +{ + uint2x4 quantized; + for ( uint j = 0; j < 2; j ++ ) + { + quantized[j] = endPoint[j] & 0xFFFFFFFE; + quantized[j] |= P[j]; + + endPoint[j] = quantized[j]; + } + return quantized; +} +uint2x4 compress_endpoints7( inout uint2x4 endPoint, uint2 P ) +{ + uint2x4 quantized; + [unroll] for ( uint j = 0; j < 2; j ++ ) + { + quantized[j] = quantize(endPoint[j], 6) & 0xFFFFFFFE; + quantized[j] |= P[j]; + + endPoint[j] = unquantize(quantized[j], 6); + } + return quantized << 2; +} + +#define get_end_point_l(subset) shared_temp[threadBase + subset].endPoint_low_quantized +#define get_end_point_h(subset) shared_temp[threadBase + subset].endPoint_high_quantized +#define get_color_index(index) shared_temp[threadBase + index].error +#define get_alpha_index(index) shared_temp[threadBase + index].mode + +void block_package0( out uint4 block, uint partition, uint threadBase ) +{ + block.x = 0x01 | ( (partition - 64) << 1 ) + | ( ( get_end_point_l(0).r & 0xF0 ) << 1 ) | ( ( 
get_end_point_h(0).r & 0xF0 ) << 5 ) + | ( ( get_end_point_l(1).r & 0xF0 ) << 9 ) | ( ( get_end_point_h(1).r & 0xF0 ) << 13 ) + | ( ( get_end_point_l(2).r & 0xF0 ) << 17 ) | ( ( get_end_point_h(2).r & 0xF0 ) << 21 ) + | ( ( get_end_point_l(0).g & 0xF0 ) << 25 ); + block.y = ( ( get_end_point_l(0).g & 0xF0 ) >> 7 ) | ( ( get_end_point_h(0).g & 0xF0 ) >> 3 ) + | ( ( get_end_point_l(1).g & 0xF0 ) << 1 ) | ( ( get_end_point_h(1).g & 0xF0 ) << 5 ) + | ( ( get_end_point_l(2).g & 0xF0 ) << 9 ) | ( ( get_end_point_h(2).g & 0xF0 ) << 13 ) + | ( ( get_end_point_l(0).b & 0xF0 ) << 17 ) | ( ( get_end_point_h(0).b & 0xF0 ) << 21 ) + | ( ( get_end_point_l(1).b & 0xF0 ) << 25 ); + block.z = ( ( get_end_point_l(1).b & 0xF0 ) >> 7 ) | ( ( get_end_point_h(1).b & 0xF0 ) >> 3 ) + | ( ( get_end_point_l(2).b & 0xF0 ) << 1 ) | ( ( get_end_point_h(2).b & 0xF0 ) << 5 ) + | ( ( get_end_point_l(0).r & 0x08 ) << 10 ) | ( ( get_end_point_h(0).r & 0x08 ) << 11 ) + | ( ( get_end_point_l(1).r & 0x08 ) << 12 ) | ( ( get_end_point_h(1).r & 0x08 ) << 13 ) + | ( ( get_end_point_l(2).r & 0x08 ) << 14 ) | ( ( get_end_point_h(2).r & 0x08 ) << 15 ) + | ( get_color_index(0) << 19 ); + block.w = 0; + uint i = 1; + for ( ; i <= min( candidateFixUpIndex1DOrdered[partition][0], 4 ); i ++ ) + { + block.z |= get_color_index(i) << ( i * 3 + 18 ); + } + if ( candidateFixUpIndex1DOrdered[partition][0] < 4 ) //i = 4 + { + block.z |= get_color_index(4) << 29; + i += 1; + } + else //i = 5 + { + block.w |= ( get_color_index(4) & 0x04 ) >> 2; + for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ ) + block.w |= get_color_index(i) << ( i * 3 - 14 ); + } + for ( ; i <= candidateFixUpIndex1DOrdered[partition][1]; i ++ ) + { + block.w |= get_color_index(i) << ( i * 3 - 15 ); + } + for ( ; i < 16; i ++ ) + { + block.w |= get_color_index(i) << ( i * 3 - 16 ); + } +} +void block_package1( out uint4 block, uint partition, uint threadBase ) +{ + block.x = 0x02 | ( partition << 2 ) + | ( ( get_end_point_l(0).r & 0xFC ) 
<< 6 ) | ( ( get_end_point_h(0).r & 0xFC ) << 12 ) + | ( ( get_end_point_l(1).r & 0xFC ) << 18 ) | ( ( get_end_point_h(1).r & 0xFC ) << 24 ); + block.y = ( ( get_end_point_l(0).g & 0xFC ) >> 2 ) | ( ( get_end_point_h(0).g & 0xFC ) << 4 ) + | ( ( get_end_point_l(1).g & 0xFC ) << 10 ) | ( ( get_end_point_h(1).g & 0xFC ) << 16 ) + | ( ( get_end_point_l(0).b & 0xFC ) << 22 ) | ( ( get_end_point_h(0).b & 0xFC ) << 28 ); + block.z = ( ( get_end_point_h(0).b & 0xFC ) >> 4 ) | ( ( get_end_point_l(1).b & 0xFC ) << 2 ) + | ( ( get_end_point_h(1).b & 0xFC ) << 8 ) + | ( ( get_end_point_l(0).r & 0x02 ) << 15 ) | ( ( get_end_point_l(1).r & 0x02 ) << 16 ) + | ( get_color_index(0) << 18 ); + if ( candidateFixUpIndex1DOrdered[partition][0] == 15 ) + { + block.w = (get_color_index(15) << 30) | (get_color_index(14) << 27) | (get_color_index(13) << 24) | (get_color_index(12) << 21) | (get_color_index(11) << 18) | (get_color_index(10) << 15) + | (get_color_index(9) << 12) | (get_color_index(8) << 9) | (get_color_index(7) << 6) | (get_color_index(6) << 3) | get_color_index(5); + block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18); + } + else if ( candidateFixUpIndex1DOrdered[partition][0] == 2 ) + { + block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14) + | (get_color_index(9) << 11) | (get_color_index(8) << 8) | (get_color_index(7) << 5) | (get_color_index(6) << 2) | (get_color_index(5) >> 1); + block.z |= (get_color_index(5) << 31) | (get_color_index(4) << 28) | (get_color_index(3) << 25) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18); + } + else if ( candidateFixUpIndex1DOrdered[partition][0] == 8 ) + { + block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | 
(get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14) + | (get_color_index(9) << 11) | (get_color_index(8) << 9) | (get_color_index(7) << 6) | (get_color_index(6) << 3) | get_color_index(5); + block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18); + } + else //candidateFixUpIndex1DOrdered[partition] == 6 + { + block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14) + | (get_color_index(9) << 11) | (get_color_index(8) << 8) | (get_color_index(7) << 5) | (get_color_index(6) << 3) | get_color_index(5); + block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18); + } +} +void block_package2( out uint4 block, uint partition, uint threadBase ) +{ + block.x = 0x04 | ( (partition - 64) << 3 ) + | ( ( get_end_point_l(0).r & 0xF8 ) << 6 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 11 ) + | ( ( get_end_point_l(1).r & 0xF8 ) << 16 ) | ( ( get_end_point_h(1).r & 0xF8 ) << 21 ) + | ( ( get_end_point_l(2).r & 0xF8 ) << 26 ); + block.y = ( ( get_end_point_l(2).r & 0xF8 ) >> 6 ) | ( ( get_end_point_h(2).r & 0xF8 ) >> 1 ) + | ( ( get_end_point_l(0).g & 0xF8 ) << 4 ) | ( ( get_end_point_h(0).g & 0xF8 ) << 9 ) + | ( ( get_end_point_l(1).g & 0xF8 ) << 14 ) | ( ( get_end_point_h(1).g & 0xF8 ) << 19 ) + | ( ( get_end_point_l(2).g & 0xF8 ) << 24 ); + block.z = ( ( get_end_point_h(2).g & 0xF8 ) >> 3 ) | ( ( get_end_point_l(0).b & 0xF8 ) << 2 ) + | ( ( get_end_point_h(0).b & 0xF8 ) << 7 ) | ( ( get_end_point_l(1).b & 0xF8 ) << 12 ) + | ( ( get_end_point_h(1).b & 0xF8 ) << 17 ) | ( ( get_end_point_l(2).b & 0xF8 ) << 22 ) + | ( ( get_end_point_h(2).b & 0xF8 ) << 27 ); + block.w = ( ( get_end_point_h(2).b & 0xF8 ) >> 5 ) + | ( 
get_color_index(0) << 3 ); + uint i = 1; + for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ ) + { + block.w |= get_color_index(i) << ( i * 2 + 2 ); + } + for ( ; i <= candidateFixUpIndex1DOrdered[partition][1]; i ++ ) + { + block.w |= get_color_index(i) << ( i * 2 + 1 ); + } + for ( ; i < 16; i ++ ) + { + block.w |= get_color_index(i) << ( i * 2 ); + } +} +void block_package3( out uint4 block, uint partition, uint threadBase ) +{ + block.x = 0x08 | ( partition << 4 ) + | ( ( get_end_point_l(0).r & 0xFE ) << 9 ) | ( ( get_end_point_h(0).r & 0xFE ) << 16 ) + | ( ( get_end_point_l(1).r & 0xFE ) << 23 ) | ( ( get_end_point_h(1).r & 0xFE ) << 30 ); + block.y = ( ( get_end_point_h(1).r & 0xFE ) >> 2 ) | ( ( get_end_point_l(0).g & 0xFE ) << 5 ) + | ( ( get_end_point_h(0).g & 0xFE ) << 12 ) | ( ( get_end_point_l(1).g & 0xFE ) << 19 ) + | ( ( get_end_point_h(1).g & 0xFE ) << 26 ); + block.z = ( ( get_end_point_h(1).g & 0xFE ) >> 6 ) | ( ( get_end_point_l(0).b & 0xFE ) << 1 ) + | ( ( get_end_point_h(0).b & 0xFE ) << 8 ) | ( ( get_end_point_l(1).b & 0xFE ) << 15 ) + | ( ( get_end_point_h(1).b & 0xFE ) << 22 ) + | ( ( get_end_point_l(0).r & 0x01 ) << 30 ) | ( ( get_end_point_h(0).r & 0x01 ) << 31 ); + block.w = ( ( get_end_point_l(1).r & 0x01 ) << 0 ) | ( ( get_end_point_h(1).r & 0x01 ) << 1 ) + | ( get_color_index(0) << 2 ); + uint i = 1; + for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ ) + { + block.w |= get_color_index(i) << ( i * 2 + 1 ); + } + for ( ; i < 16; i ++ ) + { + block.w |= get_color_index(i) << ( i * 2 ); + } +} +void block_package4( out uint4 block, uint rotation, uint index_selector, uint threadBase ) +{ + block.x = 0x10 | ( (rotation & 3) << 5 ) | ( (index_selector & 1) << 7 ) + | ( ( get_end_point_l(0).r & 0xF8 ) << 5 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 10 ) + | ( ( get_end_point_l(0).g & 0xF8 ) << 15 ) | ( ( get_end_point_h(0).g & 0xF8 ) << 20 ) + | ( ( get_end_point_l(0).b & 0xF8 ) << 25 ); + + block.y = ( ( 
get_end_point_l(0).b & 0xF8 ) >> 7 ) | ( ( get_end_point_h(0).b & 0xF8 ) >> 2 ) + | ( ( get_end_point_l(0).a & 0xFC ) << 4 ) | ( ( get_end_point_h(0).a & 0xFC ) << 10 ) + | ( (get_color_index(0) & 1) << 18 ) | ( get_color_index(1) << 19 ) | ( get_color_index(2) << 21 ) | ( get_color_index(3) << 23 ) + | ( get_color_index(4) << 25 ) | ( get_color_index(5) << 27 ) | ( get_color_index(6) << 29 ) | ( get_color_index(7) << 31 ); + + block.z = ( get_color_index(7) >> 1 ) | ( get_color_index(8) << 1 ) | ( get_color_index(9) << 3 ) | ( get_color_index(10)<< 5 ) + | ( get_color_index(11)<< 7 ) | ( get_color_index(12)<< 9 ) | ( get_color_index(13)<< 11 ) | ( get_color_index(14)<< 13 ) + | ( get_color_index(15)<< 15 ) | ( (get_alpha_index(0) & 3) << 17 ) | ( get_alpha_index(1) << 19 ) | ( get_alpha_index(2) << 22 ) + | ( get_alpha_index(3) << 25 ) | ( get_alpha_index(4) << 28 ) | ( get_alpha_index(5) << 31 ); + + block.w = ( get_alpha_index(5) >> 1 ) | ( get_alpha_index(6) << 2 ) | ( get_alpha_index(7) << 5 ) | ( get_alpha_index(8) << 8 ) + | ( get_alpha_index(9) << 11 ) | ( get_alpha_index(10)<< 14 ) | ( get_alpha_index(11)<< 17 ) | ( get_alpha_index(12)<< 20 ) + | ( get_alpha_index(13)<< 23 ) | ( get_alpha_index(14)<< 26 ) | ( get_alpha_index(15)<< 29 ); +} +void block_package5( out uint4 block, uint rotation, uint threadBase ) +{ + block.x = 0x20 | ( rotation << 6 ) + | ( ( get_end_point_l(0).r & 0xFE ) << 7 ) | ( ( get_end_point_h(0).r & 0xFE ) << 14 ) + | ( ( get_end_point_l(0).g & 0xFE ) << 21 ) | ( ( get_end_point_h(0).g & 0xFE ) << 28 ); + block.y = ( ( get_end_point_h(0).g & 0xFE ) >> 4 ) | ( ( get_end_point_l(0).b & 0xFE ) << 3 ) + | ( ( get_end_point_h(0).b & 0xFE ) << 10 ) | ( get_end_point_l(0).a << 18 ) | ( get_end_point_h(0).a << 26 ); + block.z = ( get_end_point_h(0).a >> 6 ) + | ( get_color_index(0) << 2 ) | ( get_color_index(1) << 3 ) | ( get_color_index(2) << 5 ) | ( get_color_index(3) << 7 ) + | ( get_color_index(4) << 9 ) | ( get_color_index(5) << 11 ) | 
( get_color_index(6) << 13 ) | ( get_color_index(7) << 15 ) + | ( get_color_index(8) << 17 ) | ( get_color_index(9) << 19 ) | ( get_color_index(10)<< 21 ) | ( get_color_index(11)<< 23 ) + | ( get_color_index(12)<< 25 ) | ( get_color_index(13)<< 27 ) | ( get_color_index(14)<< 29 ) | ( get_color_index(15)<< 31 ); + block.w = ( get_color_index(15)>> 1 ) | ( get_alpha_index(0) << 1 ) | ( get_alpha_index(1) << 2 ) | ( get_alpha_index(2) << 4 ) + | ( get_alpha_index(3) << 6 ) | ( get_alpha_index(4) << 8 ) | ( get_alpha_index(5) << 10 ) | ( get_alpha_index(6) << 12 ) + | ( get_alpha_index(7) << 14 ) | ( get_alpha_index(8) << 16 ) | ( get_alpha_index(9) << 18 ) | ( get_alpha_index(10)<< 20 ) + | ( get_alpha_index(11)<< 22 ) | ( get_alpha_index(12)<< 24 ) | ( get_alpha_index(13)<< 26 ) | ( get_alpha_index(14)<< 28 ) + | ( get_alpha_index(15)<< 30 ); +} +void block_package6( out uint4 block, uint threadBase ) +{ + block.x = 0x40 + | ( ( get_end_point_l(0).r & 0xFE ) << 6 ) | ( ( get_end_point_h(0).r & 0xFE ) << 13 ) + | ( ( get_end_point_l(0).g & 0xFE ) << 20 ) | ( ( get_end_point_h(0).g & 0xFE ) << 27 ); + block.y = ( ( get_end_point_h(0).g & 0xFE ) >> 5 ) | ( ( get_end_point_l(0).b & 0xFE ) << 2 ) + | ( ( get_end_point_h(0).b & 0xFE ) << 9 ) | ( ( get_end_point_l(0).a & 0xFE ) << 16 ) + | ( ( get_end_point_h(0).a & 0xFE ) << 23 ) + | ( get_end_point_l(0).r & 0x01 ) << 31; + block.z = ( get_end_point_h(0).r & 0x01 ) + | ( get_color_index(0) << 1 ) | ( get_color_index(1) << 4 ) | ( get_color_index(2) << 8 ) | ( get_color_index(3) << 12 ) + | ( get_color_index(4) << 16 ) | ( get_color_index(5) << 20 ) | ( get_color_index(6) << 24 ) | ( get_color_index(7) << 28 ); + block.w = ( get_color_index(8) << 0 ) | ( get_color_index(9) << 4 ) | ( get_color_index(10)<< 8 ) | ( get_color_index(11)<< 12 ) + | ( get_color_index(12)<< 16 ) | ( get_color_index(13)<< 20 ) | ( get_color_index(14)<< 24 ) | ( get_color_index(15)<< 28 ); +} +void block_package7( out uint4 block, uint partition, 
uint threadBase ) +{ + block.x = 0x80 | ( partition << 8 ) + | ( ( get_end_point_l(0).r & 0xF8 ) << 11 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 16 ) + | ( ( get_end_point_l(1).r & 0xF8 ) << 21 ) | ( ( get_end_point_h(1).r & 0xF8 ) << 26 ); + block.y = ( ( get_end_point_h(1).r & 0xF8 ) >> 6 ) | ( ( get_end_point_l(0).g & 0xF8 ) >> 1 ) + | ( ( get_end_point_h(0).g & 0xF8 ) << 4 ) | ( ( get_end_point_l(1).g & 0xF8 ) << 9 ) + | ( ( get_end_point_h(1).g & 0xF8 ) << 14 ) | ( ( get_end_point_l(0).b & 0xF8 ) << 19 ) + | ( ( get_end_point_h(0).b & 0xF8 ) << 24 ); + block.z = ( ( get_end_point_l(1).b & 0xF8 ) >> 3 ) | ( ( get_end_point_h(1).b & 0xF8 ) << 2 ) + | ( ( get_end_point_l(0).a & 0xF8 ) << 7 ) | ( ( get_end_point_h(0).a & 0xF8 ) << 12 ) + | ( ( get_end_point_l(1).a & 0xF8 ) << 17 ) | ( ( get_end_point_h(1).a & 0xF8 ) << 22 ) + | ( ( get_end_point_l(0).r & 0x04 ) << 28 ) | ( ( get_end_point_h(0).r & 0x04 ) << 29 ); + block.w = ( ( get_end_point_l(1).r & 0x04 ) >> 2 ) | ( ( get_end_point_h(1).r & 0x04 ) >> 1 ) + | ( get_color_index(0) << 2 ); + uint i = 1; + for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ ) + { + block.w |= get_color_index(i) << ( i * 2 + 1 ); + } + for ( ; i < 16; i ++ ) + { + block.w |= get_color_index(i) << ( i * 2 ); + } +} \ No newline at end of file diff --git a/extern/CMP_Core/shaders/BCn_Common_Kernel.h b/extern/CMP_Core/shaders/BCn_Common_Kernel.h index e9db4a3..92b0479 100644 --- a/extern/CMP_Core/shaders/BCn_Common_Kernel.h +++ b/extern/CMP_Core/shaders/BCn_Common_Kernel.h @@ -1,5 +1,5 @@ -//===================================================================== -// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +//============================================================================= +// Copyright (c) 2018-2020 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal @@ -23,21 +23,34 @@ #ifndef _BCn_Common_Kernel_H #define _BCn_Common_Kernel_H +#pragma warning(disable:4505) // disable warnings on unreferenced local function has been removed + #include "Common_Def.h" -#ifndef ASPM_GPU +//----------------------------------------------------------------------- +// When build is for CPU, we have some missing API calls common to GPU +// Use CPU CMP_Core replacements +//----------------------------------------------------------------------- +#if defined(ASPM_GPU) || defined(ASPM_HLSL) || defined(ASPM_OPENCL) +#define ALIGN_16 +#else +#include INC_cmp_math_func #if defined(WIN32) || defined(_WIN64) #define ALIGN_16 __declspec(align(16)) #else // !WIN32 && !_WIN64 #define ALIGN_16 #endif // !WIN32 && !_WIN64 -#else -#define ALIGN_16 +#endif + +#ifdef ASPM_HLSL +#define fabs(x) abs(x) #endif #define DXTC_OFFSET_ALPHA 0 #define DXTC_OFFSET_RGB 2 +#define BC1CompBlockSize 8 + #define RC 2 #define GC 1 #define BC 0 @@ -46,10 +59,6 @@ /* Channel Bits */ -#define RG 5 -#define GG 6 -#define BG 5 - #define RGBA8888_CHANNEL_A 3 #define RGBA8888_CHANNEL_R 2 #define RGBA8888_CHANNEL_G 1 @@ -59,56 +68,54 @@ Channel Bits #define RGBA8888_OFFSET_G (RGBA8888_CHANNEL_G * 8) #define RGBA8888_OFFSET_B (RGBA8888_CHANNEL_B * 8) -#define MAX_BLOCK 64 -#define BLOCK_SIZE MAX_BLOCK - #ifndef MAX_ERROR #define MAX_ERROR 128000.f #endif -#define MAX_BLOCK 64 -#define MAX_POINTS 16 -#define BLOCK_SIZE MAX_BLOCK -#define NUM_CHANNELS 4 -#define NUM_ENDPOINTS 2 -#define BLOCK_SIZE_4X4 16 +#define MAX_BLOCK 64 +#define MAX_POINTS 16 +#define BLOCK_SIZE MAX_BLOCK +#define NUM_CHANNELS 4 +#define NUM_ENDPOINTS 2 +#define BLOCK_SIZE_4X4 16 +#define CMP_ALPHA_RAMP 8 // Number of Ramp Points used for Alpha Channels in BC5 #define ConstructColour(r, g, b) (((r) << 11) | ((g) << 5) | (b)) +#define BYTEPP 4 
+#define CMP_QUALITY1 0.10f +#define CMP_QUALITY2 0.601f +#define POS(x,y) (pos_on_axis[(x)+(y)*4]) + // Find the first approximation of the line // Assume there is a linear relation // Z = a * X_In // Z = b * Y_In // Find a,b to minimize MSE between Z and Z_In -#define EPS (2.f / 255.f) * (2.f / 255.f) -#define EPS2 3.f * (2.f / 255.f) * (2.f / 255.f) +#define EPS (2.f / 255.f) * (2.f / 255.f) +#define EPS2 3.f * (2.f / 255.f) * (2.f / 255.f) // Grid precision #define PIX_GRID 8 #define BYTE_MASK 0x00ff -CMP_CONSTANT CGU_UINT8 nByteBitsMask[9] = {0x00, 0x80, 0xc0, 0xe0, 0xf0, - 0xf8, 0xfc, 0xfe, 0xff}; -CMP_CONSTANT CGU_DWORD dwRndAmount[9] = {0, 0, 0, 0, 1, 1, 2, 2, 3}; - -#define _INT_GRID (_bFixedRamp && _FracPrc == 0) #define SCH_STPS 3 // number of search steps to make at each end of interval static CMP_CONSTANT CGU_FLOAT sMvF[] = {0.f, -1.f, 1.f, -2.f, 2.f, -3.f, 3.f, -4.f, 4.f, -5.f, 5.f, -6.f, 6.f, -7.f, 7.f, -8.f, 8.f}; #ifndef GBL_SCH_STEP -#define GBL_SCH_STEP_MXS 0.018f -#define GBL_SCH_EXT_MXS 0.1f -#define LCL_SCH_STEP_MXS 0.6f -#define GBL_SCH_STEP_MXQ 0.0175f -#define GBL_SCH_EXT_MXQ 0.154f -#define LCL_SCH_STEP_MXQ 0.45f - -#define GBL_SCH_STEP GBL_SCH_STEP_MXS -#define GBL_SCH_EXT GBL_SCH_EXT_MXS -#define LCL_SCH_STEP LCL_SCH_STEP_MXS +#define GBL_SCH_STEP_MXS 0.018f +#define GBL_SCH_EXT_MXS 0.1f +#define LCL_SCH_STEP_MXS 0.6f +#define GBL_SCH_STEP_MXQ 0.0175f +#define GBL_SCH_EXT_MXQ 0.154f +#define LCL_SCH_STEP_MXQ 0.45f + +#define GBL_SCH_STEP GBL_SCH_STEP_MXS +#define GBL_SCH_EXT GBL_SCH_EXT_MXS +#define LCL_SCH_STEP LCL_SCH_STEP_MXS #endif typedef struct { @@ -123,74 +130,329 @@ typedef struct { typedef struct { // user setable - CGU_FLOAT m_fquality; - CGU_FLOAT m_fChannelWeights[3]; - CGU_BOOL m_bUseChannelWeighting; - CGU_BOOL m_bUseAdaptiveWeighting; - CGU_BOOL m_bUseFloat; - CGU_BOOL m_b3DRefinement; - CGU_UINT8 m_nRefinementSteps; - CGU_UINT8 m_nAlphaThreshold; - - CGU_BOOL m_mapDecodeRGBA; - - // ?? 
Remove this + CGU_FLOAT m_fquality; + CGU_FLOAT m_fChannelWeights[3]; + CGU_BOOL m_bUseChannelWeighting; + CGU_BOOL m_bUseAdaptiveWeighting; + CGU_BOOL m_bUseFloat; + CGU_BOOL m_b3DRefinement; + CGU_BOOL m_bUseAlpha; + CGU_BOOL m_bIsSRGB; // Use Linear to SRGB color conversion used in BC1, default is false + CGU_BOOL m_bIsSNORM; // Reserved for support in BC4&5, currently always false! + CGU_UINT32 m_nRefinementSteps; + CGU_UINT32 m_nAlphaThreshold; + CGU_BOOL m_mapDecodeRGBA; CGU_UINT32 m_src_width; CGU_UINT32 m_src_height; } CMP_BC15Options; -//---------------------------------------- Common Code ------------------------------------------------------- +typedef struct { + CGU_Vec3f Color0; + CGU_Vec3f Color1; +} CMP_EndPoints; + +// gets 2 bit values from a 32 bit variable at the kth index range (0..15) +// same as get values (0..3) from CGU_UINT32 variable[16] +static CGU_UINT32 cmp_get2Bit32(CGU_UINT32 value, CGU_UINT32 indexPos) +{ + return (value >> (indexPos*2))&0x3; +} + +// sets 2 bit values into a 32 bit variable +// same as set values (0..3) to CGU_UINT32 variable[16] +static CGU_UINT32 cmp_set2Bit32(CGU_UINT32 value, CGU_UINT32 indexPos) +{ + return ((value&0x3) << (indexPos*2)); +} + +static CGU_UINT32 cmp_constructColor(CGU_UINT32 R,CGU_UINT32 G, CGU_UINT32 B) +{ + return (((R & 0x000000F8) << 8) | ((G & 0x000000FC) << 3) | ((B & 0x000000F8) >> 3) ); +} + +static CGU_Vec3f cmp_powVec3f(CGU_Vec3f color, CGU_FLOAT ex) +{ +#ifdef ASPM_GPU + return pow(color, ex); +#else + CGU_Vec3f ColorSrgbPower; + ColorSrgbPower.x = pow(color.x, ex); + ColorSrgbPower.y = pow(color.y, ex); + ColorSrgbPower.z = pow(color.z, ex); + return ColorSrgbPower; +#endif +} + +static CGU_Vec3f cmp_clamp3f(CGU_Vec3f value, CGU_FLOAT minValue, CGU_FLOAT maxValue) +{ +#ifdef ASPM_GPU + return clamp(value,minValue,maxValue); +#else + CGU_Vec3f revalue = value; + if (revalue.x > maxValue) revalue.x = maxValue; + else + if (revalue.x < minValue) revalue.x = minValue; + + if (revalue.y > 
maxValue) revalue.y = maxValue; + else + if (revalue.y < minValue) revalue.y = minValue; + + if (revalue.z > maxValue) revalue.z = maxValue; + else + if (revalue.z < minValue) revalue.z = minValue; + return revalue; +#endif +} + +static CGU_Vec3f cmp_saturate(CGU_Vec3f value) +{ +#ifdef ASPM_HLSL + return saturate(value); +#else + return cmp_clamp3f(value,0.0f,1.0f); +#endif +} + +// Helper function to compare two floats truncated to a set precision +// Prec is a power of 10 value from 1,10,100,...,10000... INT MAX power 10 +static CGU_BOOL cmp_compareprecision(CGU_FLOAT f1,CGU_FLOAT f2,CGU_INT Prec) +{ + CGU_INT scale1 = (CGU_INT)(f1*Prec); + CGU_INT scale2 = (CGU_INT)(f2*Prec); + return(scale1 == scale2); +} + +// Helper function to cut (truncate) a float to a set precision +static CGU_FLOAT cmp_getfloatprecision(CGU_FLOAT f1,CGU_INT Prec) +{ + CGU_INT scale1 = (CGU_INT)(f1*Prec); + return((CGU_FLOAT)(scale1)/Prec); +} + +static CGU_FLOAT cmp_linearToSrgbf(CMP_IN CGU_FLOAT Color) +{ +if (Color <= 0.0f) return (0.0f); +if (Color >= 1.0f) return (1.0f); +// standard : 0.0031308f +if (Color <= 0.00313066844250063) return (Color*12.92f); +return(pow(Color, 1.0f/2.4f) * 1.055f - 0.055f); +} + + +static CGU_Vec3f cmp_linearToSrgb(CMP_IN CGU_Vec3f Color) +{ + Color.x = cmp_linearToSrgbf(Color.x); + Color.y = cmp_linearToSrgbf(Color.y); + Color.z = cmp_linearToSrgbf(Color.z); + return Color; +} + + +static CGU_FLOAT cmp_srgbToLinearf(CMP_IN CGU_FLOAT Color) +{ + if (Color <= 0.0f) return (0.0f); + if (Color >= 1.0f) return (1.0f); + // standard 0.04045f + if (Color <= 0.0404482362771082) return (Color/12.92f); + return pow((Color+0.055f)/1.055f, 2.4f); +} + +static CGU_Vec3f cmp_srgbToLinear(CMP_IN CGU_Vec3f Color) +{ + Color.x = cmp_srgbToLinearf(Color.x); + Color.y = cmp_srgbToLinearf(Color.y); + Color.z = cmp_srgbToLinearf(Color.z); + return Color; +} + +inline CGU_Vec3f cmp_min3f( CMP_IN CGU_Vec3f value1, CMP_IN CGU_Vec3f value2) +{ +#ifdef ASPM_GPU + return min(value1,value2); +#else + CGU_Vec3f 
res; + res.x = min(value1.x, value2.x); + res.y = min(value1.y, value2.y); + res.z = min(value1.z, value2.z); + return res; +#endif +} + +inline CGU_Vec3f cmp_max3f( CMP_IN CGU_Vec3f value1, CMP_IN CGU_Vec3f value2) +{ +#ifdef ASPM_GPU + return max(value1,value2); +#else + CGU_Vec3f res; + res.x = max(value1.x, value2.x); + res.y = max(value1.y, value2.y); + res.z = max(value1.z, value2.z); + return res; +#endif +} + +static CGU_FLOAT cmp_getIndicesRGB(CMP_INOUT CGU_UINT32 CMP_PTRINOUT cmpindex, const CGU_Vec3f block[16], CGU_Vec3f minColor, CGU_Vec3f maxColor,CGU_BOOL getErr) +{ + CGU_UINT32 PackedIndices = 0; + CGU_FLOAT err = 0.0f; + CGU_Vec3f cn[4]; + CGU_FLOAT minDistance; + + if (getErr) { + // remap to BC1 spec for decoding offsets, + // where cn[0] > cn[1] Max Color = index 0, 2/3 offset =index 2, 1/3 offset = index 3, Min Color = index 1 + cn[0] = maxColor; + cn[1] = minColor; + cn[2] = cn[0]*2.0f/3.0f + cn[1]*1.0f/3.0f; + cn[3] = cn[0]*1.0f/3.0f + cn[1]*2.0f/3.0f; + } + + CGU_FLOAT Scale = 3.f / dot(minColor - maxColor, minColor - maxColor); + CGU_Vec3f ScaledRange = (minColor - maxColor) * Scale; + CGU_FLOAT Bias = (dot(maxColor, maxColor) - dot(maxColor, minColor)) * Scale; + CGU_INT indexMap[4] = {0,2,3,1}; // mapping based on BC1 Spec for color0 > color1 + CGU_UINT32 index; + CGU_FLOAT diff; + + for (CGU_UINT32 i = 0; i < 16; i++) + { + // Get offset from base scale + diff = dot(block[i], ScaledRange) + Bias; + index = ((CGU_UINT32)round(diff))&0x3; + + // remap linear offset to spec offset + index = indexMap[index]; + + // use err calc for use in higher quality code + if (getErr) + { + minDistance = dot(block[i] - cn[index],block[i] - cn[index]); + err += minDistance; + } -static void SetDefaultBC15Options(CMP_BC15Options *BC15Options) { + // Map the 2 bit index into compress 32 bit block + if (index) + PackedIndices |= (index << (2*i)); + + } + + if (getErr) + err = err * 0.0208333f; + + CMP_PTRINOUT cmpindex = PackedIndices; + return err; +} + 
+//---------------------------------------- Common Utility Code ------------------------------------------------------- + +#ifndef ASPM_GPU +static void SetDefaultBC15Options(CMP_BC15Options *BC15Options) +{ if (BC15Options) { BC15Options->m_fquality = 1.0f; BC15Options->m_bUseChannelWeighting = false; - BC15Options->m_bUseAdaptiveWeighting = false; - BC15Options->m_fChannelWeights[0] = 0.3086f; - BC15Options->m_fChannelWeights[1] = 0.6094f; - BC15Options->m_fChannelWeights[2] = 0.0820f; - BC15Options->m_nAlphaThreshold = 128; - BC15Options->m_bUseFloat = false; - BC15Options->m_b3DRefinement = false; - BC15Options->m_nRefinementSteps = 1; - BC15Options->m_src_width = 4; - BC15Options->m_src_height = 4; + BC15Options->m_bUseAdaptiveWeighting= false; + BC15Options->m_fChannelWeights[0] = 0.3086f; + BC15Options->m_fChannelWeights[1] = 0.6094f; + BC15Options->m_fChannelWeights[2] = 0.0820f; + BC15Options->m_nAlphaThreshold = 128; + BC15Options->m_bUseFloat = false; + BC15Options->m_b3DRefinement = false; + BC15Options->m_bUseAlpha = false; + BC15Options->m_bIsSNORM = false; + BC15Options->m_bIsSRGB = false; + BC15Options->m_nRefinementSteps = 1; + BC15Options->m_src_width = 4; + BC15Options->m_src_height = 4; #ifdef CMP_SET_BC13_DECODER_RGBA - BC15Options->m_mapDecodeRGBA = true; + BC15Options->m_mapDecodeRGBA = true; #else - BC15Options->m_mapDecodeRGBA = false; + BC15Options->m_mapDecodeRGBA = false; #endif } } +#endif -inline CGU_UINT8 minb(CGU_UINT8 a, CGU_UINT8 b) { return a < b ? a : b; } -inline CGU_FLOAT minf(CGU_FLOAT a, CGU_FLOAT b) { return a < b ? a : b; } -inline CGU_FLOAT maxf(CGU_FLOAT a, CGU_FLOAT b) { return a > b ? 
a : b; } - -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) -static void CalculateColourWeightings(CGU_UINT8 block[BLOCK_SIZE_4X4X4], - CMP_GLOBAL CMP_BC15Options *BC15options) { +static CMP_BC15Options CalculateColourWeightings(CGU_Vec4f rgbaBlock[BLOCK_SIZE_4X4],CMP_BC15Options BC15options) +{ CGU_FLOAT fBaseChannelWeights[3] = {0.3086f, 0.6094f, 0.0820f}; - if (!BC15options->m_bUseChannelWeighting) { - BC15options->m_fChannelWeights[0] = 1.0F; - BC15options->m_fChannelWeights[1] = 1.0F; - BC15options->m_fChannelWeights[2] = 1.0F; - return; + if (!BC15options.m_bUseChannelWeighting) { + BC15options.m_fChannelWeights[0] = 1.0F; + BC15options.m_fChannelWeights[1] = 1.0F; + BC15options.m_fChannelWeights[2] = 1.0F; + return BC15options; } - if (BC15options->m_bUseAdaptiveWeighting) { + if (BC15options.m_bUseAdaptiveWeighting) { float medianR = 0.0f, medianG = 0.0f, medianB = 0.0f; for (CGU_UINT32 k = 0; k < BLOCK_SIZE_4X4; k++) { - CGU_DWORD R = (block[k] & 0xff0000) >> 16; - CGU_DWORD G = (block[k] & 0xff00) >> 8; - CGU_DWORD B = block[k] & 0xff; + medianR += rgbaBlock[k].x; + medianG += rgbaBlock[k].y; + medianB += rgbaBlock[k].z; + } + + medianR /= BLOCK_SIZE_4X4; + medianG /= BLOCK_SIZE_4X4; + medianB /= BLOCK_SIZE_4X4; + + // Now skew the colour weightings based on the gravity center of the block + float largest = max(max(medianR, medianG), medianB); + + if (largest > 0) { + medianR /= largest; + medianG /= largest; + medianB /= largest; + } else + medianR = medianG = medianB = 1.0f; + + // Scale weightings back up to 1.0f + CGU_FLOAT fWeightScale = + 1.0f / (fBaseChannelWeights[0] + fBaseChannelWeights[1] + + fBaseChannelWeights[2]); + + BC15options.m_fChannelWeights[0] = fBaseChannelWeights[0] * fWeightScale; + BC15options.m_fChannelWeights[1] = fBaseChannelWeights[1] * fWeightScale; + BC15options.m_fChannelWeights[2] = fBaseChannelWeights[2] * fWeightScale; + + BC15options.m_fChannelWeights[0] = ((BC15options.m_fChannelWeights[0] * 3 * 
medianR) + BC15options.m_fChannelWeights[0]) *0.25f; + BC15options.m_fChannelWeights[1] = ((BC15options.m_fChannelWeights[1] * 3 * medianG) + BC15options.m_fChannelWeights[1]) *0.25f; + BC15options.m_fChannelWeights[2] = ((BC15options.m_fChannelWeights[2] * 3 * medianB) + BC15options.m_fChannelWeights[2]) *0.25f; + + fWeightScale = 1.0f / (BC15options.m_fChannelWeights[0] + BC15options.m_fChannelWeights[1] + BC15options.m_fChannelWeights[2]); + + BC15options.m_fChannelWeights[0] *= fWeightScale; + BC15options.m_fChannelWeights[1] *= fWeightScale; + BC15options.m_fChannelWeights[2] *= fWeightScale; + } + else { + BC15options.m_fChannelWeights[0] = fBaseChannelWeights[0]; + BC15options.m_fChannelWeights[1] = fBaseChannelWeights[1]; + BC15options.m_fChannelWeights[2] = fBaseChannelWeights[2]; + } + + return BC15options; +} + +static CMP_BC15Options CalculateColourWeightings3f(CGU_Vec3f rgbBlock[BLOCK_SIZE_4X4],CMP_BC15Options BC15options) +{ + CGU_FLOAT fBaseChannelWeights[3] = {0.3086f, 0.6094f, 0.0820f}; + + if (!BC15options.m_bUseChannelWeighting) { + BC15options.m_fChannelWeights[0] = 1.0F; + BC15options.m_fChannelWeights[1] = 1.0F; + BC15options.m_fChannelWeights[2] = 1.0F; + return BC15options; + } + + if (BC15options.m_bUseAdaptiveWeighting) { + float medianR = 0.0f, medianG = 0.0f, medianB = 0.0f; - medianR += R; - medianG += G; - medianB += B; + for (CGU_UINT32 k = 0; k < BLOCK_SIZE_4X4; k++) { + medianR += rgbBlock[k].x; + medianG += rgbBlock[k].y; + medianB += rgbBlock[k].z; } medianR /= BLOCK_SIZE_4X4; @@ -198,7 +460,7 @@ static void CalculateColourWeightings(CGU_UINT8 block[BLOCK_SIZE_4X4X4], medianB /= BLOCK_SIZE_4X4; // Now skew the colour weightings based on the gravity center of the block - float largest = maxf(maxf(medianR, medianG), medianB); + float largest = max(max(medianR, medianG), medianB); if (largest > 0) { medianR /= largest; @@ -211,741 +473,689 @@ static void CalculateColourWeightings(CGU_UINT8 block[BLOCK_SIZE_4X4X4], CGU_FLOAT 
fWeightScale = 1.0f / (fBaseChannelWeights[0] + fBaseChannelWeights[1] + fBaseChannelWeights[2]); - BC15options->m_fChannelWeights[0] = fBaseChannelWeights[0] * fWeightScale; - BC15options->m_fChannelWeights[1] = fBaseChannelWeights[1] * fWeightScale; - BC15options->m_fChannelWeights[2] = fBaseChannelWeights[2] * fWeightScale; - BC15options->m_fChannelWeights[0] = - ((BC15options->m_fChannelWeights[0] * 3 * medianR) + - BC15options->m_fChannelWeights[0]) * - 0.25f; - BC15options->m_fChannelWeights[1] = - ((BC15options->m_fChannelWeights[1] * 3 * medianG) + - BC15options->m_fChannelWeights[1]) * - 0.25f; - BC15options->m_fChannelWeights[2] = - ((BC15options->m_fChannelWeights[2] * 3 * medianB) + - BC15options->m_fChannelWeights[2]) * - 0.25f; - fWeightScale = 1.0f / (BC15options->m_fChannelWeights[0] + - BC15options->m_fChannelWeights[1] + - BC15options->m_fChannelWeights[2]); - BC15options->m_fChannelWeights[0] *= fWeightScale; - BC15options->m_fChannelWeights[1] *= fWeightScale; - BC15options->m_fChannelWeights[2] *= fWeightScale; - } else { - BC15options->m_fChannelWeights[0] = fBaseChannelWeights[0]; - BC15options->m_fChannelWeights[1] = fBaseChannelWeights[1]; - BC15options->m_fChannelWeights[2] = fBaseChannelWeights[2]; + + BC15options.m_fChannelWeights[0] = fBaseChannelWeights[0] * fWeightScale; + BC15options.m_fChannelWeights[1] = fBaseChannelWeights[1] * fWeightScale; + BC15options.m_fChannelWeights[2] = fBaseChannelWeights[2] * fWeightScale; + + BC15options.m_fChannelWeights[0] = ((BC15options.m_fChannelWeights[0] * 3 * medianR) + BC15options.m_fChannelWeights[0]) *0.25f; + BC15options.m_fChannelWeights[1] = ((BC15options.m_fChannelWeights[1] * 3 * medianG) + BC15options.m_fChannelWeights[1]) *0.25f; + BC15options.m_fChannelWeights[2] = ((BC15options.m_fChannelWeights[2] * 3 * medianB) + BC15options.m_fChannelWeights[2]) *0.25f; + + fWeightScale = 1.0f / (BC15options.m_fChannelWeights[0] + BC15options.m_fChannelWeights[1] + 
BC15options.m_fChannelWeights[2]); + + BC15options.m_fChannelWeights[0] *= fWeightScale; + BC15options.m_fChannelWeights[1] *= fWeightScale; + BC15options.m_fChannelWeights[2] *= fWeightScale; + } + else { + BC15options.m_fChannelWeights[0] = fBaseChannelWeights[0]; + BC15options.m_fChannelWeights[1] = fBaseChannelWeights[1]; + BC15options.m_fChannelWeights[2] = fBaseChannelWeights[2]; } + + return BC15options; } -#endif // !BC5 -#endif // !BC4 - -/*------------------------------------------------------------------------------------------------ -1 dim error -------------------------------------------------------------------------------------------------*/ -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) - -static CGU_FLOAT RampSrchW(CGU_FLOAT _Blck[MAX_BLOCK], - CGU_FLOAT _BlckErr[MAX_BLOCK], - CGU_FLOAT _Rpt[MAX_BLOCK], CGU_FLOAT _maxerror, - CGU_FLOAT _min_ex, CGU_FLOAT _max_ex, int _NmbClrs, - int _block) { + +static CGU_FLOAT cmp_getRampErr(CGU_FLOAT Prj[BLOCK_SIZE_4X4], + CGU_FLOAT PrjErr[BLOCK_SIZE_4X4], + CGU_FLOAT PreMRep[BLOCK_SIZE_4X4], + CGU_FLOAT StepErr, + CGU_FLOAT lowPosStep, + CGU_FLOAT highPosStep, + CGU_UINT32 dwUniqueColors) +{ CGU_FLOAT error = 0; - CGU_FLOAT step = (_max_ex - _min_ex) / (_block - 1); + CGU_FLOAT step = (highPosStep - lowPosStep) / 3; // using (dwNumChannels=4 - 1); CGU_FLOAT step_h = step * (CGU_FLOAT)0.5; CGU_FLOAT rstep = (CGU_FLOAT)1.0f / step; - for (CGU_INT32 i = 0; i < _NmbClrs; i++) { + for (CGU_UINT32 i = 0; i < dwUniqueColors; i++) { CGU_FLOAT v; // Work out which value in the block this select CGU_FLOAT del; - if ((del = _Blck[i] - _min_ex) <= 0) - v = _min_ex; - else if (_Blck[i] - _max_ex >= 0) - v = _max_ex; + if ((del = Prj[i] - lowPosStep) <= 0) + v = lowPosStep; + else if (Prj[i] - highPosStep >= 0) + v = highPosStep; else - v = floor((del + step_h) * rstep) * step + _min_ex; + v = floor((del + step_h) * rstep) * step + lowPosStep; // And accumulate the error - CGU_FLOAT d = (_Blck[i] - v); 
+ CGU_FLOAT d = (Prj[i] - v); d *= d; - CGU_FLOAT err = _Rpt[i] * d + _BlckErr[i]; + CGU_FLOAT err = PreMRep[i] * d + PrjErr[i]; error += err; - if (_maxerror < error) { - error = _maxerror; + if (StepErr < error) { + error = StepErr; break; } } return error; } -#endif // !BC5 -#endif // BC4 - -/*------------------------------------------------------------------------------------------------ -// this is how the end points is going to be rounded in compressed format -------------------------------------------------------------------------------------------------*/ -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) - -static void MkRmpOnGrid(CGU_FLOAT _RmpF[NUM_CHANNELS][NUM_ENDPOINTS], - CGU_FLOAT _MnMx[NUM_CHANNELS][NUM_ENDPOINTS], - CGU_FLOAT _Min, CGU_FLOAT _Max, CGU_UINT8 nRedBits, - CGU_UINT8 nGreenBits, CGU_UINT8 nBlueBits) { - CGU_FLOAT Fctrs0[3]; - CGU_FLOAT Fctrs1[3]; - - Fctrs1[RC] = (CGU_FLOAT)(1 << nRedBits); - Fctrs1[GC] = (CGU_FLOAT)(1 << nGreenBits); - Fctrs1[BC] = (CGU_FLOAT)(1 << nBlueBits); - Fctrs0[RC] = (CGU_FLOAT)(1 << (PIX_GRID - nRedBits)); - Fctrs0[GC] = (CGU_FLOAT)(1 << (PIX_GRID - nGreenBits)); - Fctrs0[BC] = (CGU_FLOAT)(1 << (PIX_GRID - nBlueBits)); - - for (CGU_INT32 j = 0; j < 3; j++) { - for (CGU_INT32 k = 0; k < 2; k++) { - _RmpF[j][k] = floor(_MnMx[j][k]); - if (_RmpF[j][k] <= _Min) - _RmpF[j][k] = _Min; - else { - _RmpF[j][k] += - floor(128.f / Fctrs1[j]) - floor(_RmpF[j][k] / Fctrs1[j]); - _RmpF[j][k] = minf(_RmpF[j][k], _Max); - } - _RmpF[j][k] = floor(_RmpF[j][k] / Fctrs0[j]) * Fctrs0[j]; - } - } -} -#endif // !BC5 -#endif // BC4 - -/*------------------------------------------------------------------------------------------------ -// this is how the end points is going to be look like when decompressed -------------------------------------------------------------------------------------------------*/ -inline void MkWkRmpPts(CGU_BOOL *_bEq, - CGU_FLOAT _OutRmpPts[NUM_CHANNELS][NUM_ENDPOINTS], - CGU_FLOAT 
_InpRmpPts[NUM_CHANNELS][NUM_ENDPOINTS], - CGU_UINT8 nRedBits, CGU_UINT8 nGreenBits, - CGU_UINT8 nBlueBits) { - CGU_FLOAT Fctrs[3]; - Fctrs[RC] = (CGU_FLOAT)(1 << nRedBits); - Fctrs[GC] = (CGU_FLOAT)(1 << nGreenBits); - Fctrs[BC] = (CGU_FLOAT)(1 << nBlueBits); - - *_bEq = TRUE; - // find whether input ramp is flat - for (CGU_INT32 j = 0; j < 3; j++) - *_bEq &= (_InpRmpPts[j][0] == _InpRmpPts[j][1]); - - // end points on the integer grid - for (CGU_INT32 j = 0; j < 3; j++) { - for (CGU_INT32 k = 0; k < 2; k++) { - // Apply the lower bit replication to give full dynamic range - _OutRmpPts[j][k] = _InpRmpPts[j][k] + floor(_InpRmpPts[j][k] / Fctrs[j]); - _OutRmpPts[j][k] = maxf((CGU_FLOAT)_OutRmpPts[j][k], 0.f); - _OutRmpPts[j][k] = minf((CGU_FLOAT)_OutRmpPts[j][k], 255.f); +static CGU_Vec2ui cmp_compressExplicitAlphaBlock(const CGU_FLOAT AlphaBlockUV[16]) +{ + CGU_Vec2ui compBlock = {0,0}; + CGU_UINT8 i; + for (i = 0; i < 16; i++) + { + CGU_UINT8 v = (CGU_UINT8)(AlphaBlockUV[i]*255.0F); + v = (v + 7 - (v >> 4)); + v >>= 4; + + if (v < 0) + v = 0; + else + if (v > 0xf) + v = 0xf; + + if (i < 8) + compBlock.x |= v << (4 * i); + else + compBlock.y |= v << (4 * (i - 8)); } - } + return compBlock; } -/*------------------------------------------------------------------------------------------------ -1 DIM ramp -------------------------------------------------------------------------------------------------*/ - -inline void BldClrRmp(CGU_FLOAT _Rmp[MAX_POINTS], - CGU_FLOAT _InpRmp[NUM_ENDPOINTS], CGU_UINT8 dwNumPoints) { - // linear interpolate end points to get the ramp - _Rmp[0] = _InpRmp[0]; - _Rmp[dwNumPoints - 1] = _InpRmp[1]; - if (dwNumPoints % 2) - _Rmp[dwNumPoints] = - 1000000.f; // for 3 point ramp; not to select the 4th point as min - for (CGU_INT32 e = 1; e < dwNumPoints - 1; e++) - _Rmp[e] = floor((_Rmp[0] * (dwNumPoints - 1 - e) + - _Rmp[dwNumPoints - 1] * e + dwRndAmount[dwNumPoints]) / - (CGU_FLOAT)(dwNumPoints - 1)); -} +static CGU_FLOAT cmp_getRampError( 
CGU_FLOAT _Blk[BLOCK_SIZE_4X4], + CGU_FLOAT _Rpt[BLOCK_SIZE_4X4], + CGU_FLOAT _maxerror, + CGU_FLOAT _min_ex, + CGU_FLOAT _max_ex, + CGU_INT _NmbrClrs) // Max 16 +{ + CGU_INT i; + CGU_FLOAT error = 0; + const CGU_FLOAT step = (_max_ex - _min_ex) / 7; // (CGU_FLOAT)(dwNumPoints - 1); + const CGU_FLOAT step_h = step * 0.5f; + const CGU_FLOAT rstep = 1.0f / step; -/*------------------------------------------------------------------------------------------------ -// build 3D ramp -------------------------------------------------------------------------------------------------*/ -inline void BldRmp(CGU_FLOAT _Rmp[NUM_CHANNELS][MAX_POINTS], - CGU_FLOAT _InpRmp[NUM_CHANNELS][NUM_ENDPOINTS], - CGU_UINT8 dwNumPoints) { - for (CGU_INT32 j = 0; j < 3; j++) BldClrRmp(_Rmp[j], _InpRmp[j], dwNumPoints); -} + for (i = 0; i < _NmbrClrs; i++) { + CGU_FLOAT v; + // Work out which value in the block this select + CGU_FLOAT del; -/*------------------------------------------------------------------------------------------------ -Compute cumulative error for the current cluster -------------------------------------------------------------------------------------------------*/ -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) - -static CGU_FLOAT ClstrErr(CGU_FLOAT _Blk[MAX_BLOCK][NUM_CHANNELS], - CGU_FLOAT _Rpt[MAX_BLOCK], - CGU_FLOAT _Rmp[NUM_CHANNELS][MAX_POINTS], - int _NmbClrs, int _blcktp, CGU_BOOL _ConstRamp, - CMP_GLOBAL const CMP_BC15Options *BC15options) { - CGU_FLOAT fError = 0.f; - int rmp_l = (_ConstRamp) ? 
1 : _blcktp; - - // For each colour in the original block, find the closest cluster - // and compute the comulative error - for (CGU_INT32 i = 0; i < _NmbClrs; i++) { - CGU_FLOAT fShortest = 99999999999.f; - - if (BC15options->m_bUseChannelWeighting) - for (CGU_INT32 r = 0; r < rmp_l; r++) { - // calculate the distance for each component - CGU_FLOAT fDistance = - (_Blk[i][RC] - _Rmp[RC][r]) * (_Blk[i][RC] - _Rmp[RC][r]) * - BC15options->m_fChannelWeights[0] + - (_Blk[i][GC] - _Rmp[GC][r]) * (_Blk[i][GC] - _Rmp[GC][r]) * - BC15options->m_fChannelWeights[1] + - (_Blk[i][BC] - _Rmp[BC][r]) * (_Blk[i][BC] - _Rmp[BC][r]) * - BC15options->m_fChannelWeights[2]; - - if (fDistance < fShortest) fShortest = fDistance; - } + if ((del = _Blk[i] - _min_ex) <= 0) + v = _min_ex; + else if (_Blk[i] - _max_ex >= 0) + v = _max_ex; else - for (CGU_INT32 r = 0; r < rmp_l; r++) { - // calculate the distance for each component - CGU_FLOAT fDistance = - (_Blk[i][RC] - _Rmp[RC][r]) * (_Blk[i][RC] - _Rmp[RC][r]) + - (_Blk[i][GC] - _Rmp[GC][r]) * (_Blk[i][GC] - _Rmp[GC][r]) + - (_Blk[i][BC] - _Rmp[BC][r]) * (_Blk[i][BC] - _Rmp[BC][r]); - - if (fDistance < fShortest) fShortest = fDistance; - } - - // accumulate the error - fError += fShortest * _Rpt[i]; - } - - return fError; -} -#endif // !BC5 -#endif // !BC4 - -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) -static CGU_FLOAT Refine3D(CGU_FLOAT _OutRmpPnts[NUM_CHANNELS][NUM_ENDPOINTS], - CGU_FLOAT _InpRmpPnts[NUM_CHANNELS][NUM_ENDPOINTS], - CGU_FLOAT _Blk[MAX_BLOCK][NUM_CHANNELS], - CGU_FLOAT _Rpt[MAX_BLOCK], int _NmrClrs, - CGU_UINT8 dwNumPoints, - CMP_GLOBAL const CMP_BC15Options *BC15options, - CGU_UINT8 nRedBits, CGU_UINT8 nGreenBits, - CGU_UINT8 nBlueBits, CGU_UINT8 nRefineSteps) { - ALIGN_16 CGU_FLOAT Rmp[NUM_CHANNELS][MAX_POINTS]; - - CGU_FLOAT Blk[MAX_BLOCK][NUM_CHANNELS]; - for (CGU_INT32 i = 0; i < _NmrClrs; i++) - for (CGU_INT32 j = 0; j < 3; j++) Blk[i][j] = _Blk[i][j]; - - CGU_FLOAT fWeightRed = 
BC15options->m_fChannelWeights[0]; - CGU_FLOAT fWeightGreen = BC15options->m_fChannelWeights[1]; - CGU_FLOAT fWeightBlue = BC15options->m_fChannelWeights[2]; - - // here is our grid - CGU_FLOAT Fctrs[3]; - Fctrs[RC] = (CGU_FLOAT)(1 << (PIX_GRID - nRedBits)); - Fctrs[GC] = (CGU_FLOAT)(1 << (PIX_GRID - nGreenBits)); - Fctrs[BC] = (CGU_FLOAT)(1 << (PIX_GRID - nBlueBits)); - - CGU_FLOAT InpRmp0[NUM_CHANNELS][NUM_ENDPOINTS]; - CGU_FLOAT InpRmp[NUM_CHANNELS][NUM_ENDPOINTS]; - for (CGU_INT32 k = 0; k < 2; k++) - for (CGU_INT32 j = 0; j < 3; j++) - InpRmp0[j][k] = InpRmp[j][k] = _OutRmpPnts[j][k] = _InpRmpPnts[j][k]; - - // make ramp endpoints the way they'll going to be decompressed - // plus check whether the ramp is flat - CGU_BOOL Eq; - CGU_FLOAT WkRmpPts[NUM_CHANNELS][NUM_ENDPOINTS]; - MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); - - // build ramp for all 3 colors - BldRmp(Rmp, WkRmpPts, dwNumPoints); - - // clusterize for the current ramp - CGU_FLOAT bestE = - ClstrErr(Blk, _Rpt, Rmp, _NmrClrs, dwNumPoints, Eq, BC15options); - if (bestE == 0.f || !nRefineSteps) // if exact, we've done - return bestE; - - // Jitter endpoints in each direction - int nRefineStart = 0 - (minb(nRefineSteps, (CGU_UINT8)8)); - int nRefineEnd = minb(nRefineSteps, (CGU_UINT8)8); - for (CGU_INT32 nJitterG0 = nRefineStart; nJitterG0 <= nRefineEnd; - nJitterG0++) { - InpRmp[GC][0] = - minf(maxf(InpRmp0[GC][0] + nJitterG0 * Fctrs[GC], 0.f), 255.f); - for (CGU_INT32 nJitterG1 = nRefineStart; nJitterG1 <= nRefineEnd; - nJitterG1++) { - InpRmp[GC][1] = - minf(maxf(InpRmp0[GC][1] + nJitterG1 * Fctrs[GC], 0.f), 255.f); - MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); - BldClrRmp(Rmp[GC], WkRmpPts[GC], dwNumPoints); - - CGU_FLOAT RmpErrG[MAX_POINTS][MAX_BLOCK]; - for (CGU_INT32 i = 0; i < _NmrClrs; i++) { - for (CGU_INT32 r = 0; r < dwNumPoints; r++) { - CGU_FLOAT DistG = (Rmp[GC][r] - Blk[i][GC]); - RmpErrG[r][i] = DistG * DistG * fWeightGreen; - } - } - - 
for (CGU_INT32 nJitterB0 = nRefineStart; nJitterB0 <= nRefineEnd; - nJitterB0++) { - InpRmp[BC][0] = - minf(maxf(InpRmp0[BC][0] + nJitterB0 * Fctrs[BC], 0.f), 255.f); - for (CGU_INT32 nJitterB1 = nRefineStart; nJitterB1 <= nRefineEnd; - nJitterB1++) { - InpRmp[BC][1] = - minf(maxf(InpRmp0[BC][1] + nJitterB1 * Fctrs[BC], 0.f), 255.f); - MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); - BldClrRmp(Rmp[BC], WkRmpPts[BC], dwNumPoints); - - CGU_FLOAT RmpErr[MAX_POINTS][MAX_BLOCK]; - for (CGU_INT32 i = 0; i < _NmrClrs; i++) { - for (CGU_INT32 r = 0; r < dwNumPoints; r++) { - CGU_FLOAT DistB = (Rmp[BC][r] - Blk[i][BC]); - RmpErr[r][i] = RmpErrG[r][i] + DistB * DistB * fWeightBlue; - } - } + v = (floor((del + step_h) * rstep) * step) + _min_ex; - for (CGU_INT32 nJitterR0 = nRefineStart; nJitterR0 <= nRefineEnd; - nJitterR0++) { - InpRmp[RC][0] = - minf(maxf(InpRmp0[RC][0] + nJitterR0 * Fctrs[RC], 0.f), 255.f); - for (CGU_INT32 nJitterR1 = nRefineStart; nJitterR1 <= nRefineEnd; - nJitterR1++) { - InpRmp[RC][1] = minf( - maxf(InpRmp0[RC][1] + nJitterR1 * Fctrs[RC], 0.f), 255.f); - MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, - nBlueBits); - BldClrRmp(Rmp[RC], WkRmpPts[RC], dwNumPoints); - - // compute cumulative error - CGU_FLOAT mse = 0.f; - int rmp_l = (Eq) ? 
1 : dwNumPoints; - for (CGU_INT32 k = 0; k < _NmrClrs; k++) { - CGU_FLOAT MinErr = 10000000.f; - for (CGU_INT32 r = 0; r < rmp_l; r++) { - CGU_FLOAT Dist = (Rmp[RC][r] - Blk[k][RC]); - CGU_FLOAT Err = RmpErr[r][k] + Dist * Dist * fWeightRed; - MinErr = minf(MinErr, Err); - } - mse += MinErr * _Rpt[k]; - } + // And accumulate the error + CGU_FLOAT del2 = (_Blk[i] - v); + error += del2 * del2 * _Rpt[i]; - // save if we achieve better result - if (mse < bestE) { - bestE = mse; - for (CGU_INT32 k = 0; k < 2; k++) - for (CGU_INT32 j = 0; j < 3; j++) - _OutRmpPnts[j][k] = InpRmp[j][k]; - } - } - } - } - } + // if we've already lost to the previous step bail out + if (_maxerror < error) { + error = _maxerror; + break; } } - - return bestE; + return error; } -#endif // !BC5 -#endif // BC4 - -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) - -static CGU_FLOAT Refine(CGU_FLOAT _OutRmpPnts[NUM_CHANNELS][NUM_ENDPOINTS], - CGU_FLOAT _InpRmpPnts[NUM_CHANNELS][NUM_ENDPOINTS], - CGU_FLOAT _Blk[MAX_BLOCK][NUM_CHANNELS], - CGU_FLOAT _Rpt[MAX_BLOCK], int _NmrClrs, - CGU_UINT8 dwNumPoints, - CMP_GLOBAL const CMP_BC15Options *BC15options, - CGU_UINT8 nRedBits, CGU_UINT8 nGreenBits, - CGU_UINT8 nBlueBits, CGU_UINT8 nRefineSteps) { - ALIGN_16 CGU_FLOAT Rmp[NUM_CHANNELS][MAX_POINTS]; - - CGU_FLOAT Blk[MAX_BLOCK][NUM_CHANNELS]; - for (CGU_INT32 i = 0; i < _NmrClrs; i++) - for (CGU_INT32 j = 0; j < 3; j++) Blk[i][j] = _Blk[i][j]; - - CGU_FLOAT fWeightRed = BC15options->m_fChannelWeights[0]; - CGU_FLOAT fWeightGreen = BC15options->m_fChannelWeights[1]; - CGU_FLOAT fWeightBlue = BC15options->m_fChannelWeights[2]; - - // here is our grid - CGU_FLOAT Fctrs[3]; - Fctrs[RC] = (CGU_FLOAT)(1 << (PIX_GRID - nRedBits)); - Fctrs[GC] = (CGU_FLOAT)(1 << (PIX_GRID - nGreenBits)); - Fctrs[BC] = (CGU_FLOAT)(1 << (PIX_GRID - nBlueBits)); - - CGU_FLOAT InpRmp0[NUM_CHANNELS][NUM_ENDPOINTS]; - CGU_FLOAT InpRmp[NUM_CHANNELS][NUM_ENDPOINTS]; - for (CGU_INT32 k = 0; k < 2; k++) - for 
(CGU_INT32 j = 0; j < 3; j++) - InpRmp0[j][k] = InpRmp[j][k] = _OutRmpPnts[j][k] = _InpRmpPnts[j][k]; - - // make ramp endpoints the way they'll going to be decompressed - // plus check whether the ramp is flat - CGU_BOOL Eq; - CGU_FLOAT WkRmpPts[NUM_CHANNELS][NUM_ENDPOINTS]; - MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); - - // build ramp for all 3 colors - BldRmp(Rmp, WkRmpPts, dwNumPoints); - - // clusterize for the current ramp - CGU_FLOAT bestE = - ClstrErr(Blk, _Rpt, Rmp, _NmrClrs, dwNumPoints, Eq, BC15options); - if (bestE == 0.f || !nRefineSteps) // if exact, we've done - return bestE; - - // Tweak each component in isolation and get the best values - - // precompute ramp errors for Green and Blue - CGU_FLOAT RmpErr[MAX_POINTS][MAX_BLOCK]; - for (CGU_INT32 i = 0; i < _NmrClrs; i++) { - for (CGU_INT32 r = 0; r < dwNumPoints; r++) { - CGU_FLOAT DistG = (Rmp[GC][r] - Blk[i][GC]); - CGU_FLOAT DistB = (Rmp[BC][r] - Blk[i][BC]); - RmpErr[r][i] = DistG * DistG * fWeightGreen + DistB * DistB * fWeightBlue; - } - } - // First Red - CGU_FLOAT bstC0 = InpRmp0[RC][0]; - CGU_FLOAT bstC1 = InpRmp0[RC][1]; - int nRefineStart = 0 - (minb(nRefineSteps, (CGU_UINT8)8)); - int nRefineEnd = minb(nRefineSteps, (CGU_UINT8)8); - for (CGU_INT32 i = nRefineStart; i <= nRefineEnd; i++) { - for (CGU_INT32 j = nRefineStart; j <= nRefineEnd; j++) { - // make a move; both sides of interval. - InpRmp[RC][0] = minf(maxf(InpRmp0[RC][0] + i * Fctrs[RC], 0.f), 255.f); - InpRmp[RC][1] = minf(maxf(InpRmp0[RC][1] + j * Fctrs[RC], 0.f), 255.f); - - // make ramp endpoints the way they'll going to be decompressed - // plus check whether the ramp is flat - MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); - - // build ramp only for red - BldClrRmp(Rmp[RC], WkRmpPts[RC], dwNumPoints); - - // compute cumulative error - CGU_FLOAT mse = 0.f; - int rmp_l = (Eq) ? 
1 : dwNumPoints; - for (CGU_INT32 k = 0; k < _NmrClrs; k++) { - CGU_FLOAT MinErr = 10000000.f; - for (CGU_INT32 r = 0; r < rmp_l; r++) { - CGU_FLOAT Dist = (Rmp[RC][r] - Blk[k][RC]); - CGU_FLOAT Err = RmpErr[r][k] + Dist * Dist * fWeightRed; - MinErr = minf(MinErr, Err); - } - mse += MinErr * _Rpt[k]; - } +static CGU_FLOAT cmp_linearBlockRefine(CGU_FLOAT _Blk[BLOCK_SIZE_4X4], + CGU_FLOAT _Rpt[BLOCK_SIZE_4X4], + CGU_FLOAT _MaxError, + CMP_INOUT CGU_FLOAT CMP_PTRINOUT _min_ex, + CMP_INOUT CGU_FLOAT CMP_PTRINOUT _max_ex, + CGU_FLOAT _m_step, CGU_FLOAT _min_bnd, + CGU_FLOAT _max_bnd, + CGU_INT _NmbrClrs) { + // Start out assuming our endpoints are the min and max values we've + // determined - // save if we achieve better result - if (mse < bestE) { - bstC0 = InpRmp[RC][0]; - bstC1 = InpRmp[RC][1]; - bestE = mse; - } - } - } + // Attempt a (simple) progressive refinement step to reduce noise in the + // output image by trying to find a better overall match for the endpoints. - // our best REDs - InpRmp[RC][0] = bstC0; - InpRmp[RC][1] = bstC1; + CGU_FLOAT maxerror = _MaxError; + CGU_FLOAT min_ex = CMP_PTRINOUT _min_ex; + CGU_FLOAT max_ex = CMP_PTRINOUT _max_ex; - // make ramp endpoints the way they'll going to be decompressed - // plus check whether the ramp is flat - MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); + CGU_INT mode, bestmode; - // build ramp only for green - BldRmp(Rmp, WkRmpPts, dwNumPoints); + do { + CGU_FLOAT cr_min0 = min_ex; + CGU_FLOAT cr_max0 = max_ex; + for (bestmode = -1, mode = 0; mode < SCH_STPS * SCH_STPS; mode++) { + // check each move (see sStep for direction) + CGU_FLOAT cr_min = min_ex + _m_step * sMvF[mode / SCH_STPS]; + CGU_FLOAT cr_max = max_ex + _m_step * sMvF[mode % SCH_STPS]; - // precompute ramp errors for Red and Blue - for (CGU_INT32 i = 0; i < _NmrClrs; i++) { - for (CGU_INT32 r = 0; r < dwNumPoints; r++) { - CGU_FLOAT DistR = (Rmp[RC][r] - Blk[i][RC]); - CGU_FLOAT DistB = (Rmp[BC][r] - Blk[i][BC]); - 
RmpErr[r][i] = DistR * DistR * fWeightRed + DistB * DistB * fWeightBlue; - } - } + cr_min = max(cr_min, _min_bnd); + cr_max = min(cr_max, _max_bnd); - // Now green - bstC0 = InpRmp0[GC][0]; - bstC1 = InpRmp0[GC][1]; - for (CGU_INT32 i = nRefineStart; i <= nRefineEnd; i++) { - for (CGU_INT32 j = nRefineStart; j <= nRefineEnd; j++) { - InpRmp[GC][0] = minf(maxf(InpRmp0[GC][0] + i * Fctrs[GC], 0.f), 255.f); - InpRmp[GC][1] = minf(maxf(InpRmp0[GC][1] + j * Fctrs[GC], 0.f), 255.f); - - MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); - BldClrRmp(Rmp[GC], WkRmpPts[GC], dwNumPoints); - - CGU_FLOAT mse = 0.f; - int rmp_l = (Eq) ? 1 : dwNumPoints; - for (CGU_INT32 k = 0; k < _NmrClrs; k++) { - CGU_FLOAT MinErr = 10000000.f; - for (CGU_INT32 r = 0; r < rmp_l; r++) { - CGU_FLOAT Dist = (Rmp[GC][r] - Blk[k][GC]); - CGU_FLOAT Err = RmpErr[r][k] + Dist * Dist * fWeightGreen; - MinErr = minf(MinErr, Err); - } - mse += MinErr * _Rpt[k]; - } + CGU_FLOAT error; + error = cmp_getRampError(_Blk, _Rpt, maxerror, cr_min, cr_max, _NmbrClrs); - if (mse < bestE) { - bstC0 = InpRmp[GC][0]; - bstC1 = InpRmp[GC][1]; - bestE = mse; + if (error < maxerror) { + maxerror = error; + bestmode = mode; + cr_min0 = cr_min; + cr_max0 = cr_max; } } - } - // our best GREENs - InpRmp[GC][0] = bstC0; - InpRmp[GC][1] = bstC1; + if (bestmode != -1) { + // make move (see sStep for direction) + min_ex = cr_min0; + max_ex = cr_max0; + } + } while (bestmode != -1); - MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); - BldRmp(Rmp, WkRmpPts, dwNumPoints); + CMP_PTRINOUT _min_ex = min_ex; + CMP_PTRINOUT _max_ex = max_ex; - // ramp err for Red and Green - for (CGU_INT32 i = 0; i < _NmrClrs; i++) { - for (CGU_INT32 r = 0; r < dwNumPoints; r++) { - CGU_FLOAT DistR = (Rmp[RC][r] - Blk[i][RC]); - CGU_FLOAT DistG = (Rmp[GC][r] - Blk[i][GC]); - RmpErr[r][i] = DistR * DistR * fWeightRed + DistG * DistG * fWeightGreen; - } - } + return maxerror; +} - bstC0 = InpRmp0[BC][0]; - bstC1 = 
InpRmp0[BC][1]; - // Now blue - for (CGU_INT32 i = nRefineStart; i <= nRefineEnd; i++) { - for (CGU_INT32 j = nRefineStart; j <= nRefineEnd; j++) { - InpRmp[BC][0] = minf(maxf(InpRmp0[BC][0] + i * Fctrs[BC], 0.f), 255.f); - InpRmp[BC][1] = minf(maxf(InpRmp0[BC][1] + j * Fctrs[BC], 0.f), 255.f); - - MkWkRmpPts(&Eq, WkRmpPts, InpRmp, nRedBits, nGreenBits, nBlueBits); - BldClrRmp(Rmp[BC], WkRmpPts[BC], dwNumPoints); - - CGU_FLOAT mse = 0.f; - int rmp_l = (Eq) ? 1 : dwNumPoints; - for (CGU_INT32 k = 0; k < _NmrClrs; k++) { - CGU_FLOAT MinErr = 10000000.f; - for (CGU_INT32 r = 0; r < rmp_l; r++) { - CGU_FLOAT Dist = (Rmp[BC][r] - Blk[k][BC]); - CGU_FLOAT Err = RmpErr[r][k] + Dist * Dist * fWeightBlue; - MinErr = minf(MinErr, Err); - } - mse += MinErr * _Rpt[k]; - } - if (mse < bestE) { - bstC0 = InpRmp[BC][0]; - bstC1 = InpRmp[BC][1]; - bestE = mse; - } +static CGU_Vec2f cmp_getLinearEndPoints( + CGU_FLOAT _Blk[BLOCK_SIZE_4X4], + CMP_IN CGU_FLOAT fquality) +{ + CGU_UINT32 i; + CGU_Vec2f cmpMinMax; + + //================================================================ + // Bounding Box + // lowest quality calculation to get min and max value to use + //================================================================ + if (fquality < CMP_QUALITY2) + { + cmpMinMax.x = _Blk[0]; + cmpMinMax.y = _Blk[0]; + for (i=1; i> ((PIX_GRID - nBlueBits)))); -} -#endif // !BC5 -#endif // !BC4 - -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) -// Compute error and find DXTC indexes for the current cluster -static CGU_FLOAT ClstrIntnl(CGU_FLOAT _Blk[MAX_BLOCK][NUM_CHANNELS], - CGU_UINT8 *_Indxs, - CGU_FLOAT _Rmp[NUM_CHANNELS][MAX_POINTS], - int dwBlockSize, CGU_UINT8 dwNumPoints, - CGU_BOOL _ConstRamp, - CMP_GLOBAL const CMP_BC15Options *BC15options, - CGU_BOOL _bUseAlpha) { - CGU_FLOAT Err = 0.f; - CGU_UINT8 rmp_l = (_ConstRamp) ? 
1 : dwNumPoints; - - // For each colour in the original block assign it - // to the closest cluster and compute the cumulative error - for (CGU_INT32 i = 0; i < dwBlockSize; i++) { - if (_bUseAlpha && *((CGU_DWORD *)&_Blk[i][AC]) == 0) - _Indxs[i] = dwNumPoints; - else { - CGU_FLOAT shortest = 99999999999.f; - CGU_UINT8 shortestIndex = 0; - if (BC15options) - for (CGU_UINT8 r = 0; r < rmp_l; r++) { - // calculate the distance for each component - CGU_FLOAT distance = - (_Blk[i][RC] - _Rmp[RC][r]) * (_Blk[i][RC] - _Rmp[RC][r]) * - BC15options->m_fChannelWeights[0] + - (_Blk[i][GC] - _Rmp[GC][r]) * (_Blk[i][GC] - _Rmp[GC][r]) * - BC15options->m_fChannelWeights[1] + - (_Blk[i][BC] - _Rmp[BC][r]) * (_Blk[i][BC] - _Rmp[BC][r]) * - BC15options->m_fChannelWeights[2]; - - if (distance < shortest) { - shortest = distance; - shortestIndex = r; - } + // For each unique value we compute the number of it appearances. + CGU_FLOAT fBlk[BLOCK_SIZE_4X4]; + + // sort the input + #ifndef ASPM_GPU + memcpy(fBlk, _Blk, BLOCK_SIZE_4X4 * sizeof(CGU_FLOAT)); + qsort((void *)fBlk, (size_t)BLOCK_SIZE_4X4, sizeof(CGU_FLOAT), QSortFCmp); + #else + CGU_UINT32 j; + + for (i = 0; i < BLOCK_SIZE_4X4; i++) { + fBlk[i] = _Blk[i]; } - else - for (CGU_UINT8 r = 0; r < rmp_l; r++) { - // calculate the distance for each component - CGU_FLOAT distance = - (_Blk[i][RC] - _Rmp[RC][r]) * (_Blk[i][RC] - _Rmp[RC][r]) + - (_Blk[i][GC] - _Rmp[GC][r]) * (_Blk[i][GC] - _Rmp[GC][r]) + - (_Blk[i][BC] - _Rmp[BC][r]) * (_Blk[i][BC] - _Rmp[BC][r]); - - if (distance < shortest) { - shortest = distance; - shortestIndex = r; + + CMP_df what[BLOCK_SIZE]; + + for (i = 0; i < BLOCK_SIZE_4X4; i++) { + what[i].index = i; + what[i].data = fBlk[i]; + } + + CGU_UINT32 tmp_index; + CGU_FLOAT tmp_data; + + for (i = 1; i < BLOCK_SIZE_4X4; i++) { + for (j = i; j > 0; j--) { + if (what[j - 1].data > what[j].data) { + tmp_index = what[j].index; + tmp_data = what[j].data; + what[j].index = what[j - 1].index; + what[j].data = what[j - 
1].data; + what[j - 1].index = tmp_index; + what[j - 1].data = tmp_data; + } } } - Err += shortest; + for (i = 0; i < BLOCK_SIZE_4X4; i++) fBlk[i] = what[i].data; + #endif - // We have the index of the best cluster, so assign this in the block - // Reorder indices to match correct DXTC ordering - if (shortestIndex == dwNumPoints - 1) - shortestIndex = 1; - else if (shortestIndex) - shortestIndex++; - _Indxs[i] = shortestIndex; - } - } - - return Err; -} -#endif // !BC5 -#endif // !BC4 - -/*------------------------------------------------------------------------------------------------ -// input ramp is on the coarse grid -------------------------------------------------------------------------------------------------*/ -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) -static CGU_FLOAT ClstrBas(CGU_UINT8 *_Indxs, - CGU_FLOAT _Blk[MAX_BLOCK][NUM_CHANNELS], - CGU_FLOAT _InpRmp[NUM_CHANNELS][NUM_ENDPOINTS], - int dwBlockSize, CGU_UINT8 dwNumPoints, - CMP_GLOBAL const CMP_BC15Options *BC15options, - CGU_BOOL _bUseAlpha, CGU_UINT8 nRedBits, - CGU_UINT8 nGreenBits, CGU_UINT8 nBlueBits) { - // make ramp endpoints the way they'll going to be decompressed - CGU_BOOL Eq = TRUE; - CGU_FLOAT InpRmp[NUM_CHANNELS][NUM_ENDPOINTS]; - MkWkRmpPts(&Eq, InpRmp, _InpRmp, nRedBits, nGreenBits, nBlueBits); - - // build ramp as it would be built by decompressor - CGU_FLOAT Rmp[NUM_CHANNELS][MAX_POINTS]; - BldRmp(Rmp, InpRmp, dwNumPoints); - - // clusterize and find a cumulative error - return ClstrIntnl(_Blk, _Indxs, Rmp, dwBlockSize, dwNumPoints, Eq, - BC15options, _bUseAlpha); -} -#endif // !BC5 -#endif // !BC4 - -/*------------------------------------------------------------------------------------------------ -Clusterization the way it looks from the DXTC decompressor -------------------------------------------------------------------------------------------------*/ -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) -static CGU_FLOAT 
Clstr(CGU_UINT32 block_32[MAX_BLOCK], CGU_UINT32 dwBlockSize, - CGU_UINT8 nEndpoints[3][NUM_ENDPOINTS], - CGU_UINT8 *pcIndices, CGU_UINT8 dwNumPoints, - CMP_GLOBAL const CMP_BC15Options *BC15options, - CGU_BOOL _bUseAlpha, CGU_UINT8 _nAlphaThreshold, - CGU_UINT8 nRedBits, CGU_UINT8 nGreenBits, - CGU_UINT8 nBlueBits) { - CGU_INT32 c0 = ConstructColor(nEndpoints[RC][0], nRedBits, nEndpoints[GC][0], - nGreenBits, nEndpoints[BC][0], nBlueBits); - CGU_INT32 c1 = ConstructColor(nEndpoints[RC][1], nRedBits, nEndpoints[GC][1], - nGreenBits, nEndpoints[BC][1], nBlueBits); - CGU_INT32 nEndpointIndex0 = 0; - CGU_INT32 nEndpointIndex1 = 1; - if ((!(dwNumPoints & 0x1) && c0 <= c1) || ((dwNumPoints & 0x1) && c0 > c1)) { - nEndpointIndex0 = 1; - nEndpointIndex1 = 0; - } + CGU_FLOAT new_p = -2.0f; - CGU_FLOAT InpRmp[NUM_CHANNELS][NUM_ENDPOINTS]; - InpRmp[RC][0] = (CGU_FLOAT)nEndpoints[RC][nEndpointIndex0]; - InpRmp[RC][1] = (CGU_FLOAT)nEndpoints[RC][nEndpointIndex1]; - InpRmp[GC][0] = (CGU_FLOAT)nEndpoints[GC][nEndpointIndex0]; - InpRmp[GC][1] = (CGU_FLOAT)nEndpoints[GC][nEndpointIndex1]; - InpRmp[BC][0] = (CGU_FLOAT)nEndpoints[BC][nEndpointIndex0]; - InpRmp[BC][1] = (CGU_FLOAT)nEndpoints[BC][nEndpointIndex1]; - - CGU_UINT32 dwAlphaThreshold = _nAlphaThreshold << 24; - CGU_FLOAT Blk[MAX_BLOCK][NUM_CHANNELS]; - for (CGU_UINT32 i = 0; i < dwBlockSize; i++) { - Blk[i][RC] = (CGU_FLOAT)((block_32[i] & 0xff0000) >> 16); - Blk[i][GC] = (CGU_FLOAT)((block_32[i] & 0xff00) >> 8); - Blk[i][BC] = (CGU_FLOAT)(block_32[i] & 0xff); - if (_bUseAlpha) - Blk[i][AC] = ((block_32[i] & 0xff000000) >= dwAlphaThreshold) ? 
1.f : 0.f; - } + CGU_UINT32 dwUniqueValues = 0; + afUniqueValues[0] = 0.0f; + CGU_BOOL requiresCalculation = true; - return ClstrBas(pcIndices, Blk, InpRmp, dwBlockSize, dwNumPoints, BC15options, - _bUseAlpha, nRedBits, nGreenBits, nBlueBits); -} -#endif // !BC5 -#endif // !BC4 + { // Ramp not fixed + for(i = 0; i < BLOCK_SIZE_4X4; i++) + { + if(new_p != fBlk[i]) + { + afUniqueValues[dwUniqueValues] = new_p = fBlk[i]; + afValueRepeats[dwUniqueValues] = 1.f; + dwUniqueValues++; + } + else + if (dwUniqueValues) afValueRepeats[dwUniqueValues - 1] += 1.f; + } + // if number of unique colors is less or eq 2, we've done + if(dwUniqueValues <= 2) + { + Ramp[0] = floor(afUniqueValues[0] * 255.0f + 0.5f); + if(dwUniqueValues == 1) + Ramp[1] = Ramp[0] + 1.f; + else + Ramp[1] = floor(afUniqueValues[1] * 255.0f + 0.5f); + requiresCalculation = false; + } + } // Ramp not fixed + + if (requiresCalculation) { + CGU_FLOAT min_ex = afUniqueValues[0]; + CGU_FLOAT max_ex = afUniqueValues[dwUniqueValues - 1]; + CGU_FLOAT min_bnd = 0, max_bnd = 1.; + CGU_FLOAT min_r = min_ex, max_r = max_ex; + CGU_FLOAT gbl_l = 0, gbl_r = 0; + CGU_FLOAT cntr = (min_r + max_r) / 2; + + CGU_FLOAT gbl_err = MAX_ERROR; + // Trying to avoid unnecessary calculations. Heuristics: after some analisis + // it appears that in integer case, if the input interval not more then 48 + // we won't get much better + bool wantsSearch = !((max_ex - min_ex) <= (48.f / 256.0f)); + + if (wantsSearch) { + // Search. + // 1. take the vicinities of both low and high bound of the input + // interval. + // 2. setup some search step + // 3. find the new low and high bound which provides an (sub) optimal + // (infinite precision) clusterization. + CGU_FLOAT gbl_llb = (min_bnd > min_r - GBL_SCH_EXT) ? min_bnd : min_r - GBL_SCH_EXT; + CGU_FLOAT gbl_rrb = (max_bnd < max_r + GBL_SCH_EXT) ? max_bnd : max_r + GBL_SCH_EXT; + CGU_FLOAT gbl_lrb = (cntr < min_r + GBL_SCH_EXT) ? 
cntr : min_r + GBL_SCH_EXT; + CGU_FLOAT gbl_rlb = (cntr > max_r - GBL_SCH_EXT) ? cntr : max_r - GBL_SCH_EXT; + + for (CGU_FLOAT step_l = gbl_llb; step_l < gbl_lrb; step_l += GBL_SCH_STEP) + { + for (CGU_FLOAT step_r = gbl_rrb; gbl_rlb <= step_r; step_r -= GBL_SCH_STEP) + { + CGU_FLOAT sch_err; + // an sse version is avaiable + sch_err = cmp_getRampError(afUniqueValues, afValueRepeats, gbl_err, step_l, step_r, dwUniqueValues); + if (sch_err < gbl_err) + { + gbl_err = sch_err; + gbl_l = step_l; + gbl_r = step_r; + } + } + } + + min_r = gbl_l; + max_r = gbl_r; + } // want search + + // This is a refinement call. The function tries to make several small + // stretches or squashes to minimize quantization error. + CGU_FLOAT m_step = LCL_SCH_STEP / 256.0f; + cmp_linearBlockRefine(afUniqueValues, afValueRepeats, gbl_err, + CMP_REFINOUT min_r, + CMP_REFINOUT max_r, + m_step, min_bnd, max_bnd, + dwUniqueValues); + + min_ex = min_r; + max_ex = max_r; + max_ex *= 255.0f; + min_ex *= 255.0f; + + Ramp[0] = floor(min_ex + 0.5f); + Ramp[1] = floor(max_ex + 0.5f); + } + + // Ensure that the two endpoints are not the same + // This is legal but serves no need & can break some optimizations in the compressor + if (Ramp[0] == Ramp[1]) { + if (Ramp[1] < 255.f) + Ramp[1] = Ramp[1] + 1.0f; + else + if (Ramp[1] > 0.0f) + Ramp[1] = Ramp[1] - 1.0f; + } + + cmpMinMax.x = Ramp[0]; + cmpMinMax.y = Ramp[1]; + + return cmpMinMax; +} + +static CGU_Vec2ui cmp_getBlockPackedIndices( + CGU_Vec2f RampMinMax, + CGU_FLOAT alphaBlock[BLOCK_SIZE_4X4], + CMP_IN CGU_FLOAT fquality) +{ + CGU_UINT32 i; + CGU_UINT32 j; + CGU_Vec2ui cmpBlock = {0,0}; + CGU_UINT32 MinRampU; + CGU_UINT32 MaxRampU; + CGU_INT32 pcIndices[BLOCK_SIZE_4X4]; + + if (fquality < CMP_QUALITY2) + { + CGU_FLOAT Range; + CGU_FLOAT RampSteps; // segments into 0..7 sections + CGU_FLOAT Bias; + + if (RampMinMax.x != RampMinMax.y) + Range = RampMinMax.x - RampMinMax.y; + else + Range = 1.0f; + + RampSteps = 7.f / Range; // segments into 0..7 
sections + Bias = -RampSteps * RampMinMax.y; + + for (i=0; i < 16; ++i) + { + pcIndices[i] = (CGU_UINT32)round(alphaBlock[i] * RampSteps + Bias); + if (i < 5) + { + pcIndices[i] += (pcIndices[i] > 0) - (7 * (pcIndices[i] == 7)); + } + else if (i > 5) + { + pcIndices[i] += (pcIndices[i] > 0) - (7 * (pcIndices[i] == 7 ? 1 : 0)); + } + else + { + pcIndices[i] += (pcIndices[i] > 0) - (7 * (pcIndices[i] == 7)); + } + } + + MinRampU = (CGU_UINT32 )round(RampMinMax.x*255.0f); + MaxRampU = (CGU_UINT32 )round(RampMinMax.y*255.0f); + + cmpBlock.x = (MinRampU << 8) | MaxRampU; + cmpBlock.y = 0; + for (i=0; i < 5; ++i) + { + cmpBlock.x |= (pcIndices[i] << (16 + (i*3))); + } + { + cmpBlock.x |= (pcIndices[5] << 31); + cmpBlock.y |= (pcIndices[5] >> 1); + } + for (i=6; i < BLOCK_SIZE_4X4; ++i) + { + cmpBlock.y |= (pcIndices[i] << (i*3 - 16)); + } + + } + else { + CGU_UINT32 epoint; + CGU_FLOAT alpha[BLOCK_SIZE_4X4]; + CGU_FLOAT OverIntFctr; + CGU_FLOAT shortest; + CGU_FLOAT adist; + + for(i = 0; i < BLOCK_SIZE_4X4; i++) + pcIndices[i] = 0; + + for (i = 0; i < MAX_POINTS; i++) alpha[i] = 0; + + // GetRmp1 + { + if (RampMinMax.x <= RampMinMax.y) { + CGU_FLOAT t = RampMinMax.x; + RampMinMax.x = RampMinMax.y; + RampMinMax.y = t; + } + + //============================= + // final clusterization applied + //============================= + CGU_FLOAT ramp[NUM_ENDPOINTS]; + + ramp[0] = RampMinMax.x; + ramp[1] = RampMinMax.y; + + { // BldRmp1 + alpha[0] = ramp[0]; + alpha[1] = ramp[1]; + for (epoint = 1; epoint < CMP_ALPHA_RAMP - 1; epoint++) + alpha[epoint + 1] = (alpha[0] * (CMP_ALPHA_RAMP - 1 - epoint) + alpha[1] * epoint) / (CGU_FLOAT)(CMP_ALPHA_RAMP - 1); + for (epoint = CMP_ALPHA_RAMP; epoint < BLOCK_SIZE_4X4; epoint++) alpha[epoint] = 100000.f; + } // BldRmp1 + + // FixedRamp + for (i = 0; i < CMP_ALPHA_RAMP; i++) { + alpha[i] = floor(alpha[i] + 0.5f); + } + }// GetRmp1 + + OverIntFctr = 1.f / 255.0f; + for (i = 0; i < CMP_ALPHA_RAMP; i++) + alpha[i] *= OverIntFctr; + + // For each 
colour in the original block, calculate its weighted + // distance from each point in the original and assign it + // to the closest cluster + for (i = 0; i < BLOCK_SIZE_4X4; i++) { + shortest = 10000000.f; + for (j = 0; j < CMP_ALPHA_RAMP; j++) { + adist = (alphaBlock[i] - alpha[j]); + adist *= adist; + if (adist < shortest) { + shortest = adist; + pcIndices[i] = j; + } + } + } + + //================================================== + // EncodeAlphaBlock + //================================================== + MinRampU = (CGU_UINT32 )RampMinMax.x; + MaxRampU = (CGU_UINT32 )RampMinMax.y; + + cmpBlock.x = (MaxRampU << 8) | MinRampU; + cmpBlock.y = 0; + for(i = 0; i < 5; i++) + { + cmpBlock.x |= (pcIndices[i]) << (16 + (i*3)); + } + { + cmpBlock.x |= (pcIndices[5] & 0x1) << 31; + cmpBlock.y |= (pcIndices[5] & 0x6) >> 1; + } + for(i = 6; i < BLOCK_SIZE_4X4; i++) + { + cmpBlock.y |= (pcIndices[i]) << (i*3 - 16); + } + } + return cmpBlock; +} + +static CGU_Vec2ui cmp_compressAlphaBlock( CMP_IN CGU_FLOAT alphaBlock[BLOCK_SIZE_4X4], + CMP_IN CGU_FLOAT fquality) +{ + CGU_Vec2f RampMinMax; + CGU_Vec2ui CmpBlock; + + RampMinMax = cmp_getLinearEndPoints(alphaBlock,fquality); + CmpBlock = cmp_getBlockPackedIndices(RampMinMax,alphaBlock,fquality); + return CmpBlock; +} + +static void cmp_getCompressedAlphaRamp(CGU_UINT8 alpha[8], + const CGU_UINT32 compressedBlock[2]) +{ + alpha[0] = (CGU_UINT8)(compressedBlock[0] & 0xff); + alpha[1] = (CGU_UINT8)((compressedBlock[0] >> 8) & 0xff); + + if (alpha[0] > alpha[1]) { + // 8-alpha block: derive the other six alphas. + // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated. 
+#ifdef ASPM_GPU + alpha[2] = (CGU_UINT8)((6 * alpha[0] + 1 * alpha[1] + 3) / 7); // bit code 010 + alpha[3] = (CGU_UINT8)((5 * alpha[0] + 2 * alpha[1] + 3) / 7); // bit code 011 + alpha[4] = (CGU_UINT8)((4 * alpha[0] + 3 * alpha[1] + 3) / 7); // bit code 100 + alpha[5] = (CGU_UINT8)((3 * alpha[0] + 4 * alpha[1] + 3) / 7); // bit code 101 + alpha[6] = (CGU_UINT8)((2 * alpha[0] + 5 * alpha[1] + 3) / 7); // bit code 110 + alpha[7] = (CGU_UINT8)((1 * alpha[0] + 6 * alpha[1] + 3) / 7); // bit code 111 +#else + alpha[2] = static_cast((6 * alpha[0] + 1 * alpha[1] + 3) / 7); // bit code 010 + alpha[3] = static_cast((5 * alpha[0] + 2 * alpha[1] + 3) / 7); // bit code 011 + alpha[4] = static_cast((4 * alpha[0] + 3 * alpha[1] + 3) / 7); // bit code 100 + alpha[5] = static_cast((3 * alpha[0] + 4 * alpha[1] + 3) / 7); // bit code 101 + alpha[6] = static_cast((2 * alpha[0] + 5 * alpha[1] + 3) / 7); // bit code 110 + alpha[7] = static_cast((1 * alpha[0] + 6 * alpha[1] + 3) / 7); // bit code 111 +#endif + } else { + // 6-alpha block. + // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated. 
+#ifdef ASPM_GPU + alpha[2] = (CGU_UINT8)((4 * alpha[0] + 1 * alpha[1] + 2) / 5); // Bit code 010 + alpha[3] = (CGU_UINT8)((3 * alpha[0] + 2 * alpha[1] + 2) / 5); // Bit code 011 + alpha[4] = (CGU_UINT8)((2 * alpha[0] + 3 * alpha[1] + 2) / 5); // Bit code 100 + alpha[5] = (CGU_UINT8)((1 * alpha[0] + 4 * alpha[1] + 2) / 5); // Bit code 101 +#else + alpha[2] = static_cast((4 * alpha[0] + 1 * alpha[1] + 2) / 5); // Bit code 010 + alpha[3] = static_cast((3 * alpha[0] + 2 * alpha[1] + 2) / 5); // Bit code 011 + alpha[4] = static_cast((2 * alpha[0] + 3 * alpha[1] + 2) / 5); // Bit code 100 + alpha[5] = static_cast((1 * alpha[0] + 4 * alpha[1] + 2) / 5); // Bit code 101 +#endif + alpha[6] = 0; // Bit code 110 + alpha[7] = 255; // Bit code 111 + } +} + +static void cmp_decompressAlphaBlock(CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4], + const CGU_UINT32 compressedBlock[2]) +{ + CGU_UINT32 i; + CGU_UINT8 alpha[8]; + cmp_getCompressedAlphaRamp(alpha, compressedBlock); + + for (i = 0; i < BLOCK_SIZE_4X4; i++) { + CGU_UINT32 index; + if (i < 5) + index = (compressedBlock[0] & (0x7 << (16 + (i * 3)))) >> (16 + (i * 3)); + else if (i > 5) + index = (compressedBlock[1] & (0x7 << (2 + (i - 6) * 3))) >> (2 + (i - 6) * 3); + else { + index = (compressedBlock[0] & 0x80000000) >> 31; + index |= (compressedBlock[1] & 0x3) << 1; + } + + alphaBlock[i] = alpha[index]; + } +} + + +static CGU_Vec3f cmp_565ToLinear(CGU_UINT32 n565) +{ + CGU_UINT32 r0; + CGU_UINT32 g0; + CGU_UINT32 b0; + + r0 = ((n565 & 0xf800) >> 8); + g0 = ((n565 & 0x07e0) >> 3); + b0 = ((n565 & 0x001f) << 3); + + // Apply the lower bit replication to give full dynamic range (5,6,5) + r0 += (r0 >> 5); + g0 += (g0 >> 6); + b0 += (b0 >> 5); + + CGU_Vec3f LinearColor; + LinearColor.x = (CGU_FLOAT)r0; + LinearColor.y = (CGU_FLOAT)g0; + LinearColor.z = (CGU_FLOAT)b0; + + return LinearColor; +} + +static void cmp_ProcessColors(CMP_INOUT CGU_Vec3f CMP_PTRINOUT colorMin, + CMP_INOUT CGU_Vec3f CMP_PTRINOUT colorMax, + CMP_INOUT CGU_UINT32 
CMP_PTRINOUT c0, + CMP_INOUT CGU_UINT32 CMP_PTRINOUT c1, + CGU_INT setopt, + CGU_BOOL isSRGB) +{ + // CGU_UINT32 srbMap[32] = {0,5,8,11,12,13,14,15,16,17,18,19,20,21,22,23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31}; + // CGU_UINT32 sgMap[64] = {0,10,14,16,19,20,22,24,25,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,42,43,43,44,45,45, + // 46,47,47,48,48,49,50,50,51,52,52,53,53,54,54,55,55,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63}; + CGU_INT32 x,y,z; + CGU_Vec3f scale = {31.0f, 63.0f, 31.0f}; + CGU_Vec3f MinColorScaled; + CGU_Vec3f MaxColorScaled; + + // Clamp or Transform is needed, the transforms have built in clamps + if (isSRGB) { + MinColorScaled = cmp_linearToSrgb(CMP_PTRINOUT colorMin); + MaxColorScaled = cmp_linearToSrgb(CMP_PTRINOUT colorMax); + } + else { + MinColorScaled = cmp_clamp3f(CMP_PTRINOUT colorMin,0.0f,1.0f); + MaxColorScaled = cmp_clamp3f(CMP_PTRINOUT colorMax,0.0f,1.0f); + } + + switch (setopt) + { + case 0 : // Use Min Max processing + MinColorScaled = floor(MinColorScaled * scale); + MaxColorScaled = ceil (MaxColorScaled * scale); + CMP_PTRINOUT colorMin = MinColorScaled / scale; + CMP_PTRINOUT colorMax = MaxColorScaled / scale; + break; + default : // Use round processing + MinColorScaled = round (MinColorScaled * scale); + MaxColorScaled = round (MaxColorScaled * scale); + break; + } + + x = (CGU_UINT32)(MinColorScaled.x); + y = (CGU_UINT32)(MinColorScaled.y); + z = (CGU_UINT32)(MinColorScaled.z); + + //if (isSRGB) { + // // scale RB + // x = srbMap[x]; // &0x1F]; + // y = sgMap [y]; // &0x3F]; + // z = srbMap[z]; // &0x1F]; + // // scale G + //} + CMP_PTRINOUT c0 = (x << 11) | (y << 5) | z; + + x = (CGU_UINT32)(MaxColorScaled.x); + y = (CGU_UINT32)(MaxColorScaled.y); + z = (CGU_UINT32)(MaxColorScaled.z); + CMP_PTRINOUT c1 = (x << 11) | (y << 5) | z; +} + +#ifndef ASPM_GPU // Used by BC1, BC2 & BC3 //---------------------------------------------------- // This function decompresses a DXT colour block // The block is 
decompressed to 8 bits per channel -// Result buffer is RGBA format +// Result buffer is RGBA format, A is set to 255 //---------------------------------------------------- -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) -#ifndef ASPM_GPU -static void DecompressDXTRGB_Internal(CGU_UINT8 rgbBlock[BLOCK_SIZE_4X4X4], - const CGU_UINT32 compressedBlock[2], - const CMP_BC15Options *BC15options) { +static void cmp_decompressDXTRGBA_Internal(CGU_UINT8 rgbBlock[BLOCK_SIZE_4X4X4], + const CGU_Vec2ui compressedBlock, + const CGU_BOOL mapDecodeRGBA) { + CGU_BOOL bDXT1 = TRUE; - CGU_UINT32 n0 = compressedBlock[0] & 0xffff; - CGU_UINT32 n1 = compressedBlock[0] >> 16; + CGU_UINT32 n0 = compressedBlock.x & 0xffff; + CGU_UINT32 n1 = compressedBlock.x >> 16; CGU_UINT32 r0; CGU_UINT32 g0; CGU_UINT32 b0; @@ -969,7 +1179,7 @@ static void DecompressDXTRGB_Internal(CGU_UINT8 rgbBlock[BLOCK_SIZE_4X4X4], b0 += (b0 >> 5); b1 += (b1 >> 5); -if (!BC15options->m_mapDecodeRGBA) +if (!mapDecodeRGBA) { //-------------------------------------------------------------- // Channel mapping output as BGRA @@ -979,12 +1189,12 @@ if (!BC15options->m_mapDecodeRGBA) if(!bDXT1 || n0 > n1) { - CGU_UINT32 c2 = 0xff000000 | (((2*r0+r1+1)/3)<<16) | (((2*g0+g1+1)/3)<<8) | (((2*b0+b1+1)/3)); - CGU_UINT32 c3 = 0xff000000 | (((2*r1+r0+1)/3)<<16) | (((2*g1+g0+1)/3)<<8) | (((2*b1+b0+1)/3)); + CGU_UINT32 c2 = 0xff000000 | (((2*r0+r1)/3)<<16) | (((2*g0+g1)/3)<<8) | (((2*b0+b1)/3)); + CGU_UINT32 c3 = 0xff000000 | (((2*r1+r0)/3)<<16) | (((2*g1+g0)/3)<<8) | (((2*b1+b0)/3)); for(int i=0; i<16; i++) { - int index = (compressedBlock[1] >> (2 * i)) & 3; + int index = (compressedBlock.y >> (2 * i)) & 3; switch(index) { @@ -1010,7 +1220,7 @@ if (!BC15options->m_mapDecodeRGBA) for(int i=0; i<16; i++) { - int index = (compressedBlock[1] >> (2 * i)) & 3; + int index = (compressedBlock.y >> (2 * i)) & 3; switch(index) { @@ -1032,20 +1242,18 @@ if (!BC15options->m_mapDecodeRGBA) } else { // MAP_BC15_TO_ABGR 
//-------------------------------------------------------------- - // Channel mapping output as ARGB + // Channel mapping output as RGBA //-------------------------------------------------------------- CGU_UINT32 c0 = 0xff000000 | (b0 << 16) | (g0 << 8) | r0; CGU_UINT32 c1 = 0xff000000 | (b1 << 16) | (g1 << 8) | r1; if (!bDXT1 || n0 > n1) { - CGU_UINT32 c2 = 0xff000000 | (((2 * b0 + b1 + 1) / 3) << 16) | - (((2 * g0 + g1 + 1) / 3) << 8) | (((2 * r0 + r1 + 1) / 3)); - CGU_UINT32 c3 = 0xff000000 | (((2 * b1 + b0 + 1) / 3) << 16) | - (((2 * g1 + g0 + 1) / 3) << 8) | (((2 * r1 + r0 + 1) / 3)); + CGU_UINT32 c2 = 0xff000000 | (((2 * b0 + b1 + 1) / 3) << 16) | (((2 * g0 + g1 + 1) / 3) << 8) | (((2 * r0 + r1 + 1) / 3)); + CGU_UINT32 c3 = 0xff000000 | (((2 * b1 + b0 + 1) / 3) << 16) | (((2 * g1 + g0 + 1) / 3) << 8) | (((2 * r1 + r0 + 1) / 3)); for (int i = 0; i < 16; i++) { - int index = (compressedBlock[1] >> (2 * i)) & 3; + int index = (compressedBlock.y >> (2 * i)) & 3; switch (index) { case 0: ((CMP_GLOBAL CGU_UINT32 *)rgbBlock)[i] = c0; @@ -1063,11 +1271,10 @@ else { // MAP_BC15_TO_ABGR } } else { // Transparent decode - CGU_UINT32 c2 = 0xff000000 | (((b0 + b1) / 2) << 16) | - (((g0 + g1) / 2) << 8) | (((r0 + r1) / 2)); + CGU_UINT32 c2 = 0xff000000 | (((b0 + b1) / 2) << 16) | (((g0 + g1) / 2) << 8) | (((r0 + r1) / 2)); for (int i = 0; i < 16; i++) { - int index = (compressedBlock[1] >> (2 * i)) & 3; + int index = (compressedBlock.y >> (2 * i)) & 3; switch (index) { case 0: ((CMP_GLOBAL CGU_UINT32 *)rgbBlock)[i] = c0; @@ -1087,246 +1294,605 @@ else { // MAP_BC15_TO_ABGR } //MAP_ABGR } #endif // !ASPM_GPU -#endif // !BC5 -#endif // !BC4 -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) -static int QSortIntCmp(const void *Elem1, const void *Elem2) { - return (*(CGU_INT32 *)Elem1 - *(CGU_INT32 *)Elem2); +//-------------------------------------------------------------------------------------------------------- +// Decompress is RGB (0.0f..255.0f) 
+//-------------------------------------------------------------------------------------------------------- +static void cmp_decompressRGBBlock(CMP_INOUT CGU_Vec3f rgbBlock[BLOCK_SIZE_4X4], + const CGU_Vec2ui compressedBlock) { + CGU_UINT32 n0 = compressedBlock.x & 0xffff; + CGU_UINT32 n1 = compressedBlock.x >> 16; + CGU_UINT32 index; + + //------------------------------------------------------- + // Decode the compressed block 0..255 color range + //------------------------------------------------------- + CGU_Vec3f c0 = cmp_565ToLinear(n0); // max color + CGU_Vec3f c1 = cmp_565ToLinear(n1); // min color + CGU_Vec3f c2; + CGU_Vec3f c3; + + if (n0 > n1) { + c2 = (c0*2.0f + c1) / 3.0f; + c3 = (c1*2.0f + c0) / 3.0f; + + for (CGU_UINT32 i = 0; i < 16; i++) { + index = (compressedBlock.y >> (2 * i)) & 3; + switch (index) { + case 0: + rgbBlock[i] = c0; + break; + case 1: + rgbBlock[i] = c1; + break; + case 2: + rgbBlock[i] = c2; + break; + case 3: + rgbBlock[i] = c3; + break; + } + } + } + else { + // Transparent decode + c2 = (c0 + c1) / 2.0f; + + for (CGU_UINT32 i = 0; i < 16; i++) { + index = (compressedBlock.y >> (2 * i)) & 3; + switch (index) { + case 0: + rgbBlock[i] = c0; + break; + case 1: + rgbBlock[i] = c1; + break; + case 2: + rgbBlock[i] = c2; + break; + case 3: + rgbBlock[i] = 0.0f; + break; + } + } + } } -#endif // !BC5 -#endif // !BC4 -// Find the first approximation of the line -// Assume there is a linear relation -// Z = a * X_In -// Z = b * Y_In -// Find a,b to minimize MSE between Z and Z_In -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) - -static void FindAxis(CGU_FLOAT _outBlk[MAX_BLOCK][NUM_CHANNELS], - CGU_FLOAT fLineDirection[NUM_CHANNELS], - CGU_FLOAT fBlockCenter[NUM_CHANNELS], CGU_BOOL *_pbSmall, - CGU_FLOAT _inpBlk[MAX_BLOCK][NUM_CHANNELS], - CGU_FLOAT _inpRpt[MAX_BLOCK], int nDimensions, - int nNumColors) { - CGU_FLOAT Crrl[NUM_CHANNELS]; - CGU_FLOAT RGB2[NUM_CHANNELS]; - - fLineDirection[0] = fLineDirection[1] = 
fLineDirection[2] = RGB2[0] = - RGB2[1] = RGB2[2] = Crrl[0] = Crrl[1] = Crrl[2] = fBlockCenter[0] = - fBlockCenter[1] = fBlockCenter[2] = 0.f; - - // sum position of all points - CGU_FLOAT fNumPoints = 0.f; - for (CGU_INT32 i = 0; i < nNumColors; i++) { - fBlockCenter[0] += _inpBlk[i][0] * _inpRpt[i]; - fBlockCenter[1] += _inpBlk[i][1] * _inpRpt[i]; - fBlockCenter[2] += _inpBlk[i][2] * _inpRpt[i]; - fNumPoints += _inpRpt[i]; +// The source is 0..1, decompressed data using cmp_decompressRGBBlock is 0..255 which is converted down to 0..1 +static float CMP_RGBBlockError( const CGU_Vec3f src_rgbBlock[BLOCK_SIZE_4X4], + const CGU_Vec2ui compressedBlock, + CGU_BOOL isSRGB + ) +{ + CGU_Vec3f rgbBlock[BLOCK_SIZE_4X4]; + + // Decompressed block channels are 0..255 + cmp_decompressRGBBlock(rgbBlock,compressedBlock); + + //------------------------------------------------------------------ + // Calculate MSE of the block + // Note : pow is used as Float type for the code to be usable on CPU + //------------------------------------------------------------------ + CGU_Vec3f serr; + serr = 0.0f; + + float sR,sG,sB,R,G,B; + + for (int j = 0; j<16; j++) + { + if (isSRGB) + { + sR = round(cmp_linearToSrgbf(src_rgbBlock[j].x)*255.0f); + sG = round(cmp_linearToSrgbf(src_rgbBlock[j].y)*255.0f); + sB = round(cmp_linearToSrgbf(src_rgbBlock[j].z)*255.0f); + } + else { + sR = round(src_rgbBlock[j].x*255.0f); + sG = round(src_rgbBlock[j].y*255.0f); + sB = round(src_rgbBlock[j].z*255.0f); + } + + rgbBlock[j] = rgbBlock[j]; + + R = rgbBlock[j].x; + G = rgbBlock[j].y; + B = rgbBlock[j].z; + + // Norm colors + serr.x += pow(sR - R,2.0f); + serr.y += pow(sG - G,2.0f); + serr.z += pow(sB - B,2.0f); } - // and then average to calculate center coordinate of block - fBlockCenter[0] /= fNumPoints; - fBlockCenter[1] /= fNumPoints; - fBlockCenter[2] /= fNumPoints; - - for (CGU_INT32 i = 0; i < nNumColors; i++) { - // calculate output block as offsets around block center - _outBlk[i][0] = _inpBlk[i][0] 
- fBlockCenter[0]; - _outBlk[i][1] = _inpBlk[i][1] - fBlockCenter[1]; - _outBlk[i][2] = _inpBlk[i][2] - fBlockCenter[2]; - - // compute correlation matrix - // RGB2 = sum of ((distance from point from center) squared) - // Crrl = ???????. Seems to be be some calculation based on distance from - // point center in two dimensions - for (CGU_INT32 j = 0; j < nDimensions; j++) { - RGB2[j] += _outBlk[i][j] * _outBlk[i][j] * _inpRpt[i]; - Crrl[j] += _outBlk[i][j] * _outBlk[i][(j + 1) % 3] * _inpRpt[i]; + // MSE for 16 texels + return (serr.x + serr.y + serr.z) / 48.0f; +} + + +// Processing input source 0..1.0f) +static CGU_Vec2ui CompressRGBBlock_FM(const CGU_Vec3f rgbBlockUVf[16], + CMP_IN CGU_FLOAT fquality, + CGU_BOOL isSRGB, + CMP_INOUT CGU_FLOAT CMP_PTRINOUT errout) +{ + CGU_Vec3f axisVectorRGB = {0.0f,0.0f,0.0f};// The axis vector for index projection + CGU_FLOAT pos_on_axis[16]; // The distance each unique falls along the compression axis + CGU_FLOAT axisleft = 0; // The extremities and centre (average of left/right) of srcRGB along the compression axis + CGU_FLOAT axisright = 0; // The extremities and centre (average of left/right) of srcRGB along the compression axis + CGU_FLOAT axiscentre= 0; // The extremities and centre (average of left/right) of srcRGB along the compression axis + CGU_INT32 swap = 0; // Indicator if the RGB values need swapping to generate an opaque result + CGU_Vec3f average_rgb; // The centrepoint of the axis + CGU_Vec3f srcRGB[16]; // The list of source colors with blue channel altered + CGU_Vec3f srcBlock[16]; // The list of source colors with any color space transforms and clipping + CGU_Vec3f rgb; + CGU_UINT32 c0 = 0, c1 = 0; + CGU_Vec2ui compressedBlock = {0,0}; + CGU_FLOAT Q1CompErr; + CGU_Vec2ui Q1CompData; + + // ------------------------------------------------------------------------------------- + // (1) Find the array of unique pixel values and sum them to find their average position + // 
------------------------------------------------------------------------------------- + { + CGU_FLOAT errLQ = 0.0f; + CGU_BOOL fastProcess = (fquality <= CMP_QUALITY1); + CGU_Vec3f srcMin = 1.0f; // Min source color + CGU_Vec3f srcMax = 0.0f; // Max source color + CGU_Vec2ui Q1compressedBlock = {0,0}; + + average_rgb = 0.0f; + // Get average and modifed src + // find average position and save list of pixels as 0F..255F range for processing + // Note: z (blue) is average of blue+green channels + for (CGU_INT32 i = 0; i (R+G)/2 + srcRGB[i] = rgb; + average_rgb = average_rgb + rgb; + } + } + + + // Process two colors for saving in 565 format as C0 and C1 + cmp_ProcessColors(CMP_REFINOUT srcMin,CMP_REFINOUT srcMax,CMP_REFINOUT c0, CMP_REFINOUT c1,isSRGB?1:0, isSRGB); + + // Save simple min-max encoding + if (c0 < c1) { + Q1CompData.x = (c0 << 16) | c1; + CGU_UINT32 index; + errLQ = cmp_getIndicesRGB(CMP_REFINOUT index,rgbBlockUVf, srcMin, srcMax,false); + Q1CompData.y = index; + CMP_PTRINOUT errout = errLQ; + } + else { // Most simple case all colors are equal or 0.0f + Q1compressedBlock.x = (c1 << 16) | c0; + Q1compressedBlock.y = 0; + CMP_PTRINOUT errout = 0.0f; + return Q1compressedBlock; + } + + if (fastProcess) + return Q1CompData; + + // 0.0625F is (1/BLOCK_SIZE_4X4) + average_rgb = average_rgb * 0.0625F; + } - } - // if set's diameter is small - int i0 = 0, i1 = 1; - CGU_FLOAT mxRGB2 = 0.f; - int k = 0, j = 0; - CGU_FLOAT fEPS = fNumPoints * EPS; - for (k = 0, j = 0; j < 3; j++) { - if (RGB2[j] >= fEPS) - k++; - else - RGB2[j] = 0.f; + // ------------------------------------------------------------------------------------- + // (4) For each component, reflect points about the average so all lie on the same side + // of the average, and compute the new average - this gives a second point that defines the axis + // To compute the sign of the axis sum the positive differences of G for each of R and B (the + // G axis is always positive in this implementation + // 
------------------------------------------------------------------------------------- + // An interesting situation occurs if the G axis contains no information, in which case the RB + // axis is also compared. I am not entirely sure if this is the correct implementation - should + // the priority axis be determined by magnitude? + { + + CGU_FLOAT rg_pos = 0.0f; + CGU_FLOAT bg_pos = 0.0f; + CGU_FLOAT rb_pos = 0.0f; + + for (CGU_INT32 i = 0; i < BLOCK_SIZE_4X4; i++) + { + rgb = srcRGB[i] - average_rgb; + axisVectorRGB = axisVectorRGB + fabs(rgb); + if (rgb.x > 0) { + rg_pos += rgb.y; + rb_pos += rgb.z; + } + if (rgb.z > 0) bg_pos += rgb.y; + } + + // Average over BLOCK_SIZE_4X4 + axisVectorRGB = axisVectorRGB*0.0625F; - if (mxRGB2 < RGB2[j]) { - mxRGB2 = RGB2[j]; - i0 = j; + // New average position + if (rg_pos < 0) axisVectorRGB.x = -axisVectorRGB.x; + if (bg_pos < 0) axisVectorRGB.z = -axisVectorRGB.z; + if ((rg_pos == bg_pos) && (rg_pos == 0)) + { + if (rb_pos < 0) axisVectorRGB.z = -axisVectorRGB.z; + } } - } - CGU_FLOAT fEPS2 = fNumPoints * EPS2; - *_pbSmall = TRUE; - for (j = 0; j < 3; j++) *_pbSmall &= (RGB2[j] < fEPS2); + // ------------------------------------------------------------------------------------- + // (5) Axis projection and remapping + // ------------------------------------------------------------------------------------- + { + CGU_FLOAT v2_recip; + // Normalize the axis for simplicity of future calculation + v2_recip = dot(axisVectorRGB,axisVectorRGB); + if (v2_recip > 0) + v2_recip = 1.0f / (CGU_FLOAT)sqrt(v2_recip); + else + v2_recip = 1.0f; + axisVectorRGB = axisVectorRGB*v2_recip; + } - if (*_pbSmall) // all are very small to avoid division on the small - // determinant - return; + // ------------------------------------------------------------------------------------- + // (6) Map the axis + // ------------------------------------------------------------------------------------- + // the line joining (and extended on either side of) 
average and axis + // defines the axis onto which the points will be projected + // Project all the points onto the axis, calculate the distance along + // the axis from the centre of the axis (average) + // From Foley & Van Dam: Closest point of approach of a line (P + v) to a point (R) is + // P + ((R-P).v) / (v.v))v + // The distance along v is therefore (R-P).v / (v.v) where (v.v) is 1 if v is a unit vector. + // + // Calculate the extremities at the same time - these need to be reasonably accurately + // represented in all cases + { + axisleft = CMP_FLOAT_MAX; + axisright = -CMP_FLOAT_MAX; + for (CGU_INT32 i = 0; i < BLOCK_SIZE_4X4; i++) + { + // Compute the distance along the axis of the point of closest approach + CGU_Vec3f temp = (srcRGB[i] - average_rgb); + pos_on_axis[i] = dot(temp,axisVectorRGB); + + // Work out the extremities + if (pos_on_axis[i] < axisleft) + axisleft = pos_on_axis[i]; + if (pos_on_axis[i] > axisright) + axisright = pos_on_axis[i]; + } + } - if (k == 1) // really only 1 dimension - fLineDirection[i0] = 1.; - else if (k == 2) // really only 2 dimensions - { - i1 = (RGB2[(i0 + 1) % 3] > 0.f) ? (i0 + 1) % 3 : (i0 + 2) % 3; - CGU_FLOAT Crl = (i1 == (i0 + 1) % 3) ? 
Crrl[i0] : Crrl[(i0 + 2) % 3]; - fLineDirection[i1] = Crl / RGB2[i0]; - fLineDirection[i0] = 1.; - } else { - CGU_FLOAT maxDet = 100000.f; - CGU_FLOAT Cs[3]; - // select max det for precision - for (j = 0; j < nDimensions; j++) { - CGU_FLOAT Det = RGB2[j] * RGB2[(j + 1) % 3] - Crrl[j] * Crrl[j]; - Cs[j] = fabs(Crrl[j] / sqrt(RGB2[j] * RGB2[(j + 1) % 3])); - if (maxDet < Det) { - maxDet = Det; - i0 = j; - } + // --------------------------------------------------------------------------------------------- + // (7) Now we have a good axis and the basic information about how the points are mapped to it + // Our initial guess is to represent the endpoints accurately, by moving the average + // to the centre and recalculating the point positions along the line + // --------------------------------------------------------------------------------------------- + { + axiscentre = (axisleft + axisright) * 0.5F; + average_rgb = average_rgb + (axisVectorRGB*axiscentre); + for (CGU_INT32 i = 0; i | -B A | - // -- -- -- -- - CGU_FLOAT mtrx1[2][2]; - CGU_FLOAT vc1[2]; - CGU_FLOAT vc[2]; - vc1[0] = Crrl[(i0 + 2) % 3]; - vc1[1] = Crrl[(i0 + 1) % 3]; - // C - mtrx1[0][0] = RGB2[(i0 + 1) % 3]; - // A - mtrx1[1][1] = RGB2[i0]; - // -B - mtrx1[1][0] = mtrx1[0][1] = -Crrl[i0]; - // find a solution - vc[0] = mtrx1[0][0] * vc1[0] + mtrx1[0][1] * vc1[1]; - vc[1] = mtrx1[1][0] * vc1[0] + mtrx1[1][1] * vc1[1]; - // normalize - vc[0] /= maxDet; - vc[1] /= maxDet; - // find a line direction vector - fLineDirection[i0] = 1.; - fLineDirection[(i0 + 1) % 3] = 1.; - fLineDirection[(i0 + 2) % 3] = vc[0] + vc[1]; - } + // ------------------------------------------------------------------------------------- + // (8) Calculate the high and low output colour values + // Involved in this is a rounding procedure which is undoubtedly slightly twitchy. A + // straight rounded average is not correct, as the decompressor 'unrounds' by replicating + // the top bits to the bottom. 
+ // In order to take account of this process, we don't just apply a straight rounding correction, + // but base our rounding on the input value (a straight rounding is actually pretty good in terms of + // error measure, but creates a visual colour and/or brightness shift relative to the original image) + // The method used here is to apply a centre-biased rounding dependent on the input value, which was + // (mostly by experiment) found to give minimum MSE while preserving the visual characteristics of + // the image. + // rgb = (average_rgb + (left|right)*axisVectorRGB); + // ------------------------------------------------------------------------------------- + { + CGU_Vec3f MinColor, MaxColor; - // normalize direction vector - CGU_FLOAT Len = fLineDirection[0] * fLineDirection[0] + - fLineDirection[1] * fLineDirection[1] + - fLineDirection[2] * fLineDirection[2]; - Len = sqrt(Len); + MinColor = average_rgb + (axisVectorRGB * axisleft); + MaxColor = average_rgb + (axisVectorRGB * axisright); + MinColor.z = (MinColor.z*2)- MinColor.y; + MaxColor.z = (MaxColor.z*2)- MaxColor.y; - for (j = 0; j < 3; j++) - fLineDirection[j] = (Len > 0.f) ? 
fLineDirection[j] / Len : 0.f; -} -#endif // !BC5 -#endif // !BC4 - -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) -static void CompressRGBBlockX( - CGU_FLOAT _RsltRmpPnts[NUM_CHANNELS][NUM_ENDPOINTS], - CGU_FLOAT _BlkIn[MAX_BLOCK][NUM_CHANNELS], CGU_FLOAT _Rpt[MAX_BLOCK], - int _UniqClrs, CGU_UINT8 dwNumPoints, CGU_BOOL b3DRefinement, - CGU_UINT8 nRefinementSteps, CMP_GLOBAL const CMP_BC15Options *BC15options, - CGU_UINT8 nRedBits, CGU_UINT8 nGreenBits, CGU_UINT8 nBlueBits) { - ALIGN_16 CGU_FLOAT Prj0[MAX_BLOCK]; - ALIGN_16 CGU_FLOAT Prj[MAX_BLOCK]; - ALIGN_16 CGU_FLOAT PrjErr[MAX_BLOCK]; - ALIGN_16 CGU_FLOAT LineDir[NUM_CHANNELS]; - ALIGN_16 CGU_FLOAT RmpIndxs[MAX_BLOCK]; - - CGU_FLOAT LineDirG[NUM_CHANNELS]; - CGU_FLOAT PosG[NUM_ENDPOINTS]; - CGU_FLOAT Blk[MAX_BLOCK][NUM_CHANNELS]; - CGU_FLOAT BlkSh[MAX_BLOCK][NUM_CHANNELS]; - CGU_FLOAT LineDir0[NUM_CHANNELS]; - CGU_FLOAT Mdl[NUM_CHANNELS]; + cmp_ProcessColors(CMP_REFINOUT MinColor,CMP_REFINOUT MaxColor,CMP_REFINOUT c0, CMP_REFINOUT c1,1,false); + + // Force to be a 4-colour opaque block - in which case, c0 is greater than c1 + swap = 0; + if (c0 < c1) + { + CGU_UINT32 t; + t = c0; + c0 = c1; + c1 = t; + swap = 1; + } + else if (c0 == c1) + { + // This block will always be encoded in 3-colour mode + // Need to ensure that only one of the two points gets used, + // avoiding accidentally setting some transparent pixels into the block + for (CGU_INT32 i = 0; i cn[1] Max Color = index 0, 2/3 offset =index 2, 1/3 offset = index 3, Min Color = index 1 + // CGU_Vec3f cn[4]; + // cn[0] = MaxColor; + // cn[1] = MinColor; + // cn[2] = cn[0]*2.0f/3.0f + cn[1]*1.0f/3.0f; + // cn[3] = cn[0]*1.0f/3.0f + cn[1]*2.0f/3.0f; + + for (CGU_INT32 i = 0; i average) are 0 and 1, while + // interpolants are 2 and 3 + if (fabs(pos_on_axis[i]) >= division) + index = 0; + else + index = 2; + // Positive is in the latter half of the block + if (pos_on_axis[i] >= axiscentre) + index += 1; + + index = index^swap; + // 
Set the output, taking swapping into account + compressedBlock.y |= (index << (2 * i)); + + // use err calc for use in higher quality code + //CompMinErr += dot(srcRGBRef[i] - cn[index],srcRGBRef[i] - cn[index]); + } - // down to [0., 1.] - for (i = 0; i < _UniqClrs; i++) - for (j = 0; j < 3; j++) Blk[i][j] = _BlkIn[i][j] / 255.f; + //CompMinErr = CompMinErr * 0.0208333f; - CGU_BOOL isDONE = FALSE; + CompMinErr = CMP_RGBBlockError(rgbBlockUVf,compressedBlock,isSRGB); + Q1CompErr = CMP_RGBBlockError(rgbBlockUVf,Q1CompData,isSRGB); - // as usual if not more then 2 different colors, we've done - if (_UniqClrs <= 2) { - for (j = 0; j < 3; j++) { - rsltC[j][0] = _BlkIn[0][j]; - rsltC[j][1] = _BlkIn[_UniqClrs - 1][j]; + if (CompMinErr > Q1CompErr) { + compressedBlock = Q1CompData; + CMP_PTRINOUT errout = Q1CompErr; + } + else + CMP_PTRINOUT errout = CompMinErr; + } } - isDONE = TRUE; + // done + + return compressedBlock; +} + +#ifndef CMP_USE_LOWQUALITY +static CMP_EndPoints CompressRGBBlock_Slow( + CGU_Vec3f BlkInBGRf_UV[BLOCK_SIZE_4X4], + CGU_FLOAT Rpt[BLOCK_SIZE_4X4], + CGU_UINT32 dwUniqueColors, + CGU_Vec3f channelWeightsBGR, + CGU_UINT32 m_nRefinementSteps + ) +{ + CMP_UNUSED(channelWeightsBGR); + CMP_UNUSED(m_nRefinementSteps); + ALIGN_16 CGU_FLOAT Prj0[BLOCK_SIZE_4X4]; + ALIGN_16 CGU_FLOAT Prj[BLOCK_SIZE_4X4]; + ALIGN_16 CGU_FLOAT PrjErr[BLOCK_SIZE_4X4]; + ALIGN_16 CGU_FLOAT RmpIndxs[BLOCK_SIZE_4X4]; + + CGU_Vec3f LineDirG; + CGU_Vec3f LineDir; + CGU_FLOAT LineDir0[NUM_CHANNELS]; + CGU_Vec3f BlkUV[BLOCK_SIZE_4X4]; + CGU_Vec3f BlkSh[BLOCK_SIZE_4X4]; + CGU_Vec3f Mdl; + + CGU_Vec3f rsltC0; + CGU_Vec3f rsltC1; + CGU_Vec3f PosG0 = {0.0f,0.0f,0.0f}; + CGU_Vec3f PosG1 = {0.0f,0.0f,0.0f}; + CGU_UINT32 i; + + for (i = 0; i < dwUniqueColors; i++) + { + BlkUV[i] = BlkInBGRf_UV[i]; } - if (!isDONE) { + // if not more then 2 different colors, we've done + if (dwUniqueColors <= 2) { + rsltC0 = BlkInBGRf_UV[0] * 255.0f; + rsltC1 = BlkInBGRf_UV[dwUniqueColors - 1] * 255.0f; + } + 
else { // This is our first attempt to find an axis we will go along. // The cumulation is done to find a line minimizing the MSE from the // input 3D points. - CGU_BOOL bSmall = TRUE; - FindAxis(BlkSh, LineDir0, Mdl, &bSmall, Blk, _Rpt, 3, _UniqClrs); // While trying to find the axis we found that the diameter of the input // set is quite small. Do not bother. - if (bSmall) { - for (j = 0; j < 3; j++) { - rsltC[j][0] = _BlkIn[0][j]; - rsltC[j][1] = _BlkIn[_UniqClrs - 1][j]; + + // FindAxisIsSmall(BlkSh, LineDir0, Mdl, Blk, Rpt,dwUniqueColors); + { + CGU_UINT32 ii; + CGU_UINT32 jj; + CGU_UINT32 kk; + + // These vars cannot be Vec3 as index to them are varying + CGU_FLOAT Crrl[NUM_CHANNELS]; + CGU_FLOAT RGB2[NUM_CHANNELS]; + + LineDir0[0] = LineDir0[1] = LineDir0[2] = + RGB2[0] = RGB2[1] = RGB2[2] = + Crrl[0] = Crrl[1] = Crrl[2] = + Mdl.x = Mdl.y = Mdl.z = 0.f; + + // sum position of all points + CGU_FLOAT fNumPoints = 0.0f; + for (ii = 0; ii < dwUniqueColors; ii++) { + Mdl.x += BlkUV[ii].x * Rpt[ii]; + Mdl.y += BlkUV[ii].y * Rpt[ii]; + Mdl.z += BlkUV[ii].z * Rpt[ii]; + fNumPoints += Rpt[ii]; } - isDONE = TRUE; - } - } - // GCC is being an awful being when it comes to goto-jumps. - // So please bear with this. 
- if (!isDONE) { + // and then average to calculate center coordinate of block + Mdl /= fNumPoints; + + for (ii = 0; ii < dwUniqueColors; ii++) { + // calculate output block as offsets around block center + BlkSh[ii] = BlkUV[ii] - Mdl; + + // compute correlation matrix + // RGB2 = sum of ((distance from point from center) squared) + RGB2[0] += BlkSh[ii].x * BlkSh[ii].x * Rpt[ii]; + RGB2[1] += BlkSh[ii].y * BlkSh[ii].y * Rpt[ii]; + RGB2[2] += BlkSh[ii].z * BlkSh[ii].z * Rpt[ii]; + + Crrl[0] += BlkSh[ii].x * BlkSh[ii].y * Rpt[ii]; + Crrl[1] += BlkSh[ii].y * BlkSh[ii].z * Rpt[ii]; + Crrl[2] += BlkSh[ii].z * BlkSh[ii].x * Rpt[ii]; + + } + + // if set's diameter is small + CGU_UINT32 i0 = 0, i1 = 1; + CGU_FLOAT mxRGB2 = 0.0f; + + CGU_FLOAT fEPS = fNumPoints * EPS; + for (kk = 0, jj = 0; jj < 3; jj++) { + if (RGB2[jj] >= fEPS) + kk++; + else + RGB2[jj] = 0.0f; + + if (mxRGB2 < RGB2[jj]) { + mxRGB2 = RGB2[jj]; + i0 = jj; + } + } + + CGU_FLOAT fEPS2 = fNumPoints * EPS2; + CGU_BOOL AxisIsSmall; + + AxisIsSmall = (RGB2[0] < fEPS2); + AxisIsSmall = AxisIsSmall && (RGB2[1] < fEPS2); + AxisIsSmall = AxisIsSmall && (RGB2[2] < fEPS2); + + // all are very small to avoid division on the small determinant + if (AxisIsSmall) { + rsltC0 = BlkInBGRf_UV[0]*255.0f; + rsltC1 = BlkInBGRf_UV[dwUniqueColors - 1]*255.0f; + } + else { // !AxisIsSmall + if (kk == 1) // really only 1 dimension + LineDir0[i0] = 1.; + else + if (kk == 2) // really only 2 dimensions + { + i1 = (RGB2[(i0 + 1) % 3] > 0.f) ? (i0 + 1) % 3 : (i0 + 2) % 3; + CGU_FLOAT Crl = (i1 == (i0 + 1) % 3) ? 
Crrl[i0] : Crrl[(i0 + 2) % 3]; + LineDir0[i1] = Crl / RGB2[i0]; + LineDir0[i0] = 1.; + } + else { + CGU_FLOAT maxDet = 100000.f; + CGU_FLOAT Cs[3]; + // select max det for precision + for (jj = 0; jj < 3; jj++) { // 3 = nDimensions + CGU_FLOAT Det = RGB2[jj] * RGB2[(jj + 1) % 3] - Crrl[jj] * Crrl[jj]; + Cs[jj] = fabs(Crrl[jj] / sqrt(RGB2[jj] * RGB2[(jj + 1) % 3])); + if (maxDet < Det) { + maxDet = Det; + i0 = jj; + } + } + + // inverse correl matrix + // -- -- -- -- + // | A B | | C -B | + // | B C | => | -B A | + // -- -- -- -- + CGU_FLOAT mtrx1[2][2]; + CGU_FLOAT vc1[2]; + CGU_FLOAT vc[2]; + vc1[0] = Crrl[(i0 + 2) % 3]; + vc1[1] = Crrl[(i0 + 1) % 3]; + // C + mtrx1[0][0] = RGB2[(i0 + 1) % 3]; + // A + mtrx1[1][1] = RGB2[i0]; + // -B + mtrx1[1][0] = mtrx1[0][1] = -Crrl[i0]; + // find a solution + vc[0] = mtrx1[0][0] * vc1[0] + mtrx1[0][1] * vc1[1]; + vc[1] = mtrx1[1][0] * vc1[0] + mtrx1[1][1] * vc1[1]; + // normalize + vc[0] /= maxDet; + vc[1] /= maxDet; + // find a line direction vector + LineDir0[i0] = 1.; + LineDir0[(i0 + 1) % 3] = 1.; + LineDir0[(i0 + 2) % 3] = vc[0] + vc[1]; + } + + // normalize direction vector + CGU_FLOAT Len = LineDir0[0] * LineDir0[0] + + LineDir0[1] * LineDir0[1] + + LineDir0[2] * LineDir0[2]; + Len = sqrt(Len); + + LineDir0[0] = (Len > 0.f) ? LineDir0[0] / Len : 0.0f; + LineDir0[1] = (Len > 0.f) ? LineDir0[1] / Len : 0.0f; + LineDir0[2] = (Len > 0.f) ? LineDir0[2] / Len : 0.0f; + } + } // FindAxisIsSmall + + // GCC is being an awful being when it comes to goto-jumps. + // So please bear with this. CGU_FLOAT ErrG = 10000000.f; - CGU_FLOAT PrjBnd[NUM_ENDPOINTS]; - ALIGN_16 CGU_FLOAT PreMRep[MAX_BLOCK]; - for (j = 0; j < 3; j++) LineDir[j] = LineDir0[j]; + CGU_FLOAT PrjBnd0; + CGU_FLOAT PrjBnd1; + ALIGN_16 CGU_FLOAT PreMRep[BLOCK_SIZE_4X4]; + + LineDir.x = LineDir0[0]; + LineDir.y = LineDir0[1]; + LineDir.z = LineDir0[2]; // Here is the main loop. // 1. Project input set on the axis in consideration. - // 2. 
Run 1 dimensional search (see scalar case) to find an (sub) optimal - // pair of end points. - // 3. Compute the vector of indexes (or clusters) for the current - // approximate ramp. + // 2. Run 1 dimensional search (see scalar case) to find an (sub) optimal pair of end points. + // 3. Compute the vector of indexes (or clusters) for the current approximate ramp. // 4. Present our color channels as 3 16DIM vectors. - // 5. Find closest approximation of each of 16DIM color vector with the - // projection of the 16DIM index vector. + // 5. Find closest approximation of each of 16DIM color vector with the projection of the 16DIM index vector. // 6. Plug the projections as a new directional vector for the axis. // 7. Goto 1. - - // D - is 16 dim "index" vector (or 16 DIM vector of indexes - {0, 1/3, - // 2/3, 0, ...,}, but shifted and normalized). Ci - is a 16 dim vector of - // color i. for each Ci find a scalar Ai such that (Ai * D - Ci) (Ai * D - // - Ci) -> min , i.e distance between vector AiD and C is min. You can - // think of D as a unit interval(vector) "clusterizer", and Ai is a scale - // you need to apply to the clusterizer to approximate the Ci vector - // instead of the unit vector. - + // D - is 16 dim "index" vector (or 16 DIM vector of indexes - {0, 1/3,2/3, 0, ...,}, but shifted and normalized). + // Ci - is a 16 dim vector of color i. for each Ci find a scalar Ai such that (Ai * D - Ci) (Ai * D - Ci) -> min , + // i.e distance between vector AiD and C is min. You can think of D as a unit interval(vector) "clusterizer", and Ai is a scale + // you need to apply to the clusterizer to approximate the Ci vector instead of the unit vector. // Solution is - // Ai = (D . Ci) / (D . D); . - is a dot product. - // in 3 dim space Ai(s) represent a line direction, along which // we again try to find (sub)optimal quantizer. - // That's what our for(;;) loop is about. for (;;) { // 1. Project input set on the axis in consideration. 
@@ -1336,1025 +1902,614 @@ static void CompressRGBBlockX( // The distance along v is therefore (R-P).v / (v.v) // (v.v) is 1 if v is a unit vector. // - PrjBnd[0] = 1000.; - PrjBnd[1] = -1000.; - for (i = 0; i < MAX_BLOCK; i++) + PrjBnd0 = 1000.0f; + PrjBnd1 = -1000.0f; + for (i = 0; i < BLOCK_SIZE_4X4; i++) Prj0[i] = Prj[i] = PrjErr[i] = PreMRep[i] = 0.f; - for (i = 0; i < _UniqClrs; i++) { - Prj0[i] = Prj[i] = BlkSh[i][0] * LineDir[0] + BlkSh[i][1] * LineDir[1] + - BlkSh[i][2] * LineDir[2]; - - PrjErr[i] = (BlkSh[i][0] - LineDir[0] * Prj[i]) * - (BlkSh[i][0] - LineDir[0] * Prj[i]) + - (BlkSh[i][1] - LineDir[1] * Prj[i]) * - (BlkSh[i][1] - LineDir[1] * Prj[i]) + - (BlkSh[i][2] - LineDir[2] * Prj[i]) * - (BlkSh[i][2] - LineDir[2] * Prj[i]); - - PrjBnd[0] = minf(PrjBnd[0], Prj[i]); - PrjBnd[1] = maxf(PrjBnd[1], Prj[i]); + for (i = 0; i < dwUniqueColors; i++) { + Prj0[i] = Prj[i] = dot(BlkSh[i],LineDir); + PrjErr[i] = dot(BlkSh[i]-LineDir* Prj[i],BlkSh[i]-LineDir*Prj[i]); + PrjBnd0 = min(PrjBnd0, Prj[i]); + PrjBnd1 = max(PrjBnd1, Prj[i]); } // 2. Run 1 dimensional search (see scalar case) to find an (sub) optimal // pair of end points. 
// min and max of the search interval - CGU_FLOAT Scl[NUM_ENDPOINTS]; - Scl[0] = PrjBnd[0] - (PrjBnd[1] - PrjBnd[0]) * 0.125f; - ; - Scl[1] = PrjBnd[1] + (PrjBnd[1] - PrjBnd[0]) * 0.125f; - ; + CGU_FLOAT Scl0; + CGU_FLOAT Scl1; + Scl0 = PrjBnd0 - (PrjBnd1 - PrjBnd0) * 0.125f; + Scl1 = PrjBnd1 + (PrjBnd1 - PrjBnd0) * 0.125f; // compute scaling factor to scale down the search interval to [0.,1] - const CGU_FLOAT Scl2 = (Scl[1] - Scl[0]) * (Scl[1] - Scl[0]); - const CGU_FLOAT overScl = 1.f / (Scl[1] - Scl[0]); + const CGU_FLOAT Scl2 = (Scl1 - Scl0) * (Scl1 - Scl0); + const CGU_FLOAT overScl = 1.f / (Scl1 - Scl0); - for (i = 0; i < _UniqClrs; i++) { + for (i = 0; i < dwUniqueColors; i++) { // scale them - Prj[i] = (Prj[i] - Scl[0]) * overScl; - // premultiply the scale squire to plug into error computation later - PreMRep[i] = _Rpt[i] * Scl2; + Prj[i] = (Prj[i] - Scl0) * overScl; + // premultiply the scale square to plug into error computation later + PreMRep[i] = Rpt[i] * Scl2; } // scale first approximation of end points - for (k = 0; k < 2; k++) PrjBnd[k] = (PrjBnd[k] - Scl[0]) * overScl; + PrjBnd0 = (PrjBnd0 - Scl0) * overScl; + PrjBnd1 = (PrjBnd1 - Scl0) * overScl; - CGU_FLOAT Err = MAX_ERROR; + CGU_FLOAT StepErr = MAX_ERROR; // search step - CGU_FLOAT stp = 0.025f; + CGU_FLOAT searchStep = 0.025f; // low Start/End; high Start/End - const CGU_FLOAT lS = - (PrjBnd[0] - 2.f * stp > 0.f) ? PrjBnd[0] - 2.f * stp : 0.f; - const CGU_FLOAT hE = - (PrjBnd[1] + 2.f * stp < 1.f) ? PrjBnd[1] + 2.f * stp : 1.f; + const CGU_FLOAT lowStartEnd = (PrjBnd0 - 2.f * searchStep > 0.f) ? PrjBnd0 - 2.f * searchStep : 0.f; + const CGU_FLOAT highStartEnd = (PrjBnd1 + 2.f * searchStep < 1.f) ? 
PrjBnd1 + 2.f * searchStep : 1.f; // find the best endpoints - CGU_FLOAT Pos[NUM_ENDPOINTS]; - CGU_FLOAT lP, hP; + CGU_FLOAT Pos0 = 0; + CGU_FLOAT Pos1 = 0; + CGU_FLOAT lowPosStep, highPosStep; + CGU_FLOAT err; + int l, h; - for (l = 0, lP = lS; l < 8; l++, lP += stp) { - for (h = 0, hP = hE; h < 8; h++, hP -= stp) { - CGU_FLOAT err = Err; - // compute an error for the current pair of end points. - err = RampSrchW(Prj, PrjErr, PreMRep, err, lP, hP, _UniqClrs, - dwNumPoints); - - if (err < Err) { - // save better result - Err = err; - Pos[0] = lP; - Pos[1] = hP; - } - } + for (l = 0, lowPosStep = lowStartEnd; l < 8; l++, lowPosStep += searchStep) { + for (h = 0, highPosStep = highStartEnd; h < 8; h++, highPosStep -= searchStep) { + // compute an error for the current pair of end points. + err = cmp_getRampErr(Prj, PrjErr, PreMRep, StepErr, lowPosStep, highPosStep, dwUniqueColors); + + if (err < StepErr) { + // save better result + StepErr = err; + Pos0 = lowPosStep; + Pos1 = highPosStep; + } + } } // inverse the scaling - for (k = 0; k < 2; k++) Pos[k] = Pos[k] * (Scl[1] - Scl[0]) + Scl[0]; + Pos0 = Pos0 * (Scl1 - Scl0) + Scl0; + Pos1 = Pos1 * (Scl1 - Scl0) + Scl0; // did we find somthing better from the previous run? - if (Err + 0.001 < ErrG) { + if (StepErr + 0.001 < ErrG) { // yes, remember it - ErrG = Err; - LineDirG[0] = LineDir[0]; - LineDirG[1] = LineDir[1]; - LineDirG[2] = LineDir[2]; - PosG[0] = Pos[0]; - PosG[1] = Pos[1]; + ErrG = StepErr; + LineDirG = LineDir; + + PosG0.x = Pos0; + PosG0.y = Pos0; + PosG0.z = Pos0; + PosG1.x = Pos1; + PosG1.y = Pos1; + PosG1.z = Pos1; + // 3. Compute the vector of indexes (or clusters) for the current // approximate ramp. 
// indexes - const CGU_FLOAT step = (Pos[1] - Pos[0]) / (CGU_FLOAT)(dwNumPoints - 1); + const CGU_FLOAT step = (Pos1 - Pos0) / 3.0f; // (dwNumChannels=4 - 1); const CGU_FLOAT step_h = step * (CGU_FLOAT)0.5; const CGU_FLOAT rstep = (CGU_FLOAT)1.0f / step; - const CGU_FLOAT overBlkTp = 1.f / (CGU_FLOAT)(dwNumPoints - 1); + const CGU_FLOAT overBlkTp = 1.f / 3.0f; // (dwNumChannels=4 - 1); // here the index vector is computed, // shifted and normalized - CGU_FLOAT indxAvrg = (CGU_FLOAT)(dwNumPoints - 1) / 2.f; + CGU_FLOAT indxAvrg = 3.0f / 2.0f; // (dwNumChannels=4 - 1); - for (i = 0; i < _UniqClrs; i++) { + for (i = 0; i < dwUniqueColors; i++) { CGU_FLOAT del; // CGU_UINT32 n = (CGU_UINT32)((b - _min_ex + (step*0.5f)) * rstep); - if ((del = Prj0[i] - Pos[0]) <= 0) + if ((del = Prj0[i] - Pos0) <= 0) RmpIndxs[i] = 0.f; - else if (Prj0[i] - Pos[1] >= 0) - RmpIndxs[i] = (CGU_FLOAT)(dwNumPoints - 1); + else if (Prj0[i] - Pos1 >= 0) + RmpIndxs[i] = 3.0f; // (dwNumChannels=4 - 1); else RmpIndxs[i] = floor((del + step_h) * rstep); // shift and normalization RmpIndxs[i] = (RmpIndxs[i] - indxAvrg) * overBlkTp; } - // 4. Present our color channels as 3 16DIM vectors. + // 4. Present our color channels as 3 16 DIM vectors. // 5. Find closest aproximation of each of 16DIM color vector with the // pojection of the 16DIM index vector. 
- CGU_FLOAT Crs[3], Len, Len2; - for (i = 0, Crs[0] = Crs[1] = Crs[2] = Len = 0.f; i < _UniqClrs; i++) { - const CGU_FLOAT PreMlt = RmpIndxs[i] * _Rpt[i]; + CGU_Vec3f Crs = {0.0f,0.0f,0.0f}; + CGU_FLOAT Len = 0.0f; + + for (i = 0; i < dwUniqueColors; i++) { + const CGU_FLOAT PreMlt = RmpIndxs[i] * Rpt[i]; Len += RmpIndxs[i] * PreMlt; - for (j = 0; j < 3; j++) Crs[j] += BlkSh[i][j] * PreMlt; + Crs.x += BlkSh[i].x * PreMlt; + Crs.y += BlkSh[i].y * PreMlt; + Crs.z += BlkSh[i].z * PreMlt; } - LineDir[0] = LineDir[1] = LineDir[2] = 0.f; - if (Len > 0.f) { - LineDir[0] = Crs[0] / Len; - LineDir[1] = Crs[1] / Len; - LineDir[2] = Crs[2] / Len; - + LineDir.x = LineDir.y = LineDir.z = 0.0f; + if (Len > 0.0f) { + CGU_FLOAT Len2; + LineDir = Crs / Len; // 6. Plug the projections as a new directional vector for the axis. // 7. Goto 1. - Len2 = LineDir[0] * LineDir[0] + LineDir[1] * LineDir[1] + - LineDir[2] * LineDir[2]; + Len2 = dot(LineDir,LineDir); // LineDir.x * LineDir.x + LineDir.y * LineDir.y + LineDir.z * LineDir.z; Len2 = sqrt(Len2); - - LineDir[0] /= Len2; - LineDir[1] /= Len2; - LineDir[2] /= Len2; + LineDir /= Len2; } - } else // We was not able to find anything better. Drop dead. + } + else // We was not able to find anything better. Drop out. break; } // inverse transform to find end-points of 3-color ramp - for (k = 0; k < 2; k++) - for (j = 0; j < 3; j++) - rsltC[j][k] = (PosG[k] * LineDirG[j] + Mdl[j]) * 255.f; - } + rsltC0 = (PosG0 * LineDirG + Mdl) * 255.f; + rsltC1 = (PosG1 * LineDirG + Mdl) * 255.f; + } // !isDone // We've dealt with (almost) unrestricted full precision realm. - // Now back to the dirty digital world. + // Now back digital world. 
// round the end points to make them look like compressed ones - CGU_FLOAT inpRmpEndPts[NUM_CHANNELS][NUM_ENDPOINTS]; - MkRmpOnGrid(inpRmpEndPts, rsltC, 0.f, 255.f, nRedBits, nGreenBits, nBlueBits); - - // This not a small procedure squeezes and stretches the ramp along each - // axis (R,G,B) separately while other 2 are fixed. It does it only over - // coarse grid - 565 that is. It tries to squeeze more precision for the - // real world ramp. - if (b3DRefinement) - Refine3D(_RsltRmpPnts, inpRmpEndPts, _BlkIn, _Rpt, _UniqClrs, dwNumPoints, - BC15options, nRedBits, nGreenBits, nBlueBits, nRefinementSteps); - else - Refine(_RsltRmpPnts, inpRmpEndPts, _BlkIn, _Rpt, _UniqClrs, dwNumPoints, - BC15options, nRedBits, nGreenBits, nBlueBits, nRefinementSteps); -} -#endif // !BC5 -#endif // !BC4 - -#ifdef ASPM_GPU -void cmp_memsetfBCn(CGU_FLOAT ptr[], CGU_FLOAT value, CGU_UINT32 size) { - for (CGU_UINT32 i = 0; i < size; i++) { - ptr[i] = value; - } -} -#endif - -#ifdef ASPM_GPU -void memset(CGU_UINT8 *srcdata, CGU_UINT8 value, CGU_INT size) { - for (CGU_INT i = 0; i < size; i++) *srcdata++ = value; -} - -void memcpy(CGU_UINT8 *srcdata, CGU_UINT8 *dstdata, CGU_INT size) { - for (CGU_INT i = 0; i < size; i++) { - *srcdata = *dstdata; - srcdata++; - dstdata++; - } -} - -void cmp_memsetBC1(CGU_UINT8 ptr[], CGU_UINT8 value, CGU_UINT32 size) { - for (CGU_UINT32 i = 0; i < size; i++) { - ptr[i] = value; - } -} -#endif - -#ifdef ASPM_GPU -static void sortData_UINT32(CGU_UINT32 data_ordered[BLOCK_SIZE], - CGU_UINT32 projection[BLOCK_SIZE], - CGU_UINT32 numEntries // max 64 -) { - CMP_di what[BLOCK_SIZE]; - - for (CGU_UINT32 i = 0; i < numEntries; i++) { - what[i].index = i; - what[i].data = projection[i]; - } - - CGU_UINT32 tmp_index; - CGU_UINT32 tmp_data; - - for (CGU_UINT32 i = 1; i < numEntries; i++) { - for (CGU_UINT32 j = i; j > 0; j--) { - if (what[j - 1].data > what[j].data) { - tmp_index = what[j].index; - tmp_data = what[j].data; - what[j].index = what[j - 1].index; - 
what[j].data = what[j - 1].data; - what[j - 1].index = tmp_index; - what[j - 1].data = tmp_data; - } - } - } - - for (CGU_UINT32 i = 0; i < numEntries; i++) data_ordered[i] = what[i].data; -}; - -static void sortData_FLOAT(CGU_FLOAT data_ordered[BLOCK_SIZE], - CGU_FLOAT projection[BLOCK_SIZE], - CGU_UINT32 numEntries // max 64 -) { - CMP_df what[BLOCK_SIZE]; - - for (CGU_UINT32 i = 0; i < numEntries; i++) { - what[i].index = i; - what[i].data = projection[i]; - } - - CGU_UINT32 tmp_index; - CGU_FLOAT tmp_data; - - for (CGU_UINT32 i = 1; i < numEntries; i++) { - for (CGU_UINT32 j = i; j > 0; j--) { - if (what[j - 1].data > what[j].data) { - tmp_index = what[j].index; - tmp_data = what[j].data; - what[j].index = what[j - 1].index; - what[j].data = what[j - 1].data; - what[j - 1].index = tmp_index; - what[j - 1].data = tmp_data; - } - } - } + CGU_Vec3f inpRmpEndPts0 = {0.0f,255.0f,0.0f}; + CGU_Vec3f inpRmpEndPts1 = {0.0f,255.0f,0.0f}; + CGU_Vec3f Fctrs0 = {8.0f,4.0f,8.0f}; //(1 << (PIX_GRID - BG)); x (1 << (PIX_GRID - GG)); y (1 << (PIX_GRID - RG)); z + CGU_Vec3f Fctrs1 = {32.0f,64.0f,32.0f}; //(CGU_FLOAT)(1 << RG); z (CGU_FLOAT)(1 << GG); y (CGU_FLOAT)(1 << BG); x + CGU_FLOAT _Min = 0.0f; + CGU_FLOAT _Max = 255.0f; - for (CGU_UINT32 i = 0; i < numEntries; i++) data_ordered[i] = what[i].data; -}; -#endif + { // MkRmpOnGrid(inpRmpEndPts, rsltC, _Min, _Max); -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) -static CGU_FLOAT CompRGBBlock(CGU_UINT32 *block_32, CGU_UINT32 dwBlockSize, - CGU_UINT8 nRedBits, CGU_UINT8 nGreenBits, - CGU_UINT8 nBlueBits, - CGU_UINT8 nEndpoints[3][NUM_ENDPOINTS], - CGU_UINT8 *pcIndices, CGU_UINT8 dwNumPoints, - CGU_BOOL b3DRefinement, - CGU_UINT8 nRefinementSteps, - CMP_GLOBAL const CMP_BC15Options *BC15options, - CGU_BOOL _bUseAlpha, CGU_UINT8 _nAlphaThreshold) { - ALIGN_16 CGU_FLOAT Rpt[BLOCK_SIZE]; - ALIGN_16 CGU_FLOAT BlkIn[BLOCK_SIZE][NUM_CHANNELS]; -#ifndef ASPM_GPU - memset(Rpt, 0, sizeof(Rpt)); - memset(BlkIn, 0, 
sizeof(BlkIn)); -#else - cmp_memsetfBCn(&Rpt[0], 0, BLOCK_SIZE); - cmp_memsetfBCn(&BlkIn[0][0], 0, BLOCK_SIZE * NUM_CHANNELS); -#endif + inpRmpEndPts0 = floor(rsltC0); - CGU_UINT32 dwAlphaThreshold = _nAlphaThreshold << 24; - CGU_UINT32 dwColors = 0; - CGU_UINT32 dwBlk[BLOCK_SIZE]; - for (CGU_UINT32 i = 0; i < dwBlockSize; i++) - if (!_bUseAlpha || (block_32[i] & 0xff000000) >= dwAlphaThreshold) - dwBlk[dwColors++] = block_32[i] | 0xff000000; - - // Do we have any colors ? - if (dwColors) { - CGU_BOOL bHasAlpha = (dwColors != dwBlockSize); - if (bHasAlpha && _bUseAlpha && !(dwNumPoints & 0x1)) return CMP_FLOAT_MAX; - - // CGU_UINT32 dwBlk_sorted[BLOCK_SIZE]; - // Here we are computing an unique number of colors. - // For each unique value we compute the number of it appearences. -#ifndef ASPM_GPU - qsort((void *)dwBlk, (size_t)dwColors, sizeof(CGU_UINT32), QSortIntCmp); -#else - sortData_UINT32(dwBlk, dwBlk, dwColors); -#endif - - CGU_UINT32 new_p; - CGU_UINT32 dwBlkU[BLOCK_SIZE]; - CGU_UINT32 dwUniqueColors = 0; - new_p = dwBlkU[0] = dwBlk[0]; - Rpt[dwUniqueColors] = 1.f; - for (CGU_UINT32 i = 1; i < dwColors; i++) { - if (new_p != dwBlk[i]) { - dwUniqueColors++; - new_p = dwBlkU[dwUniqueColors] = dwBlk[i]; - Rpt[dwUniqueColors] = 1.f; - } else - Rpt[dwUniqueColors] += 1.f; - } - dwUniqueColors++; - - // switch to float - for (CGU_UINT32 i = 0; i < dwUniqueColors; i++) { - BlkIn[i][RC] = (CGU_FLOAT)((dwBlkU[i] >> 16) & 0xff); // R - BlkIn[i][GC] = (CGU_FLOAT)((dwBlkU[i] >> 8) & 0xff); // G - BlkIn[i][BC] = (CGU_FLOAT)((dwBlkU[i] >> 0) & 0xff); // B - BlkIn[i][AC] = 255.f; // A - } - - CGU_FLOAT rsltC[NUM_CHANNELS][NUM_ENDPOINTS]; - CompressRGBBlockX(rsltC, BlkIn, Rpt, dwUniqueColors, dwNumPoints, - b3DRefinement, nRefinementSteps, BC15options, nRedBits, - nGreenBits, nBlueBits); - - // return to integer realm - for (CGU_INT32 i = 0; i < 3; i++) - for (CGU_INT32 j = 0; j < 2; j++) - nEndpoints[i][j] = (CGU_UINT8)rsltC[i][j]; - - return Clstr(block_32, dwBlockSize, 
nEndpoints, pcIndices, dwNumPoints, - BC15options, _bUseAlpha, _nAlphaThreshold, nRedBits, - nGreenBits, nBlueBits); - } else { - // All colors transparent - nEndpoints[0][0] = nEndpoints[1][0] = nEndpoints[2][0] = 0; - nEndpoints[0][1] = nEndpoints[1][1] = nEndpoints[2][1] = 0xff; -#ifndef ASPM_GPU - memset(pcIndices, 0xff, dwBlockSize); -#else - cmp_memsetBC1(pcIndices, 0xff, dwBlockSize); -#endif - return 0.0; - } -} -#endif // !BC5 -#endif // !BC4 - -#if !defined(BC4_ENCODE_KERNEL_H) -#if !defined(BC5_ENCODE_KERNEL_H) -static void CompressRGBBlock(const CGU_UINT8 rgbBlock[64], - CMP_GLOBAL CGU_UINT32 compressedBlock[2], - CMP_GLOBAL const CMP_BC15Options *BC15options, - CGU_BOOL bDXT1, CGU_BOOL bDXT1UseAlpha, - CGU_UINT8 nDXT1AlphaThreshold) { - CGU_BOOL m_b3DRefinement = FALSE; - CGU_UINT8 m_nRefinementSteps = 1; - - /* - ARGB Channel indexes - */ - if (bDXT1) { - CGU_UINT8 nEndpoints[2][3][2]; - CGU_UINT8 nIndices[2][16]; - - CGU_FLOAT fError3 = CompRGBBlock((CGU_UINT32 *)rgbBlock, BLOCK_SIZE_4X4, RG, GG, BG, nEndpoints[0], - nIndices[0], 3, m_b3DRefinement, m_nRefinementSteps, BC15options, - bDXT1UseAlpha, nDXT1AlphaThreshold); - CGU_FLOAT fError4 = (fError3 == 0.0) ? CMP_FLOAT_MAX : CompRGBBlock((CGU_UINT32 *)rgbBlock, BLOCK_SIZE_4X4, RG, GG, BG, - nEndpoints[1], nIndices[1], 4, m_b3DRefinement, - m_nRefinementSteps, BC15options, bDXT1UseAlpha, - nDXT1AlphaThreshold); - - CGU_INT32 nMethod = (fError3 <= fError4) ? 
0 : 1; - CGU_INT32 c0 = ConstructColour((nEndpoints[nMethod][RC][0] >> (8 - RG)), - (nEndpoints[nMethod][GC][0] >> (8 - GG)), - (nEndpoints[nMethod][BC][0] >> (8 - BG))); - CGU_INT32 c1 = ConstructColour((nEndpoints[nMethod][RC][1] >> (8 - RG)), - (nEndpoints[nMethod][GC][1] >> (8 - GG)), - (nEndpoints[nMethod][BC][1] >> (8 - BG))); - CGU_BOOL m1 = (nMethod == 1 && c0 <= c1); - CGU_BOOL m2 = (nMethod == 0 && c0 > c1); - if (m1 || m2) - compressedBlock[0] = c1 | (c0 << 16); - else - compressedBlock[0] = c0 | (c1 << 16); - - compressedBlock[1] = 0; - for (CGU_INT32 i = 0; i < 16; i++) - compressedBlock[1] |= (nIndices[nMethod][i] << (2 * i)); - } else { - CGU_UINT8 nEndpoints[3][2]; - CGU_UINT8 nIndices[BLOCK_SIZE_4X4]; - - CompRGBBlock((CGU_UINT32 *)rgbBlock, BLOCK_SIZE_4X4, RG, GG, BG, nEndpoints, - nIndices, 4, m_b3DRefinement, m_nRefinementSteps, BC15options, - bDXT1UseAlpha, nDXT1AlphaThreshold); - - CGU_INT32 c0 = ConstructColour((nEndpoints[RC][0] >> (8 - RG)), - (nEndpoints[GC][0] >> (8 - GG)), - (nEndpoints[BC][0] >> (8 - BG))); - CGU_INT32 c1 = ConstructColour((nEndpoints[RC][1] >> (8 - RG)), - (nEndpoints[GC][1] >> (8 - GG)), - (nEndpoints[BC][1] >> (8 - BG))); - if (c0 <= c1) - compressedBlock[0] = c1 | (c0 << 16); - else - compressedBlock[0] = c0 | (c1 << 16); - - compressedBlock[1] = 0; - for (CGU_INT32 i = 0; i < 16; i++) - compressedBlock[1] |= (nIndices[i] << (2 * i)); - } -} -#endif // BC5 - -#endif // BC4 - -#if !defined(BC1_ENCODE_KERNEL_H) -#if !defined(BC2_ENCODE_KERNEL_H) -static CGU_FLOAT RmpSrch1(CGU_FLOAT _Blk[MAX_BLOCK], CGU_FLOAT _Rpt[MAX_BLOCK], - CGU_FLOAT _maxerror, CGU_FLOAT _min_ex, - CGU_FLOAT _max_ex, CGU_INT _NmbrClrs, - CGU_UINT8 nNumPoints) { - CGU_FLOAT error = 0; - const CGU_FLOAT step = (_max_ex - _min_ex) / (CGU_FLOAT)(nNumPoints - 1); - const CGU_FLOAT step_h = step * 0.5f; - const CGU_FLOAT rstep = 1.0f / step; - - for (CGU_INT i = 0; i < _NmbrClrs; i++) { - CGU_FLOAT v; - // Work out which value in the block this select - 
CGU_FLOAT del; - - if ((del = _Blk[i] - _min_ex) <= 0) - v = _min_ex; - else if (_Blk[i] - _max_ex >= 0) - v = _max_ex; - else - v = (floor((del + step_h) * rstep) * step) + _min_ex; - - // And accumulate the error - CGU_FLOAT del2 = (_Blk[i] - v); - error += del2 * del2 * _Rpt[i]; - - // if we've already lost to the previous step bail out - if (_maxerror < error) { - error = _maxerror; - break; - } - } - return error; -} -#endif // !BC2 - -#if !defined(BC2_ENCODE_KERNEL_H) -static CGU_FLOAT BlockRefine1(CGU_FLOAT _Blk[MAX_BLOCK], - CGU_FLOAT _Rpt[MAX_BLOCK], CGU_FLOAT _MaxError, - CGU_FLOAT *_min_ex, CGU_FLOAT *_max_ex, - CGU_FLOAT _m_step, CGU_FLOAT _min_bnd, - CGU_FLOAT _max_bnd, CGU_INT _NmbrClrs, - CGU_UINT8 dwNumPoints) { - // Start out assuming our endpoints are the min and max values we've - // determined - - // Attempt a (simple) progressive refinement step to reduce noise in the - // output image by trying to find a better overall match for the endpoints. - - CGU_FLOAT maxerror = _MaxError; - CGU_FLOAT min_ex = *_min_ex; - CGU_FLOAT max_ex = *_max_ex; - - int mode, bestmode; - do { - CGU_FLOAT cr_min0 = min_ex; - CGU_FLOAT cr_max0 = max_ex; - for (bestmode = -1, mode = 0; mode < SCH_STPS * SCH_STPS; mode++) { - // check each move (see sStep for direction) - CGU_FLOAT cr_min = min_ex + _m_step * sMvF[mode / SCH_STPS]; - CGU_FLOAT cr_max = max_ex + _m_step * sMvF[mode % SCH_STPS]; - - cr_min = maxf(cr_min, _min_bnd); - cr_max = minf(cr_max, _max_bnd); - - CGU_FLOAT error; - error = RmpSrch1(_Blk, _Rpt, maxerror, cr_min, cr_max, _NmbrClrs, - dwNumPoints); - - if (error < maxerror) { - maxerror = error; - bestmode = mode; - cr_min0 = cr_min; - cr_max0 = cr_max; - } - } + if (inpRmpEndPts0.x <= _Min) inpRmpEndPts0.x = _Min; + else { + inpRmpEndPts0.x += floor(128.f / Fctrs1.x) - floor(inpRmpEndPts0.x / Fctrs1.x); + inpRmpEndPts0.x = min(inpRmpEndPts0.x, _Max); + } + if (inpRmpEndPts0.y <= _Min) inpRmpEndPts0.y = _Min; + else { + inpRmpEndPts0.y += floor(128.f 
/ Fctrs1.y) - floor(inpRmpEndPts0.y / Fctrs1.y); + inpRmpEndPts0.y = min(inpRmpEndPts0.y, _Max); + } + if (inpRmpEndPts0.z <= _Min) inpRmpEndPts0.z = _Min; + else { + inpRmpEndPts0.z += floor(128.f / Fctrs1.z) - floor(inpRmpEndPts0.z / Fctrs1.z); + inpRmpEndPts0.z = min(inpRmpEndPts0.z, _Max); + } - if (bestmode != -1) { - // make move (see sStep for direction) - min_ex = cr_min0; - max_ex = cr_max0; - } - } while (bestmode != -1); + inpRmpEndPts0 = floor(inpRmpEndPts0 / Fctrs0) * Fctrs0; - *_min_ex = min_ex; - *_max_ex = max_ex; + inpRmpEndPts1 = floor(rsltC1); + if (inpRmpEndPts1.x <= _Min) inpRmpEndPts1.x = _Min; + else { + inpRmpEndPts1.x += floor(128.f / Fctrs1.x) - floor(inpRmpEndPts1.x / Fctrs1.x); + inpRmpEndPts1.x = min(inpRmpEndPts1.x, _Max); + } + if (inpRmpEndPts1.y <= _Min) inpRmpEndPts1.y = _Min; + else { + inpRmpEndPts1.y += floor(128.f / Fctrs1.y) - floor(inpRmpEndPts1.y / Fctrs1.y); + inpRmpEndPts1.y = min(inpRmpEndPts1.y, _Max); + } + if (inpRmpEndPts1.z <= _Min) inpRmpEndPts1.z = _Min; + else { + inpRmpEndPts1.z += floor(128.f / Fctrs1.z) - floor(inpRmpEndPts1.z / Fctrs1.z); + inpRmpEndPts1.z = min(inpRmpEndPts1.z, _Max); + } - return maxerror; -} -#endif //! BC2 + inpRmpEndPts1 = floor(inpRmpEndPts1 / Fctrs0) * Fctrs0; + } // MkRmpOnGrid -#if !defined(BC2_ENCODE_KERNEL_H) -static int QSortFCmp(const void *Elem1, const void *Elem2) { - int ret = 0; + CMP_EndPoints EndPoints; + EndPoints.Color0 = inpRmpEndPts0; + EndPoints.Color1 = inpRmpEndPts1; - if (*(CGU_FLOAT *)Elem1 - *(CGU_FLOAT *)Elem2 < 0.) - ret = -1; - else if (*(CGU_FLOAT *)Elem1 - *(CGU_FLOAT *)Elem2 > 0.) 
- ret = 1; - return ret; + return EndPoints; } -#endif // !BC2 - -#if !defined(BC2_ENCODE_KERNEL_H) -static CGU_FLOAT CompBlock1(CGU_FLOAT _RmpPnts[NUM_ENDPOINTS], - CGU_FLOAT _Blk[MAX_BLOCK], CGU_INT _Nmbr, - CGU_UINT8 dwNumPoints, CGU_BOOL bFixedRampPoints, - CGU_INT _IntPrc, CGU_INT _FracPrc, - CGU_BOOL _bFixedRamp) { - CGU_FLOAT fMaxError = 0.f; - - CGU_FLOAT Ramp[NUM_ENDPOINTS]; - - CGU_FLOAT IntFctr = (CGU_FLOAT)(1 << _IntPrc); - // CGU_FLOAT FracFctr = (CGU_FLOAT)(1 << _FracPrc); - - ALIGN_16 CGU_FLOAT afUniqueValues[MAX_BLOCK]; - ALIGN_16 CGU_FLOAT afValueRepeats[MAX_BLOCK]; - for (int i = 0; i < MAX_BLOCK; i++) - afUniqueValues[i] = afValueRepeats[i] = 0.f; - - // For each unique value we compute the number of it appearances. - CGU_FLOAT fBlk[MAX_BLOCK]; -#ifdef ASPM_GPU - for (CGU_INT i = 0; i < _Nmbr; i++) { - fBlk[i] = _Blk[i]; - } -#else - memcpy(fBlk, _Blk, _Nmbr * sizeof(CGU_FLOAT)); #endif - // sort the input -#ifndef ASPM_GPU - qsort((void *)fBlk, (size_t)_Nmbr, sizeof(CGU_FLOAT), QSortFCmp); -#else - sortData_FLOAT(fBlk, fBlk, _Nmbr); -#endif - - CGU_FLOAT new_p = -2.; - int N0s = 0, N1s = 0; - CGU_UINT32 dwUniqueValues = 0; - afUniqueValues[0] = 0.f; - - bool requiresCalculation = true; - - if (bFixedRampPoints) { - for (CGU_INT i = 0; i < _Nmbr; i++) { - if (new_p != fBlk[i]) { - new_p = fBlk[i]; - if (new_p <= 1.5 / 255.) - N0s++; - else if (new_p >= 253.5 / 255.) - N1s++; - else { - afUniqueValues[dwUniqueValues] = fBlk[i]; - afValueRepeats[dwUniqueValues] = 1.f; - dwUniqueValues++; - } - } else { - if (dwUniqueValues > 0) { - if (afUniqueValues[dwUniqueValues - 1] == new_p) - afValueRepeats[dwUniqueValues - 1] += 1.f; - } - } +// Process a rgbBlock which is normalized (0.0f ... 
1.0f), signed normal is not implemented +static CGU_Vec2ui CompressBlockBC1_RGBA_Internal( + const CGU_Vec3f rgbBlockUVf[BLOCK_SIZE_4X4], + const CGU_FLOAT BlockA[BLOCK_SIZE_4X4], + CGU_Vec3f channelWeights, + CGU_UINT32 dwAlphaThreshold, + CGU_UINT32 m_nRefinementSteps, + CMP_IN CGU_FLOAT fquality, + CGU_BOOL isSRGB ) +{ + CGU_Vec2ui cmpBlock = {0,0}; + CGU_FLOAT errLQ = 1e6f; + + cmpBlock = CompressRGBBlock_FM(rgbBlockUVf,fquality,isSRGB,CMP_REFINOUT errLQ); + +#ifndef CMP_USE_LOWQUALITY + //------------------------------------------------------------------ + // Processing is in 0..255 range, code needs to be normized to 0..1 + //------------------------------------------------------------------ + if ((errLQ > 0.0f)&&(fquality > CMP_QUALITY2)) { + + CGU_Vec3f rgbBlock_normal[BLOCK_SIZE_4X4]; + CGU_UINT32 nCmpIndices = 0; + CGU_UINT32 c0, c1; + // High Quality + CMP_EndPoints EndPoints = {{0,0,0xFF},{0,0,0xFF}}; + // Hold a err ref to lowest quality compression, to check if new compression is any better + CGU_Vec2ui Q1CompData = cmpBlock; + // High Quality + CGU_UINT32 i; + + ALIGN_16 CGU_FLOAT Rpt[BLOCK_SIZE_4X4]; + CGU_UINT32 pcIndices = 0; + + m_nRefinementSteps = 0; + + CGU_Vec3f BlkInBGRf_UV[BLOCK_SIZE_4X4]; // Normalized Block Input (0..1) in BGR channel format + // Default inidices & endpoints for Transparent Block + CGU_Vec3ui nEndpoints0 = {0,0,0}; // Endpoints are stored BGR as x,y,z + CGU_Vec3ui nEndpoints1 = {0xFF,0xFF,0xFF}; // Endpoints are stored BGR as x,y,z + + for (i = 0; i < BLOCK_SIZE_4X4; i++) + { + Rpt[i] = 0.0f; } - // if number of unique colors is less or eq 2 we've done either, but we know - // that we may have 0s and/or 1s as well. To avoid for the ramp to be - // considered flat we invented couple entries on the way. 
- if (dwUniqueValues <= 2) { - if (dwUniqueValues == 2) // if 2, take them - { - Ramp[0] = floor(afUniqueValues[0] * (IntFctr - 1) + 0.5f); - Ramp[1] = floor(afUniqueValues[1] * (IntFctr - 1) + 0.5f); - } else if (dwUniqueValues == 1) // if 1, add another one - { - Ramp[0] = floor(afUniqueValues[0] * (IntFctr - 1) + 0.5f); - Ramp[1] = Ramp[0] + 1.f; - } else // if 0, invent them - { - Ramp[0] = 128.f; - Ramp[1] = Ramp[0] + 1.f; - } + //=============================================================== + // Check if we have more then 2 colors and process Alpha block + CGU_UINT32 dwColors = 0; + CGU_UINT32 dwBlk[BLOCK_SIZE_4X4]; + CGU_UINT32 R,G,B,A; + for (i = 0; i < BLOCK_SIZE_4X4; i++) + { + // Do any color conversion prior to processing the block + rgbBlock_normal[i] = isSRGB?cmp_linearToSrgb(rgbBlockUVf[i]):rgbBlockUVf[i]; - fMaxError = 0.f; - requiresCalculation = false; - } - } else { - for (CGU_INT i = 0; i < _Nmbr; i++) { - if (new_p != fBlk[i]) { - afUniqueValues[dwUniqueValues] = new_p = fBlk[i]; - afValueRepeats[dwUniqueValues] = 1.f; - dwUniqueValues++; - } else - afValueRepeats[dwUniqueValues - 1] += 1.f; - } + R = (CGU_UINT32)(rgbBlock_normal[i].x*255.0f); + G = (CGU_UINT32)(rgbBlock_normal[i].y*255.0f); + B = (CGU_UINT32)(rgbBlock_normal[i].z*255.0f); - // if number of unique colors is less or eq 2, we've done - if (dwUniqueValues <= 2) { - Ramp[0] = floor(afUniqueValues[0] * (IntFctr - 1) + 0.5f); - if (dwUniqueValues == 1) - Ramp[1] = Ramp[0] + 1.f; - else - Ramp[1] = floor(afUniqueValues[1] * (IntFctr - 1) + 0.5f); - fMaxError = 0.f; - requiresCalculation = false; - } - } + if (dwAlphaThreshold > 0) + A = (CGU_UINT32)BlockA[i]; + else + A = 255; - if (requiresCalculation) { - CGU_FLOAT min_ex = afUniqueValues[0]; - CGU_FLOAT max_ex = afUniqueValues[dwUniqueValues - 1]; - CGU_FLOAT min_bnd = 0, max_bnd = 1.; - CGU_FLOAT min_r = min_ex, max_r = max_ex; - CGU_FLOAT gbl_l = 0, gbl_r = 0; - CGU_FLOAT cntr = (min_r + max_r) / 2; - - CGU_FLOAT gbl_err = 
MAX_ERROR; - // Trying to avoid unnecessary calculations. Heuristics: after some analisis - // it appears that in integer case, if the input interval not more then 48 - // we won't get much better - - bool wantsSearch = !(_INT_GRID && max_ex - min_ex <= 48.f / IntFctr); - - if (wantsSearch) { - // Search. - // 1. take the vicinities of both low and high bound of the input - // interval. - // 2. setup some search step - // 3. find the new low and high bound which provides an (sub) optimal - // (infinite precision) clusterization. - CGU_FLOAT gbl_llb = - (min_bnd > min_r - GBL_SCH_EXT) ? min_bnd : min_r - GBL_SCH_EXT; - CGU_FLOAT gbl_rrb = - (max_bnd < max_r + GBL_SCH_EXT) ? max_bnd : max_r + GBL_SCH_EXT; - CGU_FLOAT gbl_lrb = - (cntr < min_r + GBL_SCH_EXT) ? cntr : min_r + GBL_SCH_EXT; - CGU_FLOAT gbl_rlb = - (cntr > max_r - GBL_SCH_EXT) ? cntr : max_r - GBL_SCH_EXT; - for (CGU_FLOAT step_l = gbl_llb; step_l < gbl_lrb; - step_l += GBL_SCH_STEP) { - for (CGU_FLOAT step_r = gbl_rrb; gbl_rlb <= step_r; - step_r -= GBL_SCH_STEP) { - CGU_FLOAT sch_err; - // an sse version is avaiable - sch_err = RmpSrch1(afUniqueValues, afValueRepeats, gbl_err, step_l, - step_r, dwUniqueValues, dwNumPoints); - if (sch_err < gbl_err) { - gbl_err = sch_err; - gbl_l = step_l; - gbl_r = step_r; - } + // Punch Through Alpha in BC1 Codec (1 bit alpha) + if ((dwAlphaThreshold == 0) || (A >= dwAlphaThreshold)) + { + // copy to local RGB data and have alpha set to 0xFF + dwBlk[dwColors++] = A << 24 | R << 16 | G << 8 | B; } - } - - min_r = gbl_l; - max_r = gbl_r; } - // This is a refinement call. The function tries to make several small - // stretches or squashes to minimize quantization error. 
- CGU_FLOAT m_step = LCL_SCH_STEP / IntFctr; - fMaxError = - BlockRefine1(afUniqueValues, afValueRepeats, gbl_err, &min_r, &max_r, - m_step, min_bnd, max_bnd, dwUniqueValues, dwNumPoints); - - min_ex = min_r; - max_ex = max_r; - - max_ex *= (IntFctr - 1); - min_ex *= (IntFctr - 1); - /* - this one is tricky. for the float or high fractional precision ramp it tries - to avoid for the ramp to be collapsed into one integer number after - rounding. Notice the condition. There is a difference between max_ex and - min_ex but after rounding they may collapse into the same integer. - - So we try to run the same refinement procedure but with starting position on - the integer grid and step equal 1. - */ - if (!_INT_GRID && max_ex - min_ex > 0. && - floor(min_ex + 0.5f) == floor(max_ex + 0.5f)) { - m_step = 1.; - gbl_err = MAX_ERROR; - for (CGU_UINT32 i = 0; i < dwUniqueValues; i++) - afUniqueValues[i] *= (IntFctr - 1); - - max_ex = min_ex = floor(min_ex + 0.5f); - - gbl_err = BlockRefine1(afUniqueValues, afValueRepeats, gbl_err, &min_ex, - &max_ex, m_step, 0.f, 255.f, dwUniqueValues, - dwNumPoints); - - fMaxError = gbl_err; + if (!dwColors) + { + // All are colors transparent + EndPoints.Color0.x = EndPoints.Color0.y = EndPoints.Color0.z = 0.0f; + EndPoints.Color1.x = EndPoints.Color1.y = EndPoints.Color0.z = 255.0f; + nCmpIndices = 0xFFFFFFFF; } - Ramp[1] = floor(max_ex + 0.5f); - Ramp[0] = floor(min_ex + 0.5f); - } + else { // We have colors to process + nCmpIndices = 0; + // Punch Through Alpha Support ToDo + // CGU_BOOL bHasAlpha = (dwColors != BLOCK_SIZE_4X4); + // bHasAlpha = bHasAlpha && (dwAlphaThreshold > 0); // valid for (dwNumChannels=4); + // if (bHasAlpha) { + // CGU_Vec2ui compBlock = {0xf800f800,0}; + // return compBlock; + // } + + // Here we are computing an unique number of sorted colors. + // For each unique value we compute the number of it appearences. 
+ // qsort((void *)dwBlk, (size_t)dwColors, sizeof(CGU_UINT32), QSortIntCmp); + #ifndef ASPM_GPU + std::sort(dwBlk, dwBlk+15); + #else + { + CGU_UINT32 j; + CMP_di what[BLOCK_SIZE_4X4]; + + for (i = 0; i < dwColors; i++) { + what[i].index = i; + what[i].data = dwBlk[i]; + } + + CGU_UINT32 tmp_index; + CGU_UINT32 tmp_data; + + for (i = 1; i < dwColors; i++) { + for (j = i; j > 0; j--) { + if (what[j - 1].data > what[j].data) { + tmp_index = what[j].index; + tmp_data = what[j].data; + what[j].index = what[j - 1].index; + what[j].data = what[j - 1].data; + what[j - 1].index = tmp_index; + what[j - 1].data = tmp_data; + } + } + } + for (i = 0; i < dwColors; i++) dwBlk[i] = what[i].data; + } + #endif + CGU_UINT32 new_p; + CGU_UINT32 dwBlkU[BLOCK_SIZE_4X4]; + CGU_UINT32 dwUniqueColors = 0; + new_p = dwBlkU[0] = dwBlk[0]; + Rpt[dwUniqueColors] = 1.f; + for (i = 1; i < dwColors; i++) { + if (new_p != dwBlk[i]) { + dwUniqueColors++; + new_p = dwBlkU[dwUniqueColors] = dwBlk[i]; + Rpt[dwUniqueColors] = 1.f; + } else + Rpt[dwUniqueColors] += 1.f; + } + dwUniqueColors++; - // Ensure that the two endpoints are not the same - // This is legal but serves no need & can break some optimizations in the - // compressor - if (Ramp[0] == Ramp[1]) { - if (Ramp[1] < 255.f) - Ramp[1]++; - else - Ramp[1]--; - } - _RmpPnts[0] = Ramp[0]; - _RmpPnts[1] = Ramp[1]; + // Simple case of only 2 colors to process + // no need for futher processing as lowest quality methods work best for this case + if (dwUniqueColors <= 2) { + return Q1CompData; + } + else + { + // switch from int range back to UV floats + for (i = 0; i < dwUniqueColors; i++) + { + R = (dwBlkU[i] >> 16) & 0xff; + G = (dwBlkU[i] >> 8) & 0xff; + B = (dwBlkU[i] >> 0) & 0xff; + BlkInBGRf_UV[i].z = (CGU_FLOAT)R/255.0f; + BlkInBGRf_UV[i].y = (CGU_FLOAT)G/255.0f; + BlkInBGRf_UV[i].x = (CGU_FLOAT)B/255.0f; + } - return fMaxError; -} -#endif // !BC2 - -#if !defined(BC2_ENCODE_KERNEL_H) -static void BldRmp1(CGU_FLOAT _Rmp[MAX_POINTS], - 
CGU_FLOAT _InpRmp[NUM_ENDPOINTS], int nNumPoints) { - // for 3 point ramp; not to select the 4th point in min - for (int e = nNumPoints; e < MAX_POINTS; e++) _Rmp[e] = 100000.f; - - _Rmp[0] = _InpRmp[0]; - _Rmp[1] = _InpRmp[1]; - for (int e = 1; e < nNumPoints - 1; e++) - _Rmp[e + 1] = (_Rmp[0] * (nNumPoints - 1 - e) + _Rmp[1] * e) / - (CGU_FLOAT)(nNumPoints - 1); -} -#endif //! BC2 - -#if !defined(BC2_ENCODE_KERNEL_H) -static void GetRmp1(CGU_FLOAT _rampDat[MAX_POINTS], - CGU_FLOAT _ramp[NUM_ENDPOINTS], int nNumPoints, - CGU_BOOL bFixedRampPoints, CGU_INT _intPrec, - CGU_INT _fracPrec, CGU_BOOL _bFixedRamp) { - if (_ramp[0] == _ramp[1]) return; - - CGU_BOOL r0 = _ramp[0] <= _ramp[1]; - CGU_BOOL r1 = _ramp[0] > _ramp[1]; - if ((!bFixedRampPoints && r0) || (bFixedRampPoints && r1)) { - CGU_FLOAT t = _ramp[0]; - _ramp[0] = _ramp[1]; - _ramp[1] = t; - } + CGU_Vec3f channelWeightsBGR; + channelWeightsBGR.x = channelWeights.z; + channelWeightsBGR.y = channelWeights.y; + channelWeightsBGR.z = channelWeights.x; - _rampDat[0] = _ramp[0]; - _rampDat[1] = _ramp[1]; + EndPoints = CompressRGBBlock_Slow( + BlkInBGRf_UV, + Rpt, + dwUniqueColors, + channelWeightsBGR, + m_nRefinementSteps + ); - CGU_FLOAT IntFctr = (CGU_FLOAT)(1 << _intPrec); - CGU_FLOAT FracFctr = (CGU_FLOAT)(1 << _fracPrec); - CGU_FLOAT ramp[NUM_ENDPOINTS]; - ramp[0] = _ramp[0] * FracFctr; - ramp[1] = _ramp[1] * FracFctr; + } + } // colors - BldRmp1(_rampDat, ramp, nNumPoints); - if (bFixedRampPoints) { - _rampDat[nNumPoints] = 0.; - _rampDat[nNumPoints + 1] = FracFctr * IntFctr - 1.f; - } + //=================================================================== + // Process Cluster INPUT is constant EndPointsf OUTPUT is pcIndices + //=================================================================== + if (nCmpIndices == 0) + { + R = (CGU_UINT32)(EndPoints.Color0.z); + G = (CGU_UINT32)(EndPoints.Color0.y); + B = (CGU_UINT32)(EndPoints.Color0.x); + CGU_INT32 cluster0 = cmp_constructColor(R,G,B); + + R = 
(CGU_UINT32)(EndPoints.Color1.z); + G = (CGU_UINT32)(EndPoints.Color1.y); + B = (CGU_UINT32)(EndPoints.Color1.x); + CGU_INT32 cluster1 = cmp_constructColor(R,G,B); + + CGU_Vec3f InpRmp[NUM_ENDPOINTS]; + if ((cluster0 <= cluster1) // valid for 4 channels + // || (cluster0 > cluster1) // valid for 3 channels + ) + { + // inverse endpoints + InpRmp[0] = EndPoints.Color1; + InpRmp[1] = EndPoints.Color0; + } + else + { + InpRmp[0] = EndPoints.Color0; + InpRmp[1] = EndPoints.Color1; + } + + CGU_Vec3f srcblockBGR[BLOCK_SIZE_4X4]; + CGU_FLOAT srcblockA[BLOCK_SIZE_4X4]; + + // Swizzle the source RGB to BGR for processing + for (i = 0; i < BLOCK_SIZE_4X4; i++) { + srcblockBGR[i].z = rgbBlock_normal[i].x*255.0f; + srcblockBGR[i].y = rgbBlock_normal[i].y*255.0f; + srcblockBGR[i].x = rgbBlock_normal[i].z*255.0f; + srcblockA[i] = 0.0f; + if (dwAlphaThreshold > 0) { + CGU_UINT32 alpha = (CGU_UINT32)BlockA[i]; + if (alpha >= dwAlphaThreshold) + srcblockA[i] = BlockA[i]; + } + } - if (_bFixedRamp) { - for (CGU_INT i = 0; i < nNumPoints; i++) { - _rampDat[i] = floor(_rampDat[i] + 0.5f); - _rampDat[i] /= FracFctr; + // input ramp is on the coarse grid + // make ramp endpoints the way they'll going to be decompressed + CGU_Vec3f InpRmpL[NUM_ENDPOINTS]; + CGU_Vec3f Fctrs = {32.0F,64.0F,32.0F}; // 1 << RG,1 << GG,1 << BG + + { // ConstantRamp = MkWkRmpPts(InpRmpL, InpRmp); + InpRmpL[0] = InpRmp[0] + floor(InpRmp[0] / Fctrs); + InpRmpL[0] = cmp_clamp3f(InpRmpL[0],0.0f,255.0f); + InpRmpL[1] = InpRmp[1] + floor(InpRmp[1] / Fctrs); + InpRmpL[1] = cmp_clamp3f(InpRmpL[1],0.0f,255.0f); + } // MkWkRmpPts + + // build ramp + CGU_Vec3f LerpRmp[4]; + CGU_Vec3f offset = {1.0f,1.0f,1.0f}; + { //BldRmp(Rmp, InpRmpL, dwNumChannels); + // linear interpolate end points to get the ramp + LerpRmp[0] = InpRmpL[0]; + LerpRmp[3] = InpRmpL[1]; + LerpRmp[1] = floor((InpRmpL[0]*2.0f + LerpRmp[3] + offset) / 3.0f); + LerpRmp[2] = floor((InpRmpL[0] + LerpRmp[3]*2.0f + offset) / 3.0f); + } // BldRmp + + 
//========================================================================= + // Clusterize, Compute error and find DXTC indexes for the current cluster + //========================================================================= + { // Clusterize + CGU_UINT32 alpha; + + // For each colour in the original block assign it + // to the closest cluster and compute the cumulative error + for (i = 0; i < BLOCK_SIZE_4X4; i++) { + alpha = (CGU_UINT32)srcblockA[i]; + if ((dwAlphaThreshold > 0) && alpha == 0) //*((CGU_DWORD *)&_Blk[i][AC]) == 0) + { + pcIndices |= cmp_set2Bit32(4,i); // dwNumChannels 3 or 4 (default is 4) + } + else + { + CGU_FLOAT shortest = 99999999999.f; + CGU_UINT8 shortestIndex = 0; + + CGU_Vec3f channelWeightsBGR; + channelWeightsBGR.x = channelWeights.z; + channelWeightsBGR.y = channelWeights.y; + channelWeightsBGR.z = channelWeights.x; + + for (CGU_UINT8 rampindex = 0; rampindex < 4; rampindex++) { // r is either 1 or 4 + // calculate the distance for each component + CGU_FLOAT distance = dot(((srcblockBGR[i]- LerpRmp[rampindex])* channelWeightsBGR), + ((srcblockBGR[i]- LerpRmp[rampindex])* channelWeightsBGR)); + if (distance < shortest) { + shortest = distance; + shortestIndex = rampindex; + } + } + + // The total is a sum of (error += shortest) + // We have the index of the best cluster, so assign this in the block + // Reorder indices to match correct DXTC ordering + if (shortestIndex == 3) // dwNumChannels - 1 + shortestIndex = 1; + else if (shortestIndex) + shortestIndex++; + pcIndices |= cmp_set2Bit32(shortestIndex,i); + } + } // BLOCK_SIZE_4X4 + } // Clusterize + }// Process Cluster + + //============================================================== + // Generate Compressed Result from nEndpoints & pcIndices + //============================================================== + R = (CGU_UINT32)(EndPoints.Color0.z); + G = (CGU_UINT32)(EndPoints.Color0.y); + B = (CGU_UINT32)(EndPoints.Color0.x); + c0 = cmp_constructColor(R,G,B); + + R = 
(CGU_UINT32)(EndPoints.Color1.z); + G = (CGU_UINT32)(EndPoints.Color1.y); + B = (CGU_UINT32)(EndPoints.Color1.x); + c1 = cmp_constructColor(R,G,B); + + // Get Processed indices if not set + if (nCmpIndices == 0) + nCmpIndices = pcIndices; + + if (c0 <= c1) + { + cmpBlock.x = c1 | (c0 << 16); + } + else + cmpBlock.x = c0 | (c1 << 16); + + cmpBlock.y = nCmpIndices; + + // Select best compression + CGU_FLOAT CompErr = CMP_RGBBlockError(rgbBlockUVf,cmpBlock,isSRGB); + if (CompErr > errLQ) + cmpBlock = Q1CompData; } - } -} #endif - -#if !defined(BC2_ENCODE_KERNEL_H) -static CGU_FLOAT Clstr1(CGU_UINT8 *pcIndices, CGU_FLOAT _blockIn[MAX_BLOCK], - CGU_FLOAT _ramp[NUM_ENDPOINTS], CGU_INT _NmbrClrs, - CGU_INT nNumPoints, CGU_BOOL bFixedRampPoints, - CGU_INT _intPrec, CGU_INT _fracPrec, - CGU_BOOL _bFixedRamp) { - CGU_FLOAT Err = 0.f; - CGU_FLOAT alpha[MAX_POINTS]; - - for (CGU_INT i = 0; i < _NmbrClrs; i++) pcIndices[i] = 0; - - if (_ramp[0] == _ramp[1]) return Err; - - if (!_bFixedRamp) { - _intPrec = 8; - _fracPrec = 0; - } - - GetRmp1(alpha, _ramp, nNumPoints, bFixedRampPoints, _intPrec, _fracPrec, - _bFixedRamp); - - if (bFixedRampPoints) nNumPoints += 2; - - const CGU_FLOAT OverIntFctr = 1.f / ((CGU_FLOAT)(1 << _intPrec) - 1.f); - for (int i = 0; i < nNumPoints; i++) alpha[i] *= OverIntFctr; - - // For each colour in the original block, calculate its weighted - // distance from each point in the original and assign it - // to the closest cluster - for (int i = 0; i < _NmbrClrs; i++) { - CGU_FLOAT shortest = 10000000.f; - - // Get the original alpha - CGU_FLOAT acur = _blockIn[i]; - - for (CGU_UINT8 j = 0; j < nNumPoints; j++) { - CGU_FLOAT adist = (acur - alpha[j]); - adist *= adist; - - if (adist < shortest) { - shortest = adist; - pcIndices[i] = j; - } - } - - Err += shortest; - } - - return Err; + return cmpBlock; } -#endif // !BC2 - -#if !defined(BC2_ENCODE_KERNEL_H) -static CGU_FLOAT CompBlock1XF(CGU_FLOAT *_Blk, CGU_UINT32 dwBlockSize, - CGU_UINT8 nEndpoints[2], 
CGU_UINT8 *pcIndices, - CGU_UINT8 dwNumPoints, CGU_BOOL bFixedRampPoints, - CGU_INT _intPrec, CGU_INT _fracPrec, - CGU_BOOL _bFixedRamp) { - // just to make them initialized - if (!_bFixedRamp) { - _intPrec = 8; - _fracPrec = 0; - } - // this one makes the bulk of the work - CGU_FLOAT Ramp[NUM_ENDPOINTS]; - CompBlock1(Ramp, _Blk, dwBlockSize, dwNumPoints, bFixedRampPoints, _intPrec, - _fracPrec, _bFixedRamp); +//============================= Alpha: New single header interfaces: supports GPU shader interface ================================================== - // final clusterization applied - CGU_FLOAT fError = Clstr1(pcIndices, _Blk, Ramp, dwBlockSize, dwNumPoints, - bFixedRampPoints, _intPrec, _fracPrec, _bFixedRamp); - nEndpoints[0] = (CGU_UINT8)Ramp[0]; - nEndpoints[1] = (CGU_UINT8)Ramp[1]; - - return fError; -} -#endif //! BC2 -#endif //! BC1 - -#if !defined(BC1_ENCODE_KERNEL_H) -#if !defined(BC2_ENCODE_KERNEL_H) -static CGU_FLOAT CompBlock1X(const CGU_UINT8 *_Blk, CGU_UINT32 dwBlockSize, - CGU_UINT8 nEndpoints[2], CGU_UINT8 *pcIndices, - CGU_UINT8 dwNumPoints, CGU_BOOL bFixedRampPoints, - CGU_INT _intPrec, CGU_INT _fracPrec, - CGU_BOOL _bFixedRamp) { - // convert the input and call the float equivalent. 
- CGU_FLOAT fBlk[MAX_BLOCK]; - for (CGU_UINT32 i = 0; i < dwBlockSize; i++) - fBlk[i] = (CGU_FLOAT)_Blk[i] / 255.f; - - return CompBlock1XF(fBlk, dwBlockSize, nEndpoints, pcIndices, dwNumPoints, - bFixedRampPoints, _intPrec, _fracPrec, _bFixedRamp); +// Compress a BC1 block +static CGU_Vec2ui CompressBlockBC1_UNORM(CGU_Vec3f rgbablockf[BLOCK_SIZE_4X4],CMP_IN CGU_FLOAT fquality,CGU_BOOL isSRGB) +{ + CGU_FLOAT BlockA[BLOCK_SIZE_4X4]; // Not used but required + CGU_Vec3f channelWeights = {1.0f,1.0f,1.0f}; + + return CompressBlockBC1_RGBA_Internal( + rgbablockf, + BlockA, // ToDo support nullptr + channelWeights, + 0,1, + fquality, + isSRGB); } -#endif -#if !defined(BC2_ENCODE_KERNEL_H) -static void EncodeAlphaBlock(CMP_GLOBAL CGU_UINT32 compressedBlock[2], - CGU_UINT8 nEndpoints[2], - CGU_UINT8 nIndices[BLOCK_SIZE_4X4]) { - compressedBlock[0] = - ((CGU_UINT32)nEndpoints[0]) | (((CGU_UINT32)nEndpoints[1]) << 8); - compressedBlock[1] = 0; - - for (CGU_INT i = 0; i < BLOCK_SIZE_4X4; i++) { - if (i < 5) - compressedBlock[0] |= (nIndices[i] & 0x7) << (16 + (i * 3)); - else if (i > 5) - compressedBlock[1] |= (nIndices[i] & 0x7) << (2 + (i - 6) * 3); - else { - compressedBlock[0] |= (nIndices[i] & 0x1) << 31; - compressedBlock[1] |= (nIndices[i] & 0x6) >> 1; - } - } +// Compress a BC2 block +static CGU_Vec4ui CompressBlockBC2_UNORM( CMP_IN CGU_Vec3f BlockRGB[16], CMP_IN CGU_FLOAT BlockA[16], CGU_FLOAT fquality, CGU_BOOL isSRGB) +{ + CGU_Vec2ui compressedBlocks; + CGU_Vec4ui compBlock; + compressedBlocks = cmp_compressExplicitAlphaBlock(BlockA); + compBlock.x = compressedBlocks.x; + compBlock.y = compressedBlocks.y; + + CGU_Vec3f channelWeights = {1.0f,1.0f,1.0f}; + compressedBlocks = CompressBlockBC1_RGBA_Internal( + BlockRGB, + BlockA, + channelWeights, + 0,1, + fquality, + isSRGB); + compBlock.z = compressedBlocks.x; + compBlock.w = compressedBlocks.y; + return compBlock; } -#endif -#endif - -#if !defined(BC1_ENCODE_KERNEL_H) -#if !defined(BC2_ENCODE_KERNEL_H) -static 
CGU_INT32 CompressAlphaBlock(const CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4], - CMP_GLOBAL CGU_UINT32 compressedBlock[2]) { - CGU_UINT8 nEndpoints[2][2]; - CGU_UINT8 nIndices[2][BLOCK_SIZE_4X4]; - CGU_FLOAT fError8 = CompBlock1X(alphaBlock, BLOCK_SIZE_4X4, nEndpoints[0], - nIndices[0], 8, false, 8, 0, true); - CGU_FLOAT fError6 = - (fError8 == 0.f) ? CMP_FLOAT_MAX - : CompBlock1X(alphaBlock, BLOCK_SIZE_4X4, nEndpoints[1], - nIndices[1], 6, true, 8, 0, true); - if (fError8 <= fError6) - EncodeAlphaBlock(compressedBlock, nEndpoints[0], nIndices[0]); - else - EncodeAlphaBlock(compressedBlock, nEndpoints[1], nIndices[1]); - return CGU_CORE_OK; +// Compress a BC3 block +static CGU_Vec4ui CompressBlockBC3_UNORM( CMP_IN CGU_Vec3f BlockRGB[16], CMP_IN CGU_FLOAT BlockA[16], CGU_FLOAT fquality,CGU_BOOL isSRGB) +{ + CGU_Vec4ui compBlock; + CGU_Vec2ui cmpBlock; + + cmpBlock = cmp_compressAlphaBlock(BlockA,fquality); + compBlock.x = cmpBlock.x; + compBlock.y = cmpBlock.y; + + CGU_Vec2ui compressedBlocks; + compressedBlocks = CompressBlockBC1_UNORM(BlockRGB, fquality,isSRGB); + compBlock.z = compressedBlocks.x; + compBlock.w = compressedBlocks.y; + return compBlock; } -#endif -#if !defined(BC2_ENCODE_KERNEL_H) -static void GetCompressedAlphaRamp(CGU_UINT8 alpha[8], - const CGU_UINT32 compressedBlock[2]) { - alpha[0] = (CGU_UINT8)(compressedBlock[0] & 0xff); - alpha[1] = (CGU_UINT8)((compressedBlock[0] >> 8) & 0xff); - - if (alpha[0] > alpha[1]) { - // 8-alpha block: derive the other six alphas. - // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated. 
-#ifdef ASPM_GPU - alpha[2] = - (CGU_UINT8)((6 * alpha[0] + 1 * alpha[1] + 3) / 7); // bit code 010 - alpha[3] = - (CGU_UINT8)((5 * alpha[0] + 2 * alpha[1] + 3) / 7); // bit code 011 - alpha[4] = - (CGU_UINT8)((4 * alpha[0] + 3 * alpha[1] + 3) / 7); // bit code 100 - alpha[5] = - (CGU_UINT8)((3 * alpha[0] + 4 * alpha[1] + 3) / 7); // bit code 101 - alpha[6] = - (CGU_UINT8)((2 * alpha[0] + 5 * alpha[1] + 3) / 7); // bit code 110 - alpha[7] = - (CGU_UINT8)((1 * alpha[0] + 6 * alpha[1] + 3) / 7); // bit code 111 -#else - alpha[2] = static_cast((6 * alpha[0] + 1 * alpha[1] + 3) / - 7); // bit code 010 - alpha[3] = static_cast((5 * alpha[0] + 2 * alpha[1] + 3) / - 7); // bit code 011 - alpha[4] = static_cast((4 * alpha[0] + 3 * alpha[1] + 3) / - 7); // bit code 100 - alpha[5] = static_cast((3 * alpha[0] + 4 * alpha[1] + 3) / - 7); // bit code 101 - alpha[6] = static_cast((2 * alpha[0] + 5 * alpha[1] + 3) / - 7); // bit code 110 - alpha[7] = static_cast((1 * alpha[0] + 6 * alpha[1] + 3) / - 7); // bit code 111 -#endif - } else { - // 6-alpha block. - // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated. 
-#ifdef ASPM_GPU - alpha[2] = - (CGU_UINT8)((4 * alpha[0] + 1 * alpha[1] + 2) / 5); // Bit code 010 - alpha[3] = - (CGU_UINT8)((3 * alpha[0] + 2 * alpha[1] + 2) / 5); // Bit code 011 - alpha[4] = - (CGU_UINT8)((2 * alpha[0] + 3 * alpha[1] + 2) / 5); // Bit code 100 - alpha[5] = - (CGU_UINT8)((1 * alpha[0] + 4 * alpha[1] + 2) / 5); // Bit code 101 -#else - alpha[2] = static_cast((4 * alpha[0] + 1 * alpha[1] + 2) / - 5); // Bit code 010 - alpha[3] = static_cast((3 * alpha[0] + 2 * alpha[1] + 2) / - 5); // Bit code 011 - alpha[4] = static_cast((2 * alpha[0] + 3 * alpha[1] + 2) / - 5); // Bit code 100 - alpha[5] = static_cast((1 * alpha[0] + 4 * alpha[1] + 2) / - 5); // Bit code 101 -#endif - alpha[6] = 0; // Bit code 110 - alpha[7] = 255; // Bit code 111 - } +// Compress a BC4 block +static CGU_Vec2ui CompressBlockBC4_UNORM( CMP_IN CGU_FLOAT Block[16], CGU_FLOAT fquality) +{ + CGU_Vec2ui cmpBlock; + cmpBlock = cmp_compressAlphaBlock(Block,fquality); + return cmpBlock; } -#endif // !BC2 -#if !defined(BC2_ENCODE_KERNEL_H) -static void DecompressAlphaBlock(CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4], - const CGU_UINT32 compressedBlock[2]) { - CGU_UINT8 alpha[8]; - GetCompressedAlphaRamp(alpha, compressedBlock); +// Compress a BC5 block +static CGU_Vec4ui CompressBlockBC5_UNORM( CMP_IN CGU_FLOAT BlockU[16], CMP_IN CGU_FLOAT BlockV[16], CGU_FLOAT fquality) +{ + CGU_Vec4ui compressedBlock = {0,0,0,0}; + CGU_Vec2ui cmpBlock; + cmpBlock = cmp_compressAlphaBlock(BlockU,fquality); + compressedBlock.x = cmpBlock.x; + compressedBlock.y = cmpBlock.y; - for (int i = 0; i < BLOCK_SIZE_4X4; i++) { - CGU_UINT32 index; - if (i < 5) - index = (compressedBlock[0] & (0x7 << (16 + (i * 3)))) >> (16 + (i * 3)); - else if (i > 5) - index = (compressedBlock[1] & (0x7 << (2 + (i - 6) * 3))) >> - (2 + (i - 6) * 3); - else { - index = (compressedBlock[0] & 0x80000000) >> 31; - index |= (compressedBlock[1] & 0x3) << 1; - } + cmpBlock = cmp_compressAlphaBlock(BlockV,fquality); + compressedBlock.z = 
cmpBlock.x; + compressedBlock.w = cmpBlock.y; - alphaBlock[i] = alpha[index]; - } + return compressedBlock; } -#endif // !BC2 -#endif // !BC1 + +// Compress a BC6 & BC7 UNORM block ToDo #endif diff --git a/extern/CMP_Core/shaders/Common_Def.h b/extern/CMP_Core/shaders/Common_Def.h index ed9e94a..b9df7a5 100644 --- a/extern/CMP_Core/shaders/Common_Def.h +++ b/extern/CMP_Core/shaders/Common_Def.h @@ -1,8 +1,5 @@ -#ifndef _COMMON_DEFINITIONS_H -#define _COMMON_DEFINITIONS_H - //=============================================================================== -// Copyright (c) 2007-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2007-2020 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2004-2006 ATI Technologies Inc. //=============================================================================== // @@ -25,11 +22,26 @@ // THE SOFTWARE. // // -// File Name: Common_Def.h +// File Name: Common_Def // Description: common definitions used for CPU/HPC/GPU // ////////////////////////////////////////////////////////////////////////////// +#ifndef _COMMON_DEFINITIONS_H +#define _COMMON_DEFINITIONS_H + +// The shaders for UE4 require extension in the form of .ush in place of standard .h +// this directive is used to make the change without users requiring to modify all of the include extensions +// specific to UE4 + +#ifdef ASPM_HLSL_UE4 +#pragma once +#define INC_cmp_math_vec4 "cmp_math_vec4.ush" +#define INC_cmp_math_func "cmp_math_func.ush" +#else +#define INC_cmp_math_vec4 "cmp_math_vec4.h" +#define INC_cmp_math_func "cmp_math_func.h" +#endif // Features #ifdef _WIN32 @@ -44,15 +56,24 @@ // Using OpenCL Compiler #ifdef __OPENCL_VERSION__ #define ASPM_GPU +#define ASPM_OPENCL #endif +// Using DirectX fxc Compiler +// Note use the /DASPM_HLSL command line to define this +#ifdef ASPM_HLSL +#define ASPM_GPU +#endif #ifdef _LINUX #undef ASPM_GPU +#undef ASPM_OPENCL +#ifndef ASPM_HLSL #include #include #include -#include "cmp_math_vec4.h" 
+#include INC_cmp_math_vec4 +#endif #endif #ifndef CMP_MAX @@ -63,6 +84,13 @@ #define CMP_MIN(x, y) (((x) < (y)) ? (x) : (y)) #endif +#ifndef ASPM_GPU +#define CMP_STATIC_CAST(x,y) static_cast(y) +#else +#define CMP_STATIC_CAST(x,y) (x)(y) +#endif + + #define CMP_SET_BC13_DECODER_RGBA // Sets mapping BC1, BC2 & BC3 to decode Red,Green,Blue and Alpha // RGBA to channels [0,1,2,3] else BGRA maps to [0,1,2,3] // BC4 alpha always maps as AAAA to channels [0,1,2,3] @@ -70,8 +98,8 @@ //#define USE_BLOCK_LINEAR -#define CMP_FLOAT_MAX 3.402823466e+38F // max value used to detect an Error in processing -#define CMP_FLOAT_MAX_EXP 38 +#define CMP_FLOAT_MAX 3.402823466e+38F // max value used to detect an Error in processing +#define CMP_FLOAT_MAX_EXP 38 #define USE_PROCESS_SEPERATE_ALPHA // Enable this to use higher quality code using CompressDualIndexBlock #define COMPRESSED_BLOCK_SIZE 16 // Size of a compressed block in bytes #define MAX_DIMENSION_BIG 4 // Max number of channels (RGBA) @@ -84,6 +112,75 @@ //#define USE_BLOCK_LINEAR // Source Data is organized in linear form for each block : Experimental Code not fully developed //#define USE_DOUBLE // Default is to use float, enable to use double data types only for float definitions +//--------------------------------------------- +// Predefinitions for GPU and CPU compiled code +//--------------------------------------------- + +#ifdef ASPM_HLSL + // ==== Vectors ==== + typedef float2 CGU_Vec2f; + typedef float2 CGV_Vec2f; + typedef float3 CGU_Vec3f; + typedef float3 CGV_Vec3f; + typedef float4 CGU_Vec4f; + typedef float4 CGV_Vec4f; + + typedef int2 CGU_Vec2i; + typedef int2 CGV_Vec2i; + typedef uint2 CGU_Vec2ui; + typedef uint2 CGV_Vec2ui; + + typedef int3 CGU_Vec3i; + typedef int3 CGV_Vec3i; + typedef uint3 CGU_Vec3ui; + typedef uint3 CGV_Vec3ui; + + typedef uint4 CGU_Vec4ui; + typedef uint4 CGV_Vec4ui; + + // ==== Scalar Types ==== to remove from code + typedef int CGU_INT8; + typedef uint CGU_INT; + typedef int 
CGV_INT; + typedef uint CGU_UINT8; + typedef uint CGU_UINT; + + // ==== Scalar Types ==== + typedef int CGU_BOOL; + typedef int CGV_BOOL; + typedef int CGU_INT32; + typedef int CGV_INT32; + typedef uint CGU_UINT32; + typedef uint CGV_UINT32; + typedef float CGV_FLOAT; + typedef float CGU_FLOAT; + typedef min16float CGU_MIN16_FLOAT; // FP16 GPU support defaults to 32 bit if no HW support + + #define TRUE 1 + #define FALSE 0 + #define CMP_CDECL + + #define BC7_ENCODECLASS + #define CMP_EXPORT + #define INLINE + #define uniform + #define varying + #define CMP_GLOBAL + #define CMP_KERNEL + #define CMP_CONSTANT + #define CMP_STATIC + #define CMP_REFINOUT + #define CMP_PTRINOUT + #define CMP_INOUT inout + #define CMP_OUT out + #define CMP_IN in + #define CMP_UNUSED(x) (x); + #define CMP_UNROLL [unroll] + + + +#else + typedef enum { CGU_CORE_OK = 0, // No errors, call was successfull CGU_CORE_ERR_UNKOWN, // An unknown error occurred @@ -95,26 +192,41 @@ typedef enum { } CGU_ERROR_CODES; -//--------------------------------------------- -// Predefinitions for GPU and CPU compiled code -//--------------------------------------------- - -#ifdef ASPM_GPU // GPU Based code +#ifdef ASPM_OPENCL // GPU Based code using OpenCL // ==== Vectors ==== typedef float2 CGU_Vec2f; typedef float2 CGV_Vec2f; typedef float3 CMP_Vec3f; typedef float3 CGU_Vec3f; typedef float3 CGV_Vec3f; + typedef float4 CGU_Vec4f; + typedef float4 CGV_Vec4f; + typedef uchar3 CGU_Vec3uc; typedef uchar3 CGV_Vec3uc; + typedef uchar4 CMP_Vec4uc; typedef uchar4 CGU_Vec4uc; typedef uchar4 CGV_Vec4uc; + typedef int2 CGU_Vec2i; + typedef int2 CGV_Vec2i; + typedef int3 CGU_Vec3i; + typedef int3 CGV_Vec3i; + typedef int4 CGU_Vec4i; + typedef int4 CGV_Vec4i; + + typedef uint2 CGU_Vec2ui; + typedef uint2 CGV_Vec2ui; + typedef uint3 CGU_Vec3ui; + typedef uint3 CGV_Vec3ui; + typedef uint4 CGU_Vec4ui; + typedef uint4 CGV_Vec4ui; + + #define USE_BC7_SP_ERR_IDX - #define ASPM_PRINT(args) printf args #define BC7_ENCODECLASS + 
#define ASPM_PRINT(args) printf args #define CMP_EXPORT #define INLINE @@ -124,13 +236,20 @@ typedef enum { #define CMP_KERNEL __kernel #define CMP_CONSTANT __constant #define CMP_STATIC - + #define CMP_REFINOUT & + #define CMP_PTRINOUT * + #define CMP_INOUT + #define CMP_OUT + #define CMP_IN + #define CMP_UNUSED(x) + #define CMP_UNROLL typedef unsigned int CGU_DWORD; //32bits typedef int CGU_INT; //32bits - typedef int CGU_BOOL; + typedef bool CGU_BOOL; typedef unsigned short CGU_SHORT; //16bits typedef float CGU_FLOAT; + typedef half CGU_MIN16_FLOAT; // FP16 GPU support defaults to 32 bit if no HW support typedef unsigned int uint32; // need to remove this def typedef int CGV_INT; @@ -163,6 +282,15 @@ typedef enum { #else // CPU & ASPM definitions + #define CMP_REFINOUT & + #define CMP_PTRINOUT * + #define CMP_INOUT + #define CMP_OUT + #define CMP_IN + #define CMP_UNUSED(x) (void)(x); + #define CMP_UNROLL + + #ifdef ASPM // SPMD ,SIMD CPU code // using hybrid (CPU/GPU) aspm compiler #define ASPM_PRINT(args) print args @@ -185,6 +313,8 @@ typedef enum { typedef unsigned int64 uint64; typedef uniform float CGU_FLOAT; typedef varying float CGV_FLOAT; + typedef uniform float CGU_MIN16_FLOAT; + typedef uniform uint8 CGU_UINT8; typedef varying uint8 CGV_UINT8; @@ -192,18 +322,24 @@ typedef enum { typedef CGV_UINT8<4> CGV_Vec4uc; typedef CGU_UINT8<4> CGU_Vec4uc; + typedef CGU_FLOAT<2> CGU_Vec2f; + typedef CGV_FLOAT<2> CGV_Vec2f; typedef CGU_FLOAT<3> CGU_Vec3f; typedef CGV_FLOAT<3> CGV_Vec3f; + typedef CGU_FLOAT<4> CGU_Vec4f; + typedef CGV_FLOAT<4> CGV_Vec4f; - typedef CGU_FLOAT<2> CGU_Vec2f; - typedef CGV_FLOAT<2> CGV_Vec2f; + typedef CGU_UINT32<3> CGU_Vec3ui; + typedef CGV_UINT32<3> CGV_Vec3ui; - #define CMP_CDECL + typedef CGU_UINT32<4> CGU_Vec4ui; + typedef CGV_UINT32<4> CGV_Vec4ui; + #define CMP_CDECL #else // standard CPU code #include #include - #include "cmp_math_vec4.h" + #include INC_cmp_math_vec4 // using CPU compiler #define ASPM_PRINT(args) printf args @@ 
-227,7 +363,7 @@ typedef enum { typedef unsigned long uint64; typedef int8 CGV_BOOL; - typedef int8 CGU_BOOL; + typedef bool CGU_BOOL; typedef int16 CGU_WORD; typedef uint8 CGU_SHORT; typedef int64 CGU_LONG; @@ -235,8 +371,19 @@ typedef enum { typedef uniform float CGU_FLOAT; typedef varying float CGV_FLOAT; + typedef uniform float CGU_MIN16_FLOAT; + typedef uniform uint8 CGU_UINT8; typedef varying uint8 CGV_UINT8; + + typedef CMP_Vec3ui CGU_Vec3ui; + typedef CMP_Vec3ui CGV_Vec3ui; + + typedef CMP_Vec4ui CGU_Vec4ui; + typedef CMP_Vec4ui CGV_Vec4ui; + typedef CMP_Vec4f CGU_Vec4f; + typedef CMP_Vec4f CGV_Vec4f; + #if defined(WIN32) || defined(_WIN64) #define CMP_CDECL __cdecl #else @@ -275,9 +422,10 @@ typedef enum { typedef uint16 CGV_UINT16; typedef uint32 CGV_UINT32; typedef uint64 CGV_UINT64; -#endif // ASPM_GPU +#endif // else ASPM_GPU + typedef struct { CGU_UINT32 m_src_width; @@ -287,14 +435,20 @@ typedef struct CGU_FLOAT m_fquality; } Source_Info; +typedef unsigned char* CGU_PTR; + // Ref Compute_CPU_HPC struct texture_surface { - CGU_UINT8* ptr; + CGU_PTR ptr; CGU_INT width, height, stride; CGU_INT channels; }; -#endif + +#endif // else ASPM_HLSL + +#endif // Common_Def.h + \ No newline at end of file diff --git a/extern/CMP_Core/shaders/CopyFiles.bat b/extern/CMP_Core/shaders/CopyFiles.bat index fc125e9..e31392c 100644 --- a/extern/CMP_Core/shaders/CopyFiles.bat +++ b/extern/CMP_Core/shaders/CopyFiles.bat @@ -12,36 +12,55 @@ echo %mypath:~0,-1% IF NOT EXIST "%outpath%"\Plugins mkdir %BUILD_OUTDIR%Plugins IF NOT EXIST "%outpath%"\Plugins\Compute mkdir %BUILD_OUTDIR%Plugins\Compute -REM Build Vulkan Shader Binary -REM "%VULKAN_SDK%"\bin\glslangvalidator -V %mypath:~0,-1%\BC1.comp -o %BUILD_OUTDIR%\Plugins\Compute\BC1.spv +REM ToDo: Build Vulkan based shaders +REM "%VULKAN_SDK%"\bin\glslangvalidator -V %mypath:~0,-1%\BC1... 
-o %BUILD_OUTDIR%\Plugins\Compute\BC1....spv REM IF %ERRORLEVEL% GTR 0 exit 123 -REM Enabled in v4.0 +REM Remove any OpenCL compiled Binaries REM -REM del %BUILD_OUTDIR%Plugins\Compute\BC1_Encode_Kernel.cpp.cmp -REM del %BUILD_OUTDIR%Plugins\Compute\BC2_Encode_Kernel.cpp.cmp -REM del %BUILD_OUTDIR%Plugins\Compute\BC3_Encode_Kernel.cpp.cmp -REM del %BUILD_OUTDIR%Plugins\Compute\BC4_Encode_Kernel.cpp.cmp -REM del %BUILD_OUTDIR%Plugins\Compute\BC5_Encode_Kernel.cpp.cmp -REM del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_Kernel.cpp.cmp -REM del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.cpp.cmp - -XCopy /r /d /y "%mypath:~0,-1%\Common_Def.h" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BCn_Common_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ -XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ +del 
%BUILD_OUTDIR%Plugins\Compute\BC1_Encode_kernel.cpp.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC1_Encode_kernel.hlsl.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC2_Encode_kernel.cpp.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC2_Encode_kernel.hlsl.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC3_Encode_kernel.cpp.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC3_Encode_kernel.hlsl.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC4_Encode_kernel.cpp.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC4_Encode_kernel.hlsl.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC5_Encode_kernel.cpp.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC5_Encode_kernel.hlsl.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_kernel.cpp.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_kernel.hlsl.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_kernel.hlsl.0.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_kernel.hlsl.1.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.cpp.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.hlsl.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.hlsl.0.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.hlsl.1.cmp +del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.hlsl.2.cmp + +XCopy /r /d /y "%mypath:~0,-1%\Common_Def.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BCn_Common_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_kernel.hlsl" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_kernel.hlsl" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_kernel.hlsl" 
%BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_kernel.hlsl" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_kernel.hlsl" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_kernel.hlsl" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.hlsl" %BUILD_OUTDIR%Plugins\Compute\ +XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\ echo "Dependencies copied done" diff --git a/extern/CMP_Core/source/CMP_Core.h b/extern/CMP_Core/source/CMP_Core.h index d54dc27..d794a8c 100644 --- a/extern/CMP_Core/source/CMP_Core.h +++ b/extern/CMP_Core/source/CMP_Core.h @@ -1,5 +1,5 @@ //===================================================================== -// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal @@ -19,7 +19,7 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
// -/// \file CMP_Core.h +/// \file CMP_Core.h CPU User Interface // //===================================================================== diff --git a/extern/CMP_Core/source/cmp_math_func.h b/extern/CMP_Core/source/cmp_math_func.h new file mode 100644 index 0000000..c2f8908 --- /dev/null +++ b/extern/CMP_Core/source/cmp_math_func.h @@ -0,0 +1,143 @@ +//===================================================================== +// Copyright 2020 (c), Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +//===================================================================== +#ifndef CMP_MATH_FUNC_H +#define CMP_MATH_FUNC_H + + +#include "Common_Def.h" + +#ifndef ASPM_GPU + +//============================================================================ +// Core API which have have GPU equivalents, defined here for HPC_CPU usage +//============================================================================ + +#include +using namespace std; + +static CGU_INT QSortFCmp(const void *Elem1, const void *Elem2) { + CGU_INT ret = 0; + + if (*(CGU_FLOAT *)Elem1 - *(CGU_FLOAT *)Elem2 < 0.) + ret = -1; + else if (*(CGU_FLOAT *)Elem1 - *(CGU_FLOAT *)Elem2 > 0.) + ret = 1; + return ret; +} + +static int QSortIntCmp(const void *Elem1, const void *Elem2) +{ + return (*(CGU_INT32 *)Elem1 - *(CGU_INT32 *)Elem2); +} + +static CGU_FLOAT dot(CMP_IN CGU_Vec3f Color,CMP_IN CGU_Vec3f Color2) +{ + CGU_FLOAT ColorDot; + ColorDot = (Color.x * Color2.x) + (Color.y * Color2.y) + (Color.z * Color2.z); + return ColorDot; +} + +static CGU_FLOAT dot(CMP_IN CGU_Vec2f Color,CMP_IN CGU_Vec2f Color2) +{ + CGU_FLOAT ColorDot; + ColorDot = Color.x * Color2.x + Color.y * Color2.y; + return ColorDot; +} + +static CGU_Vec2f abs(CMP_IN CGU_Vec2f Color) +{ + CGU_Vec2f ColorAbs; + ColorAbs.x = std::abs(Color.x); + ColorAbs.y = std::abs(Color.y); + return ColorAbs; +} + +static CGU_Vec3f fabs(CMP_IN CGU_Vec3f Color) +{ + CGU_Vec3f ColorAbs; + ColorAbs.x = std::abs(Color.x); + ColorAbs.y = std::abs(Color.y); + ColorAbs.z = std::abs(Color.z); + return ColorAbs; +} + +static CGU_Vec3f round(CMP_IN CGU_Vec3f Color) +{ + CGU_Vec3f ColorRound; + ColorRound.x = std::round(Color.x); + ColorRound.y = std::round(Color.y); + ColorRound.z = std::round(Color.z); + return ColorRound; +} + +static CGU_Vec2f round(CMP_IN CGU_Vec2f Color) +{ + CGU_Vec2f ColorRound; + ColorRound.x = std::round(Color.x); + ColorRound.y = std::round(Color.y); + return ColorRound; +} + +static CGU_Vec3f ceil(CMP_IN CGU_Vec3f Color) +{ + 
CGU_Vec3f ColorCeil; + ColorCeil.x = std::ceil(Color.x); + ColorCeil.y = std::ceil(Color.y); + ColorCeil.z = std::ceil(Color.z); + return ColorCeil; +} + +static CGU_Vec3f floor(CMP_IN CGU_Vec3f Color) +{ + CGU_Vec3f Colorfloor; + Colorfloor.x = std::floor(Color.x); + Colorfloor.y = std::floor(Color.y); + Colorfloor.z = std::floor(Color.z); + return Colorfloor; +} + +static CGU_Vec3f saturate(CGU_Vec3f value) +{ + if (value.x > 1.0f) value.x = 1.0f; + else + if (value.x < 0.0f) value.x = 0.0f; + + if (value.y > 1.0f) value.y = 1.0f; + else + if (value.y < 0.0f) value.y = 0.0f; + + if (value.z > 1.0f) value.z = 1.0f; + else + if (value.z < 0.0f) value.z = 0.0f; + + return value; +} + +#endif + +//============================================================================ +// Core API which are shared between GPU & CPU +//============================================================================ + +#endif // Header Guard + diff --git a/extern/CMP_Core/source/cmp_math_vec4.h b/extern/CMP_Core/source/cmp_math_vec4.h index d92080e..4467deb 100644 --- a/extern/CMP_Core/source/cmp_math_vec4.h +++ b/extern/CMP_Core/source/cmp_math_vec4.h @@ -30,14 +30,16 @@ #if defined (_LINUX) || defined (_WIN32) //============================================= VEC2 ================================================== +template class vec3; + template class Vec2 { public: - T x; T y; + // ***************************************** // Constructors // ***************************************** @@ -54,7 +56,6 @@ public: /// Single value constructor. 
Sets all components to the given value Vec2(const T& v) : x(v), y(v) {}; - // ***************************************** // Conversions/Assignment/Indexing // ***************************************** @@ -92,6 +93,13 @@ public: /// Subtraction const Vec2 operator-(const Vec2& rhs) const { return Vec2(x - rhs.x, y - rhs.y); }; + /// Multiply + const Vec2 operator*(const Vec2& rhs) const { return Vec2(x * rhs.x, y * rhs.y); }; + + /// Divide + const Vec2 operator/(const Vec2& rhs) const { return Vec2(x / rhs.x, y / rhs.y); }; + + /// Multiply by scalar const Vec2 operator*(const T& v) const { return Vec2(x * v, y * v); }; @@ -113,11 +121,12 @@ public: }; -typedef Vec2 CMP_Vec2f; -typedef Vec2 CGU_Vec2f; -typedef Vec2 CGV_Vec2f; -typedef Vec2 CMP_Vec2d; -typedef Vec2 CMP_Vec2i; +typedef Vec2 CMP_Vec2f; +typedef Vec2 CGU_Vec2f; +typedef Vec2 CGV_Vec2f; +typedef Vec2 CMP_Vec2d; +typedef Vec2 CMP_Vec2i; +typedef Vec2 CGU_Vec2ui; //} @@ -134,6 +143,7 @@ public: T y; T z; + // ***************************************** // Constructors // ***************************************** @@ -180,21 +190,24 @@ public: // Arithmetic // ***************************************** - /// Addition + /// Addition by vector const Vec3 operator+(const Vec3& rhs) const { return Vec3(x + rhs.x, y + rhs.y, z + rhs.z); }; - /// Subtraction + /// Subtraction by vector const Vec3 operator-(const Vec3& rhs) const { return Vec3(x - rhs.x, y - rhs.y, z - rhs.z); }; + /// Multiply by vector + const Vec3 operator*(const Vec3& rhs) const { return Vec3(x * rhs.x, y * rhs.y, z * rhs.z); }; + + /// Divide by vector + const Vec3 operator/(const Vec3& rhs) const { return Vec3(x / rhs.x, y / rhs.y, z / rhs.z); }; + /// Multiply by scalar const Vec3 operator*(const T& v) const { return Vec3(x * v, y * v, z * v); }; /// Divide by scalar const Vec3 operator/(const T& v) const { return Vec3(x / v, y / v, z / v); }; - /// Divide by vector - const Vec3 operator/(const Vec3& rhs) const { return Vec3(x / rhs.x, y / 
rhs.y, z / rhs.z); }; - /// Addition in-place Vec3& operator+= (const Vec3& rhs) { x += rhs.x; y += rhs.y; z += rhs.z; return *this; }; @@ -208,6 +221,7 @@ public: Vec3& operator/= (const T& v) { x /= v; y /= v; z /= v; return *this; }; }; +typedef Vec3 CGU_Vec3bool; typedef Vec3 CGU_Vec3f; typedef Vec3 CGV_Vec3f; typedef Vec3 CGU_Vec3uc; @@ -217,6 +231,7 @@ typedef Vec3 CMP_Vec3f; typedef Vec3 CMP_Vec3d; typedef Vec3 CMP_Vec3i; typedef Vec3 CMP_Vec3uc; +typedef Vec3 CMP_Vec3ui; //============================================= VEC4 ================================================== template @@ -275,21 +290,24 @@ public: // Arithmetic // ***************************************** - /// Addition + /// Addition by vector const Vec4 operator+(const Vec4& rhs) const { return Vec4(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w); }; - /// Subtraction + /// Subtraction by vector const Vec4 operator-(const Vec4& rhs) const { return Vec4(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w); }; + /// Multiply by vector + const Vec4 operator*(const Vec4& rhs) const { return Vec4(x * rhs.x, y * rhs.y, z * rhs.z, w * rhs.w); }; + + /// Divide by vector + const Vec4 operator/(const Vec4& rhs) const { return Vec4(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w); }; + /// Multiply by scalar const Vec4 operator*(const T& v) const { return Vec4(x * v, y * v, z * v, w * v); }; /// Divide by scalar const Vec4 operator/(const T& v) const { return Vec4(x / v, y / v, z / v, w / v); }; - /// Divide by vector - const Vec4 operator/(const Vec4& rhs) const { return Vec4(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w); }; - /// Addition in-place Vec4& operator+= (const Vec4& rhs) { x += rhs.x; y += rhs.y; z += rhs.z; w += rhs.w; return *this; }; diff --git a/extern/CMP_Core/test/BlockConstants.h b/extern/CMP_Core/test/BlockConstants.h index e1c5232..389cd83 100644 --- a/extern/CMP_Core/test/BlockConstants.h +++ b/extern/CMP_Core/test/BlockConstants.h @@ -3,6 +3,7 @@ #include #include struct Block { const unsigned 
char* data; const unsigned char* color; }; +struct BlockBC6 { const unsigned char* data; const float* color; }; static const unsigned char BC1_Red_Ignore_Alpha [] {0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 }; static const unsigned char BC1_Blue_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; @@ -76,6 +77,102 @@ static const unsigned char BC3_Red_Green_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 static const unsigned char BC3_Green_Blue_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 }; static const unsigned char BC3_Red_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 }; static const unsigned char BC3_Green_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC4_Red_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC4_Blue_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC4_White_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC4_Black_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC4_Red_Blue_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC4_Red_Green_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC4_Green_Blue_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC4_Red_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC4_Green_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC4_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC4_White_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; 
+static const unsigned char BC4_Green_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC4_Black_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC4_Red_Blue_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC4_Red_Green_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC4_Green_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC4_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC4_White_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC4_Black_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC4_Red_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC4_Red_Green_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC4_Green_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC4_Red_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC4_Green_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC5_Red_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC5_Blue_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC5_White_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC5_Black_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char 
BC5_Red_Blue_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC5_Red_Green_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC5_Green_Blue_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC5_Red_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC5_Green_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC5_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC5_White_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC5_Green_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC5_Black_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC5_Red_Blue_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC5_Red_Green_Full_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC5_Green_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC5_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC5_White_Ignore_Alpha [] 
{0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC5_Black_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC5_Red_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC5_Red_Green_Ignore_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC5_Green_Blue_Ignore_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC5_Red_Half_Alpha [] {0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24}; +static const unsigned char BC5_Green_Half_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Red_Ignore_Alpha [] {0xe3, 0x3d, 0x0 , 0x0 , 0x78, 0xf , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Blue_Half_Alpha [] {0x3 , 0x0 , 0x0 , 0xde, 0x3 , 0x0 , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_White_Half_Alpha [] {0xe3, 0xbd, 0xf7, 0xde, 0x7b, 0xef, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Black_Half_Alpha [] {0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Red_Blue_Half_Alpha [] {0xe3, 0x3d, 0x0 , 0xde, 0x7b, 0xf , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Red_Green_Half_Alpha [] {0xe3, 0xbd, 0xf7, 0x0 , 0x78, 0xef, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Green_Blue_Half_Alpha [] {0x3 , 0x80, 0xf7, 
0xde, 0x3 , 0xe0, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Red_Full_Alpha [] {0xe3, 0x3d, 0x0 , 0x0 , 0x78, 0xf , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Green_Full_Alpha [] {0x3 , 0x80, 0xf7, 0x0 , 0x0 , 0xe0, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Blue_Full_Alpha [] {0x3 , 0x0 , 0x0 , 0xde, 0x3 , 0x0 , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_White_Full_Alpha [] {0xe3, 0xbd, 0xf7, 0xde, 0x7b, 0xef, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Green_Ignore_Alpha [] {0x3 , 0x80, 0xf7, 0x0 , 0x0 , 0xe0, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Black_Full_Alpha [] {0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Red_Blue_Full_Alpha [] {0xe3, 0x3d, 0x0 , 0xde, 0x7b, 0xf , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Red_Green_Full_Alpha [] {0xe3, 0xbd, 0xf7, 0x0 , 0x78, 0xef, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Green_Blue_Full_Alpha [] {0x3 , 0x80, 0xf7, 0xde, 0x3 , 0xe0, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Blue_Ignore_Alpha [] {0x3 , 0x0 , 0x0 , 0xde, 0x3 , 0x0 , 0x80, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_White_Ignore_Alpha [] {0xe3, 0xbd, 0xf7, 0xde, 0x7b, 0xef, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Black_Ignore_Alpha [] {0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Red_Blue_Ignore_Alpha [] {0xe3, 0x3d, 0x0 , 0xde, 0x7b, 0xf , 0x80, 
0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Red_Green_Ignore_Alpha [] {0xe3, 0xbd, 0xf7, 0x0 , 0x78, 0xef, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Green_Blue_Ignore_Alpha [] {0x3 , 0x80, 0xf7, 0xde, 0x3 , 0xe0, 0xbd, 0xf7, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Red_Half_Alpha [] {0xe3, 0x3d, 0x0 , 0x0 , 0x78, 0xf , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC6_Green_Half_Alpha [] {0x3 , 0x80, 0xf7, 0x0 , 0x0 , 0xe0, 0x3d, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Red_Ignore_Alpha [] {0x10, 0xff, 0x3 , 0x0 , 0xc0, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Blue_Half_Alpha [] {0x20, 0x0 , 0x0 , 0x0 , 0xf0, 0xff, 0xef, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_White_Half_Alpha [] {0x20, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Black_Half_Alpha [] {0x20, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xec, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Red_Blue_Half_Alpha [] {0x20, 0xff, 0x3f, 0x0 , 0xf0, 0xff, 0xef, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Red_Green_Half_Alpha [] {0x20, 0xff, 0xff, 0xff, 0xf , 0x0 , 0xec, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Green_Blue_Half_Alpha [] {0x20, 0x0 , 0xc0, 0xff, 0xff, 0xff, 0xef, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Red_Full_Alpha [] {0x10, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Green_Full_Alpha [] {0x10, 0x0 , 0xfc, 0xf , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 
0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Blue_Full_Alpha [] {0x10, 0x0 , 0x0 , 0xf0, 0x3f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_White_Full_Alpha [] {0x10, 0xff, 0xff, 0xff, 0x3f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Green_Ignore_Alpha [] {0x10, 0x0 , 0xfc, 0xf , 0xc0, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Black_Full_Alpha [] {0x10, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Red_Blue_Full_Alpha [] {0x10, 0xff, 0x3 , 0xf0, 0x3f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Red_Green_Full_Alpha [] {0x10, 0xff, 0xff, 0xf , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Green_Blue_Full_Alpha [] {0x10, 0x0 , 0xfc, 0xff, 0x3f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Blue_Ignore_Alpha [] {0x10, 0x0 , 0x0 , 0xf0, 0xff, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_White_Ignore_Alpha [] {0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Black_Ignore_Alpha [] {0x10, 0x0 , 0x0 , 0x0 , 0xc0, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Red_Blue_Ignore_Alpha [] {0x10, 0xff, 0x3 , 0xf0, 0xff, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Red_Green_Ignore_Alpha [] {0x10, 0xff, 0xff, 0xf , 0xc0, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Green_Blue_Ignore_Alpha [] {0x10, 0x0 , 0xfc, 0xff, 0xff, 0xff, 0x3 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 
0x0 , 0x0 }; +static const unsigned char BC7_Red_Half_Alpha [] {0x20, 0xff, 0x3f, 0x0 , 0x0 , 0x0 , 0xec, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; +static const unsigned char BC7_Green_Half_Alpha [] {0x20, 0x0 , 0xc0, 0xff, 0xf , 0x0 , 0xec, 0xed, 0x1 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }; Block BC1_Red_Ignore_Alpha_Block = {BC1_Red_Ignore_Alpha, nullptr}; Block BC1_Blue_Half_Alpha_Block = {BC1_Blue_Half_Alpha, nullptr}; @@ -149,6 +246,102 @@ Block BC3_Red_Green_Ignore_Alpha_Block = {BC3_Red_Green_Ignore_Alpha, nullptr}; Block BC3_Green_Blue_Ignore_Alpha_Block = {BC3_Green_Blue_Ignore_Alpha, nullptr}; Block BC3_Red_Half_Alpha_Block = {BC3_Red_Half_Alpha, nullptr}; Block BC3_Green_Half_Alpha_Block = {BC3_Green_Half_Alpha, nullptr}; +Block BC4_Red_Ignore_Alpha_Block = {BC4_Red_Ignore_Alpha, nullptr}; +Block BC4_Blue_Half_Alpha_Block = {BC4_Blue_Half_Alpha, nullptr}; +Block BC4_White_Half_Alpha_Block = {BC4_White_Half_Alpha, nullptr}; +Block BC4_Black_Half_Alpha_Block = {BC4_Black_Half_Alpha, nullptr}; +Block BC4_Red_Blue_Half_Alpha_Block = {BC4_Red_Blue_Half_Alpha, nullptr}; +Block BC4_Red_Green_Half_Alpha_Block = {BC4_Red_Green_Half_Alpha, nullptr}; +Block BC4_Green_Blue_Half_Alpha_Block = {BC4_Green_Blue_Half_Alpha, nullptr}; +Block BC4_Red_Full_Alpha_Block = {BC4_Red_Full_Alpha, nullptr}; +Block BC4_Green_Full_Alpha_Block = {BC4_Green_Full_Alpha, nullptr}; +Block BC4_Blue_Full_Alpha_Block = {BC4_Blue_Full_Alpha, nullptr}; +Block BC4_White_Full_Alpha_Block = {BC4_White_Full_Alpha, nullptr}; +Block BC4_Green_Ignore_Alpha_Block = {BC4_Green_Ignore_Alpha, nullptr}; +Block BC4_Black_Full_Alpha_Block = {BC4_Black_Full_Alpha, nullptr}; +Block BC4_Red_Blue_Full_Alpha_Block = {BC4_Red_Blue_Full_Alpha, nullptr}; +Block BC4_Red_Green_Full_Alpha_Block = {BC4_Red_Green_Full_Alpha, nullptr}; +Block BC4_Green_Blue_Full_Alpha_Block = {BC4_Green_Blue_Full_Alpha, nullptr}; +Block BC4_Blue_Ignore_Alpha_Block = {BC4_Blue_Ignore_Alpha, nullptr}; +Block 
BC4_White_Ignore_Alpha_Block = {BC4_White_Ignore_Alpha, nullptr}; +Block BC4_Black_Ignore_Alpha_Block = {BC4_Black_Ignore_Alpha, nullptr}; +Block BC4_Red_Blue_Ignore_Alpha_Block = {BC4_Red_Blue_Ignore_Alpha, nullptr}; +Block BC4_Red_Green_Ignore_Alpha_Block = {BC4_Red_Green_Ignore_Alpha, nullptr}; +Block BC4_Green_Blue_Ignore_Alpha_Block = {BC4_Green_Blue_Ignore_Alpha, nullptr}; +Block BC4_Red_Half_Alpha_Block = {BC4_Red_Half_Alpha, nullptr}; +Block BC4_Green_Half_Alpha_Block = {BC4_Green_Half_Alpha, nullptr}; +Block BC5_Red_Ignore_Alpha_Block = {BC5_Red_Ignore_Alpha, nullptr}; +Block BC5_Blue_Half_Alpha_Block = {BC5_Blue_Half_Alpha, nullptr}; +Block BC5_White_Half_Alpha_Block = {BC5_White_Half_Alpha, nullptr}; +Block BC5_Black_Half_Alpha_Block = {BC5_Black_Half_Alpha, nullptr}; +Block BC5_Red_Blue_Half_Alpha_Block = {BC5_Red_Blue_Half_Alpha, nullptr}; +Block BC5_Red_Green_Half_Alpha_Block = {BC5_Red_Green_Half_Alpha, nullptr}; +Block BC5_Green_Blue_Half_Alpha_Block = {BC5_Green_Blue_Half_Alpha, nullptr}; +Block BC5_Red_Full_Alpha_Block = {BC5_Red_Full_Alpha, nullptr}; +Block BC5_Green_Full_Alpha_Block = {BC5_Green_Full_Alpha, nullptr}; +Block BC5_Blue_Full_Alpha_Block = {BC5_Blue_Full_Alpha, nullptr}; +Block BC5_White_Full_Alpha_Block = {BC5_White_Full_Alpha, nullptr}; +Block BC5_Green_Ignore_Alpha_Block = {BC5_Green_Ignore_Alpha, nullptr}; +Block BC5_Black_Full_Alpha_Block = {BC5_Black_Full_Alpha, nullptr}; +Block BC5_Red_Blue_Full_Alpha_Block = {BC5_Red_Blue_Full_Alpha, nullptr}; +Block BC5_Red_Green_Full_Alpha_Block = {BC5_Red_Green_Full_Alpha, nullptr}; +Block BC5_Green_Blue_Full_Alpha_Block = {BC5_Green_Blue_Full_Alpha, nullptr}; +Block BC5_Blue_Ignore_Alpha_Block = {BC5_Blue_Ignore_Alpha, nullptr}; +Block BC5_White_Ignore_Alpha_Block = {BC5_White_Ignore_Alpha, nullptr}; +Block BC5_Black_Ignore_Alpha_Block = {BC5_Black_Ignore_Alpha, nullptr}; +Block BC5_Red_Blue_Ignore_Alpha_Block = {BC5_Red_Blue_Ignore_Alpha, nullptr}; +Block BC5_Red_Green_Ignore_Alpha_Block 
= {BC5_Red_Green_Ignore_Alpha, nullptr}; +Block BC5_Green_Blue_Ignore_Alpha_Block = {BC5_Green_Blue_Ignore_Alpha, nullptr}; +Block BC5_Red_Half_Alpha_Block = {BC5_Red_Half_Alpha, nullptr}; +Block BC5_Green_Half_Alpha_Block = {BC5_Green_Half_Alpha, nullptr}; +BlockBC6 BC6_Red_Ignore_Alpha_Block = {BC6_Red_Ignore_Alpha, nullptr}; +BlockBC6 BC6_Blue_Half_Alpha_Block = {BC6_Blue_Half_Alpha, nullptr}; +BlockBC6 BC6_White_Half_Alpha_Block = {BC6_White_Half_Alpha, nullptr}; +BlockBC6 BC6_Black_Half_Alpha_Block = {BC6_Black_Half_Alpha, nullptr}; +BlockBC6 BC6_Red_Blue_Half_Alpha_Block = {BC6_Red_Blue_Half_Alpha, nullptr}; +BlockBC6 BC6_Red_Green_Half_Alpha_Block = {BC6_Red_Green_Half_Alpha, nullptr}; +BlockBC6 BC6_Green_Blue_Half_Alpha_Block = {BC6_Green_Blue_Half_Alpha, nullptr}; +BlockBC6 BC6_Red_Full_Alpha_Block = {BC6_Red_Full_Alpha, nullptr}; +BlockBC6 BC6_Green_Full_Alpha_Block = {BC6_Green_Full_Alpha, nullptr}; +BlockBC6 BC6_Blue_Full_Alpha_Block = {BC6_Blue_Full_Alpha, nullptr}; +BlockBC6 BC6_White_Full_Alpha_Block = {BC6_White_Full_Alpha, nullptr}; +BlockBC6 BC6_Green_Ignore_Alpha_Block = {BC6_Green_Ignore_Alpha, nullptr}; +BlockBC6 BC6_Black_Full_Alpha_Block = {BC6_Black_Full_Alpha, nullptr}; +BlockBC6 BC6_Red_Blue_Full_Alpha_Block = {BC6_Red_Blue_Full_Alpha, nullptr}; +BlockBC6 BC6_Red_Green_Full_Alpha_Block = {BC6_Red_Green_Full_Alpha, nullptr}; +BlockBC6 BC6_Green_Blue_Full_Alpha_Block = {BC6_Green_Blue_Full_Alpha, nullptr}; +BlockBC6 BC6_Blue_Ignore_Alpha_Block = {BC6_Blue_Ignore_Alpha, nullptr}; +BlockBC6 BC6_White_Ignore_Alpha_Block = {BC6_White_Ignore_Alpha, nullptr}; +BlockBC6 BC6_Black_Ignore_Alpha_Block = {BC6_Black_Ignore_Alpha, nullptr}; +BlockBC6 BC6_Red_Blue_Ignore_Alpha_Block = {BC6_Red_Blue_Ignore_Alpha, nullptr}; +BlockBC6 BC6_Red_Green_Ignore_Alpha_Block = {BC6_Red_Green_Ignore_Alpha, nullptr}; +BlockBC6 BC6_Green_Blue_Ignore_Alpha_Block = {BC6_Green_Blue_Ignore_Alpha, nullptr}; +BlockBC6 BC6_Red_Half_Alpha_Block = {BC6_Red_Half_Alpha, nullptr}; 
+BlockBC6 BC6_Green_Half_Alpha_Block = {BC6_Green_Half_Alpha, nullptr}; +Block BC7_Red_Ignore_Alpha_Block = {BC7_Red_Ignore_Alpha, nullptr}; +Block BC7_Blue_Half_Alpha_Block = {BC7_Blue_Half_Alpha, nullptr}; +Block BC7_White_Half_Alpha_Block = {BC7_White_Half_Alpha, nullptr}; +Block BC7_Black_Half_Alpha_Block = {BC7_Black_Half_Alpha, nullptr}; +Block BC7_Red_Blue_Half_Alpha_Block = {BC7_Red_Blue_Half_Alpha, nullptr}; +Block BC7_Red_Green_Half_Alpha_Block = {BC7_Red_Green_Half_Alpha, nullptr}; +Block BC7_Green_Blue_Half_Alpha_Block = {BC7_Green_Blue_Half_Alpha, nullptr}; +Block BC7_Red_Full_Alpha_Block = {BC7_Red_Full_Alpha, nullptr}; +Block BC7_Green_Full_Alpha_Block = {BC7_Green_Full_Alpha, nullptr}; +Block BC7_Blue_Full_Alpha_Block = {BC7_Blue_Full_Alpha, nullptr}; +Block BC7_White_Full_Alpha_Block = {BC7_White_Full_Alpha, nullptr}; +Block BC7_Green_Ignore_Alpha_Block = {BC7_Green_Ignore_Alpha, nullptr}; +Block BC7_Black_Full_Alpha_Block = {BC7_Black_Full_Alpha, nullptr}; +Block BC7_Red_Blue_Full_Alpha_Block = {BC7_Red_Blue_Full_Alpha, nullptr}; +Block BC7_Red_Green_Full_Alpha_Block = {BC7_Red_Green_Full_Alpha, nullptr}; +Block BC7_Green_Blue_Full_Alpha_Block = {BC7_Green_Blue_Full_Alpha, nullptr}; +Block BC7_Blue_Ignore_Alpha_Block = {BC7_Blue_Ignore_Alpha, nullptr}; +Block BC7_White_Ignore_Alpha_Block = {BC7_White_Ignore_Alpha, nullptr}; +Block BC7_Black_Ignore_Alpha_Block = {BC7_Black_Ignore_Alpha, nullptr}; +Block BC7_Red_Blue_Ignore_Alpha_Block = {BC7_Red_Blue_Ignore_Alpha, nullptr}; +Block BC7_Red_Green_Ignore_Alpha_Block = {BC7_Red_Green_Ignore_Alpha, nullptr}; +Block BC7_Green_Blue_Ignore_Alpha_Block = {BC7_Green_Blue_Ignore_Alpha, nullptr}; +Block BC7_Red_Half_Alpha_Block = {BC7_Red_Half_Alpha, nullptr}; +Block BC7_Green_Half_Alpha_Block = {BC7_Green_Half_Alpha, nullptr}; static std::unordered_map blocks { { "BC1_Red_Ignore_Alpha", BC1_Red_Ignore_Alpha_Block}, @@ -222,7 +415,106 @@ static std::unordered_map blocks { { "BC3_Red_Green_Ignore_Alpha", 
BC3_Red_Green_Ignore_Alpha_Block}, { "BC3_Green_Blue_Ignore_Alpha", BC3_Green_Blue_Ignore_Alpha_Block}, { "BC3_Red_Half_Alpha", BC3_Red_Half_Alpha_Block}, - { "BC3_Green_Half_Alpha", BC3_Green_Half_Alpha_Block} + { "BC3_Green_Half_Alpha", BC3_Green_Half_Alpha_Block}, + { "BC4_Red_Ignore_Alpha", BC4_Red_Ignore_Alpha_Block}, + { "BC4_Blue_Half_Alpha", BC4_Blue_Half_Alpha_Block}, + { "BC4_White_Half_Alpha", BC4_White_Half_Alpha_Block}, + { "BC4_Black_Half_Alpha", BC4_Black_Half_Alpha_Block}, + { "BC4_Red_Blue_Half_Alpha", BC4_Red_Blue_Half_Alpha_Block}, + { "BC4_Red_Green_Half_Alpha", BC4_Red_Green_Half_Alpha_Block}, + { "BC4_Green_Blue_Half_Alpha", BC4_Green_Blue_Half_Alpha_Block}, + { "BC4_Red_Full_Alpha", BC4_Red_Full_Alpha_Block}, + { "BC4_Green_Full_Alpha", BC4_Green_Full_Alpha_Block}, + { "BC4_Blue_Full_Alpha", BC4_Blue_Full_Alpha_Block}, + { "BC4_White_Full_Alpha", BC4_White_Full_Alpha_Block}, + { "BC4_Green_Ignore_Alpha", BC4_Green_Ignore_Alpha_Block}, + { "BC4_Black_Full_Alpha", BC4_Black_Full_Alpha_Block}, + { "BC4_Red_Blue_Full_Alpha", BC4_Red_Blue_Full_Alpha_Block}, + { "BC4_Red_Green_Full_Alpha", BC4_Red_Green_Full_Alpha_Block}, + { "BC4_Green_Blue_Full_Alpha", BC4_Green_Blue_Full_Alpha_Block}, + { "BC4_Blue_Ignore_Alpha", BC4_Blue_Ignore_Alpha_Block}, + { "BC4_White_Ignore_Alpha", BC4_White_Ignore_Alpha_Block}, + { "BC4_Black_Ignore_Alpha", BC4_Black_Ignore_Alpha_Block}, + { "BC4_Red_Blue_Ignore_Alpha", BC4_Red_Blue_Ignore_Alpha_Block}, + { "BC4_Red_Green_Ignore_Alpha", BC4_Red_Green_Ignore_Alpha_Block}, + { "BC4_Green_Blue_Ignore_Alpha", BC4_Green_Blue_Ignore_Alpha_Block}, + { "BC4_Red_Half_Alpha", BC4_Red_Half_Alpha_Block}, + { "BC4_Green_Half_Alpha", BC4_Green_Half_Alpha_Block}, + { "BC5_Red_Ignore_Alpha", BC5_Red_Ignore_Alpha_Block}, + { "BC5_Blue_Half_Alpha", BC5_Blue_Half_Alpha_Block}, + { "BC5_White_Half_Alpha", BC5_White_Half_Alpha_Block}, + { "BC5_Black_Half_Alpha", BC5_Black_Half_Alpha_Block}, + { "BC5_Red_Blue_Half_Alpha", 
BC5_Red_Blue_Half_Alpha_Block}, + { "BC5_Red_Green_Half_Alpha", BC5_Red_Green_Half_Alpha_Block}, + { "BC5_Green_Blue_Half_Alpha", BC5_Green_Blue_Half_Alpha_Block}, + { "BC5_Red_Full_Alpha", BC5_Red_Full_Alpha_Block}, + { "BC5_Green_Full_Alpha", BC5_Green_Full_Alpha_Block}, + { "BC5_Blue_Full_Alpha", BC5_Blue_Full_Alpha_Block}, + { "BC5_White_Full_Alpha", BC5_White_Full_Alpha_Block}, + { "BC5_Green_Ignore_Alpha", BC5_Green_Ignore_Alpha_Block}, + { "BC5_Black_Full_Alpha", BC5_Black_Full_Alpha_Block}, + { "BC5_Red_Blue_Full_Alpha", BC5_Red_Blue_Full_Alpha_Block}, + { "BC5_Red_Green_Full_Alpha", BC5_Red_Green_Full_Alpha_Block}, + { "BC5_Green_Blue_Full_Alpha", BC5_Green_Blue_Full_Alpha_Block}, + { "BC5_Blue_Ignore_Alpha", BC5_Blue_Ignore_Alpha_Block}, + { "BC5_White_Ignore_Alpha", BC5_White_Ignore_Alpha_Block}, + { "BC5_Black_Ignore_Alpha", BC5_Black_Ignore_Alpha_Block}, + { "BC5_Red_Blue_Ignore_Alpha", BC5_Red_Blue_Ignore_Alpha_Block}, + { "BC5_Red_Green_Ignore_Alpha", BC5_Red_Green_Ignore_Alpha_Block}, + { "BC5_Green_Blue_Ignore_Alpha", BC5_Green_Blue_Ignore_Alpha_Block}, + { "BC5_Red_Half_Alpha", BC5_Red_Half_Alpha_Block}, + { "BC5_Green_Half_Alpha", BC5_Green_Half_Alpha_Block}, + { "BC7_Red_Ignore_Alpha", BC7_Red_Ignore_Alpha_Block}, + { "BC7_Blue_Half_Alpha", BC7_Blue_Half_Alpha_Block}, + { "BC7_White_Half_Alpha", BC7_White_Half_Alpha_Block}, + { "BC7_Black_Half_Alpha", BC7_Black_Half_Alpha_Block}, + { "BC7_Red_Blue_Half_Alpha", BC7_Red_Blue_Half_Alpha_Block}, + { "BC7_Red_Green_Half_Alpha", BC7_Red_Green_Half_Alpha_Block}, + { "BC7_Green_Blue_Half_Alpha", BC7_Green_Blue_Half_Alpha_Block}, + { "BC7_Red_Full_Alpha", BC7_Red_Full_Alpha_Block}, + { "BC7_Green_Full_Alpha", BC7_Green_Full_Alpha_Block}, + { "BC7_Blue_Full_Alpha", BC7_Blue_Full_Alpha_Block}, + { "BC7_White_Full_Alpha", BC7_White_Full_Alpha_Block}, + { "BC7_Green_Ignore_Alpha", BC7_Green_Ignore_Alpha_Block}, + { "BC7_Black_Full_Alpha", BC7_Black_Full_Alpha_Block}, + { "BC7_Red_Blue_Full_Alpha", 
BC7_Red_Blue_Full_Alpha_Block}, + { "BC7_Red_Green_Full_Alpha", BC7_Red_Green_Full_Alpha_Block}, + { "BC7_Green_Blue_Full_Alpha", BC7_Green_Blue_Full_Alpha_Block}, + { "BC7_Blue_Ignore_Alpha", BC7_Blue_Ignore_Alpha_Block}, + { "BC7_White_Ignore_Alpha", BC7_White_Ignore_Alpha_Block}, + { "BC7_Black_Ignore_Alpha", BC7_Black_Ignore_Alpha_Block}, + { "BC7_Red_Blue_Ignore_Alpha", BC7_Red_Blue_Ignore_Alpha_Block}, + { "BC7_Red_Green_Ignore_Alpha", BC7_Red_Green_Ignore_Alpha_Block}, + { "BC7_Green_Blue_Ignore_Alpha", BC7_Green_Blue_Ignore_Alpha_Block}, + { "BC7_Red_Half_Alpha", BC7_Red_Half_Alpha_Block}, + { "BC7_Green_Half_Alpha", BC7_Green_Half_Alpha_Block} +}; + +static std::unordered_map blocksBC6 { + { "BC6_Red_Ignore_Alpha", BC6_Red_Ignore_Alpha_Block}, + { "BC6_Blue_Half_Alpha", BC6_Blue_Half_Alpha_Block}, + { "BC6_White_Half_Alpha", BC6_White_Half_Alpha_Block}, + { "BC6_Black_Half_Alpha", BC6_Black_Half_Alpha_Block}, + { "BC6_Red_Blue_Half_Alpha", BC6_Red_Blue_Half_Alpha_Block}, + { "BC6_Red_Green_Half_Alpha", BC6_Red_Green_Half_Alpha_Block}, + { "BC6_Green_Blue_Half_Alpha", BC6_Green_Blue_Half_Alpha_Block}, + { "BC6_Red_Full_Alpha", BC6_Red_Full_Alpha_Block}, + { "BC6_Green_Full_Alpha", BC6_Green_Full_Alpha_Block}, + { "BC6_Blue_Full_Alpha", BC6_Blue_Full_Alpha_Block}, + { "BC6_White_Full_Alpha", BC6_White_Full_Alpha_Block}, + { "BC6_Green_Ignore_Alpha", BC6_Green_Ignore_Alpha_Block}, + { "BC6_Black_Full_Alpha", BC6_Black_Full_Alpha_Block}, + { "BC6_Red_Blue_Full_Alpha", BC6_Red_Blue_Full_Alpha_Block}, + { "BC6_Red_Green_Full_Alpha", BC6_Red_Green_Full_Alpha_Block}, + { "BC6_Green_Blue_Full_Alpha", BC6_Green_Blue_Full_Alpha_Block}, + { "BC6_Blue_Ignore_Alpha", BC6_Blue_Ignore_Alpha_Block}, + { "BC6_White_Ignore_Alpha", BC6_White_Ignore_Alpha_Block}, + { "BC6_Black_Ignore_Alpha", BC6_Black_Ignore_Alpha_Block}, + { "BC6_Red_Blue_Ignore_Alpha", BC6_Red_Blue_Ignore_Alpha_Block}, + { "BC6_Red_Green_Ignore_Alpha", BC6_Red_Green_Ignore_Alpha_Block}, + { 
"BC6_Green_Blue_Ignore_Alpha", BC6_Green_Blue_Ignore_Alpha_Block}, + { "BC6_Red_Half_Alpha", BC6_Red_Half_Alpha_Block}, + { "BC6_Green_Half_Alpha", BC6_Green_Half_Alpha_Block} }; #endif \ No newline at end of file diff --git a/extern/CMP_Core/test/CMakeLists.txt b/extern/CMP_Core/test/CMakeLists.txt index 710e8fa..f01de34 100644 --- a/extern/CMP_Core/test/CMakeLists.txt +++ b/extern/CMP_Core/test/CMakeLists.txt @@ -9,5 +9,7 @@ target_sources(Tests CompressonatorTests.cpp CompressonatorTests.h BlockConstants.h + ../../Applications/_Plugins/Common/UtilFuncs.cpp + ../../Applications/_Plugins/Common/UtilFuncs.h ) target_link_libraries(Tests Catch2::Catch2 CMP_Core) diff --git a/extern/CMP_Core/test/CompressonatorTests.cpp b/extern/CMP_Core/test/CompressonatorTests.cpp index a75c268..bb891ea 100644 --- a/extern/CMP_Core/test/CompressonatorTests.cpp +++ b/extern/CMP_Core/test/CompressonatorTests.cpp @@ -1,9 +1,10 @@ #include #include +#include "../../../Common/Lib/Ext/Catch2/catch.hpp" #include "../source/CMP_Core.h" +#include "../../Applications/_Plugins/Common/UtilFuncs.h" // incudes all compressed 4x4 blocks #include "BlockConstants.h" -#include "../../../Common/Lib/Ext/Catch2/catch.hpp" #include "CompressonatorTests.h" static const int BC1_BLOCK_SIZE = 8; @@ -41,6 +42,17 @@ static const std::map> colorValues{ { "Green_Blue_Full_Alpha" , { 0x0, 0xff, 0xff, 0x0 }} }; +const std::map> colorValuesBC6{ + { "Red_Ignore_Alpha", { 1.0f, 0.0f, 0.0f}}, + { "Green_Ignore_Alpha" , { 0.0f, 01.0f, 0.0f}}, + { "Blue_Ignore_Alpha" , { 0.0f, 0.0f, 1.0f}}, + { "White_Ignore_Alpha" , { 1.0f, 1.0f, 1.0f}}, + { "Black_Ignore_Alpha" , { 0.0f, 0.0f, 0.0f}}, + { "Red_Blue_Ignore_Alpha" , { 1.0f, 0.0f, 1.0f}}, + { "Red_Green_Ignore_Alpha" , { 1.0f, 1.0f, 0.0f}}, + { "Green_Blue_Ignore_Alpha", { 0.0f, 1.0f, 1.0f }}, +}; + //block storage format: [R, G, B, W, Black, RB, RG, GB]. 
Alpha: 100%, 50%, 0% enum ColorEnum { Red, Green, Blue, White, Black, Red_Blue, Red_Green, Green_Blue @@ -49,37 +61,39 @@ enum AlphaEnum { Ignore_Alpha, Half_Alpha, Full_Alpha }; enum CompEnum { - BC1, BC2, BC3 + BC1, BC2, BC3, BC4, BC5, BC7, BC6 }; -std::string BlockKeyName(CompEnum compression, ColorEnum color, AlphaEnum alpha) -{ +std::string BlockKeyName(CompEnum compression, ColorEnum color, AlphaEnum alpha) { std::string result = ""; switch (compression) { - case BC1: result += "BC1"; break; - case BC2: result += "BC2"; break; - case BC3: result += "BC3"; break; + case BC1: result += "BC1"; break; + case BC2: result += "BC2"; break; + case BC3: result += "BC3"; break; + case BC4: result += "BC4"; break; + case BC5: result += "BC5"; break; + case BC6: result += "BC6"; break; + case BC7: result += "BC7"; break; } switch (color) { - case Red: result += "_Red_"; break; - case Green: result += "_Green_"; break; - case Blue: result += "_Blue_"; break; - case White: result += "_White_"; break; - case Black: result += "_Black_"; break; - case Red_Blue: result += "_Red_Blue_"; break; - case Red_Green: result += "_Red_Green_"; break; - case Green_Blue: result += "_Green_Blue_"; break; + case Red: result += "_Red_"; break; + case Green: result += "_Green_"; break; + case Blue: result += "_Blue_"; break; + case White: result += "_White_"; break; + case Black: result += "_Black_"; break; + case Red_Blue: result += "_Red_Blue_"; break; + case Red_Green: result += "_Red_Green_"; break; + case Green_Blue: result += "_Green_Blue_"; break; } switch (alpha) { - case Ignore_Alpha: result += "Ignore_Alpha"; break; - case Half_Alpha: result += "Half_Alpha"; break; - case Full_Alpha: result += "Full_Alpha"; break; + case Ignore_Alpha: result += "Ignore_Alpha"; break; + case Half_Alpha: result += "Half_Alpha"; break; + case Full_Alpha: result += "Full_Alpha"; break; } return result; } -void AssignExpectedColorsToBlocks() -{ +void AssignExpectedColorsToBlocks() { ColorEnum color = 
Red; CompEnum comp = BC1; AlphaEnum alpha = Ignore_Alpha; @@ -87,14 +101,30 @@ void AssignExpectedColorsToBlocks() if (i % 24 == 0 && i > 0) { comp = static_cast(comp + 1); } + if (comp == CompEnum::BC6) //Bc6 blocks are stored in its own blocks map. + continue; + if (i % 8 == 0 && i > 0) { alpha = static_cast((alpha + 1) % 3); } const std::string keyBlocks = BlockKeyName(comp, color, alpha); std::string keyColor = keyBlocks; + keyColor.erase(0, 4); + auto it = (blocks.find(keyBlocks)); + it->second.color = ((colorValues.find(keyColor))->second).data(); + color = static_cast((color + 1) % 8); + } + // BC6 list + comp = CompEnum::BC6; + for (int i = 0; i < blocksBC6.size(); ++i){ + if (i % 8 == 0 && i > 0) { + alpha = static_cast((alpha + 1) % 3); + } + const std::string keyBlocks = BlockKeyName(comp, color, alpha); + std::string keyColor = BlockKeyName(comp, color, AlphaEnum::Ignore_Alpha); // string keyColor is in format BCn_color_alpha. To use it as key to access colorValues, delete the BCn_ part. keyColor.erase(0, 4); - ((blocks.find(keyBlocks))->second).color = ((colorValues.find(keyColor))->second).data(); + ((blocksBC6.find(keyBlocks))->second).color = ((colorValuesBC6.find(keyColor))->second).data(); color = static_cast((color + 1) % 8); } } @@ -104,7 +134,8 @@ bool ColorMatches(unsigned char* buffer, const unsigned char* expectedColor, boo unsigned char expectedColorBuffer[64]; // handle formats that do not support alpha. if (ignoreAlpha) { - // if alpha is ignored, BC should set all values to 0. Except the alpha value which can be 0 or 0xff only. + // if alpha is ignored, BC should set all values to 0. exept the alpha value which can be 0 or 0xff only. + // Since all blocks have the same color, there should always be the same alpha. 
if (buffer[3] != 0 && buffer[3] != 255) { return false; } @@ -116,19 +147,60 @@ bool ColorMatches(unsigned char* buffer, const unsigned char* expectedColor, boo // Set alpha value to the alpha value in the first pixel of the decompressed buffer. // The buffer contains only one color, so all pixels should have the same values. expColorWithoutAlpha[3] = buffer[3]; - + for (int idx = 0; idx < DECOMPRESSED_BLOCK_SIZE / 4; ++idx) { memcpy(expectedColorBuffer + (idx * 4), expColorWithoutAlpha, 4); } return memcmp(&expectedColorBuffer, buffer, DECOMPRESSED_BLOCK_SIZE) == 0; } - + for (int idx = 0; idx < DECOMPRESSED_BLOCK_SIZE / 4; ++idx) { memcpy(expectedColorBuffer + (idx * 4), expectedColor, 4); } return memcmp(&expectedColorBuffer, buffer, DECOMPRESSED_BLOCK_SIZE) == 0; } + +bool ColorMatchesBC4(unsigned char* buffer, const unsigned char* expectedColor) { + unsigned char expectedColorBuffer[16]; + for (int i = 0; i < 16; ++i) { + expectedColorBuffer[i] = expectedColor[0]; //Bc4 supports red channel only. + } + return memcmp(&expectedColorBuffer, buffer, sizeof(expectedColorBuffer)) == 0; +} + +bool ColorMatchesBC5(unsigned char* bufferR, unsigned char* bufferG, const unsigned char* expectedColor) { + unsigned char expectedColorR[16]; + unsigned char expectedColorG[16]; + for (int i = 0; i < 16; ++i) { + expectedColorR[i] = expectedColor[0]; //Bc5 supports red channel and green channel only. 
+ expectedColorG[i] = expectedColor[1]; + } + return memcmp(&expectedColorR, bufferR, 16) == 0 && memcmp(&expectedColorG, bufferG, 16) == 0; +} + +bool ColorMatchesBC6(unsigned short* buffer, const float* expectedColor) +{ + float bufferInFloat[48]; + float expectedColorBuffer[48]; + for (int i = 0; i < 16; ++i) { + // SF16: 1:5:10 : 1bit signed, 5bit exponent, 10bit mantissa + // DecompressBC6 stores decompressed color as SF16 + // BC6 stores RGB channels only + for (int channel = 0; channel < 3; ++channel) { + // convert expcolor float to half-float with intrinsic + //__m128 val = _mm_load_ps1(&expColor); + //__m128i half = _mm_cvtps_ph(val, 0); + //unsigned short expColorSh = _mm_extract_epi32(half, 0); + unsigned short color = buffer[i * 3 + channel]; + bufferInFloat[i * 3 + channel] = HalfToFloat(color); + + expectedColorBuffer[i * 3 + channel] = expectedColor[channel]; + } + } + return memcmp(&expectedColorBuffer, bufferInFloat, sizeof(expectedColorBuffer)) == 0; +} + //*************************************************************************************** TEST_CASE("BC1_Red_Ignore_Alpha", "[BC1_Red_Ignore_Alpha]") @@ -136,1008 +208,2401 @@ TEST_CASE("BC1_Red_Ignore_Alpha", "[BC1_Red_Ignore_Alpha]") const auto block = blocks.find("BC1_Red_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Blue_Half_Alpha", "[BC1_Blue_Half_Alpha]") { const auto block = 
blocks.find("BC1_Blue_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_White_Half_Alpha", "[BC1_White_Half_Alpha]") { const auto block = blocks.find("BC1_White_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Black_Half_Alpha", "[BC1_Black_Half_Alpha]") { const auto block = blocks.find("BC1_Black_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, 
compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Red_Blue_Half_Alpha", "[BC1_Red_Blue_Half_Alpha]") { const auto block = blocks.find("BC1_Red_Blue_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Red_Green_Half_Alpha", "[BC1_Red_Green_Half_Alpha]") { const auto block = blocks.find("BC1_Red_Green_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Green_Blue_Half_Alpha", "[BC1_Green_Blue_Half_Alpha]") { const auto block = blocks.find("BC1_Green_Blue_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char 
decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Red_Full_Alpha", "[BC1_Red_Full_Alpha]") { const auto block = blocks.find("BC1_Red_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Green_Full_Alpha", "[BC1_Green_Full_Alpha]") { const auto block = blocks.find("BC1_Green_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, 
blockColor, true)); } TEST_CASE("BC1_Blue_Full_Alpha", "[BC1_Blue_Full_Alpha]") { const auto block = blocks.find("BC1_Blue_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_White_Full_Alpha", "[BC1_White_Full_Alpha]") { const auto block = blocks.find("BC1_White_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Green_Ignore_Alpha", "[BC1_Green_Ignore_Alpha]") { const auto block = blocks.find("BC1_Green_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - 
unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Black_Full_Alpha", "[BC1_Black_Full_Alpha]") { const auto block = blocks.find("BC1_Black_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Red_Blue_Full_Alpha", "[BC1_Red_Blue_Full_Alpha]") { const auto block = blocks.find("BC1_Red_Blue_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Red_Green_Full_Alpha", "[BC1_Red_Green_Full_Alpha]") { const auto block = blocks.find("BC1_Red_Green_Full_Alpha")->second; const auto blockData = block.data; 
const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Green_Blue_Full_Alpha", "[BC1_Green_Blue_Full_Alpha]") { const auto block = blocks.find("BC1_Green_Blue_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Blue_Ignore_Alpha", "[BC1_Blue_Ignore_Alpha]") { const auto block = blocks.find("BC1_Blue_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, 
decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_White_Ignore_Alpha", "[BC1_White_Ignore_Alpha]") { const auto block = blocks.find("BC1_White_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Black_Ignore_Alpha", "[BC1_Black_Ignore_Alpha]") { const auto block = blocks.find("BC1_Black_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Red_Blue_Ignore_Alpha", "[BC1_Red_Blue_Ignore_Alpha]") { const auto block = blocks.find("BC1_Red_Blue_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, 
nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Red_Green_Ignore_Alpha", "[BC1_Red_Green_Ignore_Alpha]") { const auto block = blocks.find("BC1_Red_Green_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Green_Blue_Ignore_Alpha", "[BC1_Green_Blue_Ignore_Alpha]") { const auto block = blocks.find("BC1_Green_Blue_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); 
} TEST_CASE("BC1_Red_Half_Alpha", "[BC1_Red_Half_Alpha]") { const auto block = blocks.find("BC1_Red_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC1_Green_Half_Alpha", "[BC1_Green_Half_Alpha]") { const auto block = blocks.find("BC1_Green_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC1(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,true)); + CHECK(ColorMatches(decompBlock, blockColor, true)); unsigned char compBlock[8]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC1(decompBlock, 16, compBlock, nullptr); DecompressBlockBC1(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,true)); + CHECK(ColorMatches(decompCompBlock, blockColor, true)); } TEST_CASE("BC2_Red_Ignore_Alpha", "[BC2_Red_Ignore_Alpha]") { const auto block = blocks.find("BC2_Red_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock 
[64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Blue_Half_Alpha", "[BC2_Blue_Half_Alpha]") { const auto block = blocks.find("BC2_Blue_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_White_Half_Alpha", "[BC2_White_Half_Alpha]") { const auto block = blocks.find("BC2_White_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Black_Half_Alpha", "[BC2_Black_Half_Alpha]") { const auto block = blocks.find("BC2_Black_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - 
unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Red_Blue_Half_Alpha", "[BC2_Red_Blue_Half_Alpha]") { const auto block = blocks.find("BC2_Red_Blue_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Red_Green_Half_Alpha", "[BC2_Red_Green_Half_Alpha]") { const auto block = blocks.find("BC2_Red_Green_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - 
CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Green_Blue_Half_Alpha", "[BC2_Green_Blue_Half_Alpha]") { const auto block = blocks.find("BC2_Green_Blue_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Red_Full_Alpha", "[BC2_Red_Full_Alpha]") { const auto block = blocks.find("BC2_Red_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Green_Full_Alpha", "[BC2_Green_Full_Alpha]") { const auto block = blocks.find("BC2_Green_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - 
CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Blue_Full_Alpha", "[BC2_Blue_Full_Alpha]") { const auto block = blocks.find("BC2_Blue_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_White_Full_Alpha", "[BC2_White_Full_Alpha]") { const auto block = blocks.find("BC2_White_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Green_Ignore_Alpha", 
"[BC2_Green_Ignore_Alpha]") { const auto block = blocks.find("BC2_Green_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Black_Full_Alpha", "[BC2_Black_Full_Alpha]") { const auto block = blocks.find("BC2_Black_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Red_Blue_Full_Alpha", "[BC2_Red_Blue_Full_Alpha]") { const auto block = blocks.find("BC2_Red_Blue_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + 
unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Red_Green_Full_Alpha", "[BC2_Red_Green_Full_Alpha]") { const auto block = blocks.find("BC2_Red_Green_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Green_Blue_Full_Alpha", "[BC2_Green_Blue_Full_Alpha]") { const auto block = blocks.find("BC2_Green_Blue_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Blue_Ignore_Alpha", "[BC2_Blue_Ignore_Alpha]") { const auto block = blocks.find("BC2_Blue_Ignore_Alpha")->second; const auto blockData = block.data; const auto 
blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_White_Ignore_Alpha", "[BC2_White_Ignore_Alpha]") { const auto block = blocks.find("BC2_White_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Black_Ignore_Alpha", "[BC2_Black_Ignore_Alpha]") { const auto block = blocks.find("BC2_Black_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, 
nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Red_Blue_Ignore_Alpha", "[BC2_Red_Blue_Ignore_Alpha]") { const auto block = blocks.find("BC2_Red_Blue_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Red_Green_Ignore_Alpha", "[BC2_Red_Green_Ignore_Alpha]") { const auto block = blocks.find("BC2_Red_Green_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Green_Blue_Ignore_Alpha", "[BC2_Green_Blue_Ignore_Alpha]") { const auto block = blocks.find("BC2_Green_Blue_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; 
DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Red_Half_Alpha", "[BC2_Red_Half_Alpha]") { const auto block = blocks.find("BC2_Red_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC2_Green_Half_Alpha", "[BC2_Green_Half_Alpha]") { const auto block = blocks.find("BC2_Green_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC2(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC2(decompBlock, 16, compBlock, nullptr); DecompressBlockBC2(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, 
blockColor, false)); } TEST_CASE("BC3_Red_Ignore_Alpha", "[BC3_Red_Ignore_Alpha]") { const auto block = blocks.find("BC3_Red_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Blue_Half_Alpha", "[BC3_Blue_Half_Alpha]") { const auto block = blocks.find("BC3_Blue_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_White_Half_Alpha", "[BC3_White_Half_Alpha]") { const auto block = blocks.find("BC3_White_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; 
- unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Black_Half_Alpha", "[BC3_Black_Half_Alpha]") { const auto block = blocks.find("BC3_Black_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Red_Blue_Half_Alpha", "[BC3_Red_Blue_Half_Alpha]") { const auto block = blocks.find("BC3_Red_Blue_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Red_Green_Half_Alpha", "[BC3_Red_Green_Half_Alpha]") { const auto block = blocks.find("BC3_Red_Green_Half_Alpha")->second; const auto blockData = 
block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Green_Blue_Half_Alpha", "[BC3_Green_Blue_Half_Alpha]") { const auto block = blocks.find("BC3_Green_Blue_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Red_Full_Alpha", "[BC3_Red_Full_Alpha]") { const auto block = blocks.find("BC3_Red_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, 
decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Green_Full_Alpha", "[BC3_Green_Full_Alpha]") { const auto block = blocks.find("BC3_Green_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Blue_Full_Alpha", "[BC3_Blue_Full_Alpha]") { const auto block = blocks.find("BC3_Blue_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_White_Full_Alpha", "[BC3_White_Full_Alpha]") { const auto block = blocks.find("BC3_White_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - 
CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Green_Ignore_Alpha", "[BC3_Green_Ignore_Alpha]") { const auto block = blocks.find("BC3_Green_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Black_Full_Alpha", "[BC3_Black_Full_Alpha]") { const auto block = blocks.find("BC3_Black_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } 
TEST_CASE("BC3_Red_Blue_Full_Alpha", "[BC3_Red_Blue_Full_Alpha]") { const auto block = blocks.find("BC3_Red_Blue_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Red_Green_Full_Alpha", "[BC3_Red_Green_Full_Alpha]") { const auto block = blocks.find("BC3_Red_Green_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Green_Blue_Full_Alpha", "[BC3_Green_Blue_Full_Alpha]") { const auto block = blocks.find("BC3_Green_Blue_Full_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned 
char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Blue_Ignore_Alpha", "[BC3_Blue_Ignore_Alpha]") { const auto block = blocks.find("BC3_Blue_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_White_Ignore_Alpha", "[BC3_White_Ignore_Alpha]") { const auto block = blocks.find("BC3_White_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Black_Ignore_Alpha", "[BC3_Black_Ignore_Alpha]") { const auto block = blocks.find("BC3_Black_Ignore_Alpha")->second; const 
auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Red_Blue_Ignore_Alpha", "[BC3_Red_Blue_Ignore_Alpha]") { const auto block = blocks.find("BC3_Red_Blue_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Red_Green_Ignore_Alpha", "[BC3_Red_Green_Ignore_Alpha]") { const auto block = blocks.find("BC3_Red_Green_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, 
compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Green_Blue_Ignore_Alpha", "[BC3_Green_Blue_Ignore_Alpha]") { const auto block = blocks.find("BC3_Green_Blue_Ignore_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Red_Half_Alpha", "[BC3_Red_Half_Alpha]") { const auto block = blocks.find("BC3_Red_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); } TEST_CASE("BC3_Green_Half_Alpha", "[BC3_Green_Half_Alpha]") { const auto block = blocks.find("BC3_Green_Half_Alpha")->second; const auto blockData = block.data; const auto blockColor = block.color; - unsigned char decompBlock [64]; + unsigned char 
decompBlock[64]; DecompressBlockBC3(blockData, decompBlock, nullptr); - CHECK(ColorMatches(decompBlock, blockColor,false)); + CHECK(ColorMatches(decompBlock, blockColor, false)); unsigned char compBlock[16]; - unsigned char decompCompBlock [64]; + unsigned char decompCompBlock[64]; CompressBlockBC3(decompBlock, 16, compBlock, nullptr); DecompressBlockBC3(compBlock, decompCompBlock, nullptr); - CHECK(ColorMatches(decompCompBlock, blockColor,false)); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC4_Red_Ignore_Alpha", "[BC4_Red_Ignore_Alpha]") +{ + const auto block = blocks.find("BC4_Red_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Blue_Half_Alpha", "[BC4_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC4_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_White_Half_Alpha", "[BC4_White_Half_Alpha]") +{ + const auto block = blocks.find("BC4_White_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + 
CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Black_Half_Alpha", "[BC4_Black_Half_Alpha]") +{ + const auto block = blocks.find("BC4_Black_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Red_Blue_Half_Alpha", "[BC4_Red_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC4_Red_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Red_Green_Half_Alpha", "[BC4_Red_Green_Half_Alpha]") +{ + const auto block = blocks.find("BC4_Red_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, 
decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Green_Blue_Half_Alpha", "[BC4_Green_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC4_Green_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Red_Full_Alpha", "[BC4_Red_Full_Alpha]") +{ + const auto block = blocks.find("BC4_Red_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Green_Full_Alpha", "[BC4_Green_Full_Alpha]") +{ + const auto block = blocks.find("BC4_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Blue_Full_Alpha", "[BC4_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC4_Blue_Full_Alpha")->second; + const auto 
blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_White_Full_Alpha", "[BC4_White_Full_Alpha]") +{ + const auto block = blocks.find("BC4_White_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Green_Ignore_Alpha", "[BC4_Green_Ignore_Alpha]") +{ + const auto block = blocks.find("BC4_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Black_Full_Alpha", "[BC4_Black_Full_Alpha]") +{ + const auto block = blocks.find("BC4_Black_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + 
unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Red_Blue_Full_Alpha", "[BC4_Red_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC4_Red_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Red_Green_Full_Alpha", "[BC4_Red_Green_Full_Alpha]") +{ + const auto block = blocks.find("BC4_Red_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Green_Blue_Full_Alpha", "[BC4_Green_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC4_Green_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); 
+} +TEST_CASE("BC4_Blue_Ignore_Alpha", "[BC4_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC4_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_White_Ignore_Alpha", "[BC4_White_Ignore_Alpha]") +{ + const auto block = blocks.find("BC4_White_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Black_Ignore_Alpha", "[BC4_Black_Ignore_Alpha]") +{ + const auto block = blocks.find("BC4_Black_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Red_Blue_Ignore_Alpha", "[BC4_Red_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC4_Red_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + 
unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); } +TEST_CASE("BC4_Red_Green_Ignore_Alpha", "[BC4_Red_Green_Ignore_Alpha]") +{ + const auto block = blocks.find("BC4_Red_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Green_Blue_Ignore_Alpha", "[BC4_Green_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC4_Green_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Red_Half_Alpha", "[BC4_Red_Half_Alpha]") +{ + const auto block = blocks.find("BC4_Red_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + 
CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC4_Green_Half_Alpha", "[BC4_Green_Half_Alpha]") +{ + const auto block = blocks.find("BC4_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[16]; + DecompressBlockBC4(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC4(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[16]; + CompressBlockBC4(decompBlock, 4, compBlock, nullptr); + DecompressBlockBC4(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC4(decompCompBlock, blockColor)); +} +TEST_CASE("BC5_Red_Ignore_Alpha", "[BC5_Red_Ignore_Alpha]") +{ + const auto block = blocks.find("BC5_Red_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Blue_Half_Alpha", "[BC5_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC5_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char 
decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_White_Half_Alpha", "[BC5_White_Half_Alpha]") +{ + const auto block = blocks.find("BC5_White_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Black_Half_Alpha", "[BC5_Black_Half_Alpha]") +{ + const auto block = blocks.find("BC5_Black_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Red_Blue_Half_Alpha", "[BC5_Red_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC5_Red_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + 
unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Red_Green_Half_Alpha", "[BC5_Red_Green_Half_Alpha]") +{ + const auto block = blocks.find("BC5_Red_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Green_Blue_Half_Alpha", "[BC5_Green_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC5_Green_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, 
decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Red_Full_Alpha", "[BC5_Red_Full_Alpha]") +{ + const auto block = blocks.find("BC5_Red_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Green_Full_Alpha", "[BC5_Green_Full_Alpha]") +{ + const auto block = blocks.find("BC5_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Blue_Full_Alpha", "[BC5_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC5_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + 
unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_White_Full_Alpha", "[BC5_White_Full_Alpha]") +{ + const auto block = blocks.find("BC5_White_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Green_Ignore_Alpha", "[BC5_Green_Ignore_Alpha]") +{ + const auto block = blocks.find("BC5_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Black_Full_Alpha", "[BC5_Black_Full_Alpha]") +{ + const auto block = blocks.find("BC5_Black_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char 
decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Red_Blue_Full_Alpha", "[BC5_Red_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC5_Red_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Red_Green_Full_Alpha", "[BC5_Red_Green_Full_Alpha]") +{ + const auto block = blocks.find("BC5_Red_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + 
CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Green_Blue_Full_Alpha", "[BC5_Green_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC5_Green_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Blue_Ignore_Alpha", "[BC5_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC5_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_White_Ignore_Alpha", "[BC5_White_Ignore_Alpha]") +{ + const auto block = blocks.find("BC5_White_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, 
blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Black_Ignore_Alpha", "[BC5_Black_Ignore_Alpha]") +{ + const auto block = blocks.find("BC5_Black_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Red_Blue_Ignore_Alpha", "[BC5_Red_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC5_Red_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Red_Green_Ignore_Alpha", "[BC5_Red_Green_Ignore_Alpha]") +{ + const auto block = 
blocks.find("BC5_Red_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Green_Blue_Ignore_Alpha", "[BC5_Green_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC5_Green_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Red_Half_Alpha", "[BC5_Red_Half_Alpha]") +{ + const auto block = blocks.find("BC5_Red_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, 
compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC5_Green_Half_Alpha", "[BC5_Green_Half_Alpha]") +{ + const auto block = blocks.find("BC5_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlockR[16]; + unsigned char decompBlockG[16]; + DecompressBlockBC5(blockData, decompBlockR, decompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompBlockR, decompBlockG, blockColor)); + unsigned char compBlock[16]; + unsigned char decompCompBlockR[16]; + unsigned char decompCompBlockG[16]; + CompressBlockBC5(decompBlockR, 4, decompBlockG, 4, compBlock, nullptr); + DecompressBlockBC5(compBlock, decompCompBlockR, decompCompBlockG, nullptr); + CHECK(ColorMatchesBC5(decompCompBlockR, decompCompBlockG, blockColor)); +} +TEST_CASE("BC6_Red_Ignore_Alpha", "[BC6_Red_Ignore_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Red_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Blue_Half_Alpha", "[BC6_Blue_Half_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + 
DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_White_Half_Alpha", "[BC6_White_Half_Alpha]") +{ + const auto block = blocksBC6.find("BC6_White_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Black_Half_Alpha", "[BC6_Black_Half_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Black_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Red_Blue_Half_Alpha", "[BC6_Red_Blue_Half_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Red_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Red_Green_Half_Alpha", "[BC6_Red_Green_Half_Alpha]") +{ + const auto block = 
blocksBC6.find("BC6_Red_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Green_Blue_Half_Alpha", "[BC6_Green_Blue_Half_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Green_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Red_Full_Alpha", "[BC6_Red_Full_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Red_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Green_Full_Alpha", "[BC6_Green_Full_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, 
nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Blue_Full_Alpha", "[BC6_Blue_Full_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_White_Full_Alpha", "[BC6_White_Full_Alpha]") +{ + const auto block = blocksBC6.find("BC6_White_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Green_Ignore_Alpha", "[BC6_Green_Ignore_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, 
decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Black_Full_Alpha", "[BC6_Black_Full_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Black_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Red_Blue_Full_Alpha", "[BC6_Red_Blue_Full_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Red_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Red_Green_Full_Alpha", "[BC6_Red_Green_Full_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Red_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Green_Blue_Full_Alpha", "[BC6_Green_Blue_Full_Alpha]") +{ + const auto block = 
blocksBC6.find("BC6_Green_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Blue_Ignore_Alpha", "[BC6_Blue_Ignore_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_White_Ignore_Alpha", "[BC6_White_Ignore_Alpha]") +{ + const auto block = blocksBC6.find("BC6_White_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Black_Ignore_Alpha", "[BC6_Black_Ignore_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Black_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, 
nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Red_Blue_Ignore_Alpha", "[BC6_Red_Blue_Ignore_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Red_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Red_Green_Ignore_Alpha", "[BC6_Red_Green_Ignore_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Red_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Green_Blue_Ignore_Alpha", "[BC6_Green_Blue_Ignore_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Green_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, 
compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Red_Half_Alpha", "[BC6_Red_Half_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Red_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC6_Green_Half_Alpha", "[BC6_Green_Half_Alpha]") +{ + const auto block = blocksBC6.find("BC6_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned short decompBlock[64]; + DecompressBlockBC6(blockData, decompBlock, nullptr); + CHECK(ColorMatchesBC6(decompBlock, blockColor)); + unsigned char compBlock[16]; + unsigned short decompCompBlock[48]; + CompressBlockBC6(decompBlock, 12, compBlock, nullptr); + DecompressBlockBC6(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatchesBC6(decompCompBlock, blockColor)); +} +TEST_CASE("BC7_Red_Ignore_Alpha", "[BC7_Red_Ignore_Alpha]") +{ + const auto block = blocks.find("BC7_Red_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Blue_Half_Alpha", "[BC7_Blue_Half_Alpha]") +{ + const auto block = 
blocks.find("BC7_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_White_Half_Alpha", "[BC7_White_Half_Alpha]") +{ + const auto block = blocks.find("BC7_White_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Black_Half_Alpha", "[BC7_Black_Half_Alpha]") +{ + const auto block = blocks.find("BC7_Black_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Red_Blue_Half_Alpha", "[BC7_Red_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC7_Red_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, 
nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Red_Green_Half_Alpha", "[BC7_Red_Green_Half_Alpha]") +{ + const auto block = blocks.find("BC7_Red_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Green_Blue_Half_Alpha", "[BC7_Green_Blue_Half_Alpha]") +{ + const auto block = blocks.find("BC7_Green_Blue_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Red_Full_Alpha", "[BC7_Red_Full_Alpha]") +{ + const auto block = blocks.find("BC7_Red_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + 
DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Green_Full_Alpha", "[BC7_Green_Full_Alpha]") +{ + const auto block = blocks.find("BC7_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Blue_Full_Alpha", "[BC7_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC7_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_White_Full_Alpha", "[BC7_White_Full_Alpha]") +{ + const auto block = blocks.find("BC7_White_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Green_Ignore_Alpha", "[BC7_Green_Ignore_Alpha]") +{ + const auto block = 
blocks.find("BC7_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Black_Full_Alpha", "[BC7_Black_Full_Alpha]") +{ + const auto block = blocks.find("BC7_Black_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Red_Blue_Full_Alpha", "[BC7_Red_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC7_Red_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Red_Green_Full_Alpha", "[BC7_Red_Green_Full_Alpha]") +{ + const auto block = blocks.find("BC7_Red_Green_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, 
decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Green_Blue_Full_Alpha", "[BC7_Green_Blue_Full_Alpha]") +{ + const auto block = blocks.find("BC7_Green_Blue_Full_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Blue_Ignore_Alpha", "[BC7_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC7_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_White_Ignore_Alpha", "[BC7_White_Ignore_Alpha]") +{ + const auto block = blocks.find("BC7_White_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, 
compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Black_Ignore_Alpha", "[BC7_Black_Ignore_Alpha]") +{ + const auto block = blocks.find("BC7_Black_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Red_Blue_Ignore_Alpha", "[BC7_Red_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC7_Red_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Red_Green_Ignore_Alpha", "[BC7_Red_Green_Ignore_Alpha]") +{ + const auto block = blocks.find("BC7_Red_Green_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} 
+TEST_CASE("BC7_Green_Blue_Ignore_Alpha", "[BC7_Green_Blue_Ignore_Alpha]") +{ + const auto block = blocks.find("BC7_Green_Blue_Ignore_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Red_Half_Alpha", "[BC7_Red_Half_Alpha]") +{ + const auto block = blocks.find("BC7_Red_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} +TEST_CASE("BC7_Green_Half_Alpha", "[BC7_Green_Half_Alpha]") +{ + const auto block = blocks.find("BC7_Green_Half_Alpha")->second; + const auto blockData = block.data; + const auto blockColor = block.color; + unsigned char decompBlock[64]; + DecompressBlockBC7(blockData, decompBlock, nullptr); + CHECK(ColorMatches(decompBlock, blockColor, false)); + unsigned char compBlock[16]; + unsigned char decompCompBlock[64]; + CompressBlockBC7(decompBlock, 16, compBlock, nullptr); + DecompressBlockBC7(compBlock, decompCompBlock, nullptr); + CHECK(ColorMatches(decompCompBlock, blockColor, false)); +} + //*************************************************************************************** \ No newline at end of file