//===================================================================== // Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions : // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. // //===================================================================== #include "BC1_Encode_kernel.h" //============================================== BC1 INTERFACES ======================================================= void CompressBlockBC1_Fast( CMP_Vec4uc srcBlockTemp[16], CMP_GLOBAL CGU_UINT32 compressedBlock[2]) { int i, k; CMP_Vec3f rgb; CMP_Vec3f average_rgb; // The centrepoint of the axis CMP_Vec3f v_rgb; // The axis CMP_Vec3f uniques[16]; // The list of unique colours int unique_pixels; // The number of unique pixels CGU_FLOAT unique_recip; // Reciprocal of the above for fast multiplication int index_map[16]; // The map of source pixels to unique indices CGU_FLOAT pos_on_axis[16]; // The distance each unique falls along the compression axis CGU_FLOAT dist_from_axis[16]; // The distance each unique falls from the compression axis CGU_FLOAT left = 0, right = 0, centre = 0; // The extremities and centre (average of left/right) of uniques along the compression axis CGU_FLOAT axis_mapping_error = 0; // The total computed error in mapping pixels to the axis int swap; // Indicator if the RGB values need swapping to generate an opaque result // ------------------------------------------------------------------------------------- // (3) Find the array of unique pixel values and sum them to find their average position // ------------------------------------------------------------------------------------- { // Find the array of unique pixel values and sum them to find their average position int current_pixel, firstdiff; current_pixel = unique_pixels = 0; average_rgb = 0.0f; firstdiff = -1; for (i = 0; i<16; i++) { for (k = 0; k 0) { rg_pos += rgb.y; rb_pos += rgb.z; } if (rgb.z > 0) bg_pos += rgb.y; } v_rgb = v_rgb*unique_recip; if (rg_pos < 0) v_rgb.x = -v_rgb.x; if (bg_pos < 0) v_rgb.z = -v_rgb.z; if ((rg_pos == bg_pos) && (rg_pos == 0)) if (rb_pos < 0) v_rgb.z = -v_rgb.z; } // ------------------------------------------------------------------------------------- // (5) Axis projection and remapping // ------------------------------------------------------------------------------------- { CGU_FLOAT v2_recip; // Normalise the axis for simplicity of future calculation v2_recip = (v_rgb.x*v_rgb.x + v_rgb.y*v_rgb.y + v_rgb.z*v_rgb.z); if (v2_recip > 0) v2_recip = 1.0f / (CGU_FLOAT)sqrt(v2_recip); else v2_recip = 1.0f; v_rgb = v_rgb*v2_recip; } // ------------------------------------------------------------------------------------- // (6) Map the axis // ------------------------------------------------------------------------------------- // the line joining (and extended on either side of) average and axis // defines the axis onto which the points will be projected // Project all the points onto the axis, calculate the distance along // the axis from the centre of the axis (average) // From Foley & Van Dam: Closest point of approach of a line (P + v) to a point (R) is // P + ((R-P).v) / (v.v))v // The distance along v is therefore (R-P).v / (v.v) // (v.v) is 1 if v is a unit vector. // // Calculate the extremities at the same time - these need to be reasonably accurately // represented in all cases // // In this first calculation, also find the error of mapping the points to the axis - this // is our major indicator of whether or not the block has compressed well - if the points // map well onto the axis then most of the noise introduced is high-frequency noise { left = 10000.0f; right = -10000.0f; axis_mapping_error = 0; for (i = 0; i < unique_pixels; i++) { // Compute the distance along the axis of the point of closest approach CMP_Vec3f temp = (uniques[i] - average_rgb); pos_on_axis[i] = (temp.x * v_rgb.x) + (temp.y * v_rgb.y) + (temp.z * v_rgb.z); // Compute the actual point and thence the mapping error rgb = uniques[i] - (average_rgb + (v_rgb * pos_on_axis[i])); dist_from_axis[i] = rgb.x*rgb.x + rgb.y*rgb.y + rgb.z*rgb.z; axis_mapping_error += dist_from_axis[i]; // Work out the extremities if (pos_on_axis[i] < left) left = pos_on_axis[i]; if (pos_on_axis[i] > right) right = pos_on_axis[i]; } } // ------------------------------------------------------------------------------------- // (7) Now we have a good axis and the basic information about how the points are mapped // to it // Our initial guess is to represent the endpoints accurately, by moving the average // to the centre and recalculating the point positions along the line // ------------------------------------------------------------------------------------- { centre = (left + right) / 2; average_rgb = average_rgb + (v_rgb*centre); for (i = 0; i> 3); rgb = average_rgb + (v_rgb * right); rd = ( CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z); gd = ( CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z); bd = ( CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z); ROUND_AND_CLAMP(rd, 5); ROUND_AND_CLAMP(gd, 6); ROUND_AND_CLAMP(bd, 5); c1 = (((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3)); // Force to be a 4-colour opaque block - in which case, c0 is greater than c1 // blocktype == 4 { if (c0 < c1) { t = c0; c0 = c1; c1 = t; swap = 1; } else if (c0 == c1) { // This block will always be encoded in 3-colour mode // Need to ensure that only one of the two points gets used, // avoiding accidentally setting some transparent pixels into the block for (i = 0; i average) are 0 and 1, while // interpolants are 2 and 3 if (fabs(rgb.z) >= division) bit = 0; else bit = 2; // Positive is in the latter half of the block if (rgb.z >= centre) bit += 1; // Set the output, taking swapping into account compressedBlock[1] |= ((bit^swap) << (2 * i)); // Average the X and Y locations for each cluster cluster_x[bit] += (CGU_FLOAT)(i & 3); cluster_y[bit] += (CGU_FLOAT)(i >> 2); cluster_count[bit]++; } for (i = 0; i<4; i++) { CGU_FLOAT cr; if (cluster_count[i]) { cr = 1.0f / cluster_count[i]; cluster_x[i] *= cr; cluster_y[i] *= cr; } else { cluster_x[i] = cluster_y[i] = -1; } } // patterns in axis position detection // (same algorithm as used in the SSE version) if ((compressedBlock[0] & 0xffff) != (compressedBlock[0] >> 16)) { CGU_UINT32 i1, k1; CGU_UINT32 x = 0, y = 0; int xstep = 0, ystep = 0; // Find a corner to search from for (k1 = 0; k1<4; k1++) { switch (k1) { case 0: x = 0; y = 0; xstep = 1; ystep = 1; break; case 1: x = 0; y = 3; xstep = 1; ystep = -1; break; case 2: x = 3; y = 0; xstep = -1; ystep = 1; break; case 3: x = 3; y = 3; xstep = -1; ystep = -1; break; } for (i1 = 0; i1<4; i1++) { if ((POS(x, y + ystep*i1) < POS(x + xstep, y + ystep*i1)) || (POS(x + xstep, y + ystep*i1) < POS(x + 2 * xstep, y + ystep*i1)) || (POS(x + 2 * xstep, y + ystep*i1) < POS(x + 3 * xstep, y + ystep*i1)) ) break; if ((POS(x + xstep*i1, y) < POS(x + xstep*i1, y + ystep)) || (POS(x + xstep*i1, y + ystep) < POS(x + xstep*i1, y + 2 * ystep)) || (POS(x + xstep*i1, y + 2 * ystep) < POS(x + xstep*i1, y + 3 * ystep)) ) break; } if (i1 == 4) break; } } } } // done } INLINE void store_uint8(CMP_GLOBAL CGU_UINT8 u_dstptr[8], CGU_UINT32 data[2]) { int shift = 0; for (CGU_INT k=0; k<4; k++) { u_dstptr[k] = (data[0] >> shift)&0xFF; shift += 8; } shift = 0; for (CGU_INT k=4; k<8; k++) { u_dstptr[k] = (data[1] >> shift)&0xFF; shift += 8; } } void CompressBlockBC1_Internal( const CMP_Vec4uc srcBlockTemp[16], CMP_GLOBAL CGU_UINT32 compressedBlock[2], CMP_GLOBAL const CMP_BC15Options *BC15options) { CGU_UINT8 blkindex = 0; CGU_UINT8 srcindex = 0; CGU_UINT8 rgbBlock[64]; for ( CGU_INT32 j = 0; j < 4; j++) { for ( CGU_INT32 i = 0; i < 4; i++) { rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z; // B rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // G rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // R rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w; // A srcindex++; } } CMP_BC15Options internalOptions = *BC15options; CalculateColourWeightings(rgbBlock, &internalOptions); CompressRGBBlock(rgbBlock, compressedBlock, &internalOptions, TRUE, FALSE, internalOptions.m_nAlphaThreshold); } //============================================== USER INTERFACES ======================================================== #ifndef ASPM_GPU int CMP_CDECL CreateOptionsBC1(void **options) { CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options; if (BC15optionsDefault) { SetDefaultBC15Options(BC15optionsDefault); (*options) = BC15optionsDefault; } else { (*options) = NULL; return CGU_CORE_ERR_NEWMEM; } return CGU_CORE_OK; } int CMP_CDECL DestroyOptionsBC1(void *options) { if (!options) return CGU_CORE_ERR_INVALIDPTR; CMP_BC15Options *BCOptions = reinterpret_cast (options); delete BCOptions; return CGU_CORE_OK; } int CMP_CDECL SetQualityBC1(void *options, CGU_FLOAT fquality) { if (!options) return CGU_CORE_ERR_NEWMEM; CMP_BC15Options *BC15optionsDefault = reinterpret_cast (options); if (fquality < 0.0f) fquality = 0.0f; else if (fquality > 1.0f) fquality = 1.0f; BC15optionsDefault->m_fquality = fquality; return CGU_CORE_OK; } int CMP_CDECL SetAlphaThresholdBC1(void *options, CGU_UINT8 alphaThreshold) { if (!options) return CGU_CORE_ERR_INVALIDPTR; CMP_BC15Options *BC15optionsDefault = reinterpret_cast (options); BC15optionsDefault->m_nAlphaThreshold = alphaThreshold; return CGU_CORE_OK; } int CMP_CDECL SetDecodeChannelMapping(void *options, CGU_BOOL mapRGBA) { if (!options) return CGU_CORE_ERR_INVALIDPTR; CMP_BC15Options *BC15optionsDefault = reinterpret_cast (options); BC15optionsDefault->m_mapDecodeRGBA = mapRGBA; return CGU_CORE_OK; } int CMP_CDECL SetChannelWeightsBC1(void *options, CGU_FLOAT WeightRed, CGU_FLOAT WeightGreen, CGU_FLOAT WeightBlue) { if (!options) return CGU_CORE_ERR_INVALIDPTR; CMP_BC15Options *BC15optionsDefault = (CMP_BC15Options *)options; if ((WeightRed < 0.0f) || (WeightRed > 1.0f)) return CGU_CORE_ERR_RANGERED; if ((WeightGreen < 0.0f) || (WeightGreen > 1.0f)) return CGU_CORE_ERR_RANGEGREEN; if ((WeightBlue < 0.0f) || (WeightBlue > 1.0f)) return CGU_CORE_ERR_RANGEBLUE; BC15optionsDefault->m_bUseChannelWeighting = true; BC15optionsDefault->m_fChannelWeights[0] = WeightRed; BC15optionsDefault->m_fChannelWeights[1] = WeightGreen; BC15optionsDefault->m_fChannelWeights[2] = WeightBlue; return CGU_CORE_OK; } int CMP_CDECL CompressBlockBC1(const unsigned char *srcBlock, unsigned int srcStrideInBytes, CMP_GLOBAL unsigned char cmpBlock[8], const void *options = NULL) { CMP_Vec4uc inBlock[16]; //---------------------------------- // Fill the inBlock with source data //---------------------------------- CGU_INT srcpos = 0; CGU_INT dstptr = 0; for (CGU_UINT8 row=0; row < 4; row++) { srcpos = row * srcStrideInBytes; for (CGU_UINT8 col = 0; col < 4; col++) { inBlock[dstptr].x = CGU_UINT8(srcBlock[srcpos++]); inBlock[dstptr].y = CGU_UINT8(srcBlock[srcpos++]); inBlock[dstptr].z = CGU_UINT8(srcBlock[srcpos++]); inBlock[dstptr].w = CGU_UINT8(srcBlock[srcpos++]); dstptr++; } } CMP_BC15Options *BC15options = (CMP_BC15Options *)options; CMP_BC15Options BC15optionsDefault; if (BC15options == NULL) { BC15options = &BC15optionsDefault; SetDefaultBC15Options(BC15options); } CompressBlockBC1_Internal(inBlock, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options); return CGU_CORE_OK; } int CMP_CDECL DecompressBlockBC1(const unsigned char cmpBlock[8], CMP_GLOBAL unsigned char srcBlock[64], const void *options = NULL) { CMP_BC15Options *BC15options = (CMP_BC15Options *)options; CMP_BC15Options BC15optionsDefault; if (BC15options == NULL) { BC15options = &BC15optionsDefault; SetDefaultBC15Options(BC15options); } DecompressDXTRGB_Internal(srcBlock, ( CGU_UINT32 *)cmpBlock, BC15options); return CGU_CORE_OK; } #endif //============================================== OpenCL USER INTERFACE ======================================================== #ifdef ASPM_GPU CMP_STATIC CMP_KERNEL void CMP_GPUEncoder( CMP_GLOBAL const CMP_Vec4uc* ImageSource, CMP_GLOBAL CGU_UINT8* ImageDestination, CMP_GLOBAL Source_Info* SourceInfo, CMP_GLOBAL CMP_BC15Options* BC15options ) { CGU_UINT32 xID; CGU_UINT32 yID; //printf("SourceInfo: (H:%d,W:%d) Quality %1.2f \n", SourceInfo->m_src_height, SourceInfo->m_src_width, SourceInfo->m_fquality); #ifdef ASPM_GPU xID = get_global_id(0); yID = get_global_id(1); #else xID = 0; yID = 0; #endif if (xID >= (SourceInfo->m_src_width / BlockX)) return; if (yID >= (SourceInfo->m_src_height / BlockX)) return; int srcWidth = SourceInfo->m_src_width; CGU_UINT32 destI = (xID*BC1CompBlockSize) + (yID*(srcWidth / BlockX)*BC1CompBlockSize); int srcindex = 4 * (yID * srcWidth + xID); int blkindex = 0; CMP_Vec4uc srcData[16]; srcWidth = srcWidth - 4; for ( CGU_INT32 j = 0; j < 4; j++) { for ( CGU_INT32 i = 0; i < 4; i++) { srcData[blkindex++] = ImageSource[srcindex++]; } srcindex += srcWidth; } // fast low quality mode that matches v3.1 code if (SourceInfo->m_fquality <= 0.04f) CompressBlockBC1_Fast(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI]); else CompressBlockBC1_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options); } #endif