Add external libs for comparisons and benchmarks.

pull/310/head
Ignacio 4 years ago
parent 4a33d1ac75
commit 9a16bebf8f

@ -0,0 +1,56 @@
; Core def : Declares the module parameters for the DLL.
;
; Module-definition file listing the symbols exported by name from the DLL.
; The names are grouped by operation and then by block format (BC1..BC7).
EXPORTS
; Per-format option object creation.
CreateOptionsBC1
CreateOptionsBC2
CreateOptionsBC3
CreateOptionsBC4
CreateOptionsBC5
CreateOptionsBC6
CreateOptionsBC7
; Per-format option object destruction.
DestroyOptionsBC1
DestroyOptionsBC2
DestroyOptionsBC3
DestroyOptionsBC4
DestroyOptionsBC5
DestroyOptionsBC6
DestroyOptionsBC7
; Option setters.
SetDecodeChannelMapping
SetChannelWeightsBC1
SetChannelWeightsBC2
SetChannelWeightsBC3
SetQualityBC1
SetQualityBC2
SetQualityBC3
SetQualityBC4
SetQualityBC5
SetQualityBC6
SetQualityBC7
SetAlphaThresholdBC1
SetMaskBC6
SetMaskBC7
SetErrorThresholdBC7
SetAlphaOptionsBC7
; Single-block encoders.
CompressBlockBC1
CompressBlockBC2
CompressBlockBC3
CompressBlockBC4
CompressBlockBC5
CompressBlockBC6
CompressBlockBC7
; Single-block decoders.
DecompressBlockBC1
DecompressBlockBC2
DecompressBlockBC3
DecompressBlockBC4
DecompressBlockBC5
DecompressBlockBC6
DecompressBlockBC7

@ -0,0 +1,33 @@
# Build script for the CMP_Core static library (BC1-BC7 block encoders).
cmake_minimum_required(VERSION 3.10)
# The target is created with an empty source list; the real sources are
# attached below with target_sources().
add_library(CMP_Core STATIC "")
target_sources(CMP_Core
PRIVATE
shaders/BC1_Encode_kernel.h
shaders/BC1_Encode_kernel.cpp
shaders/BC2_Encode_kernel.h
shaders/BC2_Encode_kernel.cpp
shaders/BC3_Encode_kernel.h
shaders/BC3_Encode_kernel.cpp
shaders/BC4_Encode_kernel.h
shaders/BC4_Encode_kernel.cpp
shaders/BC5_Encode_kernel.h
shaders/BC5_Encode_kernel.cpp
shaders/BC6_Encode_kernel.h
shaders/BC6_Encode_kernel.cpp
shaders/BC7_Encode_Kernel.h
shaders/BC7_Encode_Kernel.cpp
shaders/BCn_Common_Kernel.h
shaders/Common_Def.h
)
# Include paths are PRIVATE, so they are not propagated to targets that link CMP_Core.
target_include_directories(CMP_Core
PRIVATE
shaders
source)
# The test sub-project is currently disabled.
#add_subdirectory(test)
# NOTE(review): the BC1/BC2 kernel sources wrap their CPU entry points
# (CreateOptionsBCn, CompressBlockBCn, ...) in "#ifndef ASPM_GPU" and their
# OpenCL kernel in "#ifdef ASPM_GPU". Defining ASPM_GPU here therefore appears
# to compile the CPU API out on UNIX builds - confirm this is intended.
if (UNIX)
target_compile_definitions(CMP_Core PRIVATE _LINUX ASPM_GPU)
endif()

@ -0,0 +1,582 @@
//=====================================================================
// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//=====================================================================
#include "BC1_Encode_kernel.h"
//============================================== BC1 INTERFACES =======================================================
// Fast, low-quality BC1 encoder for a single 4x4 block.
//
// srcBlockTemp    : the 16 source pixels; .x/.y/.z are read as the colour
//                   channels, .w is not read.
// compressedBlock : output; word 0 receives the two packed 5:6:5 endpoint
//                   colours (c0 in the low 16 bits, c1 in the high 16 bits),
//                   word 1 receives sixteen 2-bit selectors, pixel i at bits
//                   [2*i, 2*i+1].
//
// The block is fitted with a single axis through the mean colour; pixels are
// projected onto that axis and quantised to four positions along it.
//
// Compared with the earlier form of this routine, work that never influenced
// compressedBlock has been removed: the duplicate-pixel scan (its count was
// unconditionally overwritten with 16), the per-pixel axis error, the
// per-cluster x/y averages and the corner pattern search. The values written
// to compressedBlock are computed exactly as before.
void CompressBlockBC1_Fast(
    CMP_Vec4uc srcBlockTemp[16],
    CMP_GLOBAL CGU_UINT32 compressedBlock[2])
{
    int       i;
    CMP_Vec3f rgb;
    CMP_Vec3f average_rgb;                       // The centrepoint of the axis
    CMP_Vec3f v_rgb;                             // The axis
    CMP_Vec3f uniques[16];                       // Transformed colour of every pixel (no de-duplication is performed)
    const int unique_pixels = 16;                // Every pixel is treated as its own entry
    CGU_FLOAT unique_recip;                      // Reciprocal of the above for fast multiplication
    CGU_FLOAT pos_on_axis[16];                   // The distance each pixel falls along the compression axis
    CGU_FLOAT left = 0, right = 0, centre = 0;   // The extremities and centre (average of left/right) along the axis
    int       swap = 0;                          // 1 when the endpoints were exchanged to keep c0 > c1

    // -------------------------------------------------------------------------------------
    // (3) Transform every pixel into the working colour space and sum them to find
    //     their average position
    // -------------------------------------------------------------------------------------
    {
        average_rgb = 0.0f;
        for (i = 0; i < 16; i++)
        {
            CMP_Vec3f trgb;
            rgb.x = (CGU_FLOAT)((srcBlockTemp[i].x) & 0xff);
            rgb.y = (CGU_FLOAT)((srcBlockTemp[i].y) & 0xff);
            rgb.z = (CGU_FLOAT)((srcBlockTemp[i].z) & 0xff);
            trgb.x = CS_RED(rgb.x, rgb.y, rgb.z);
            trgb.y = CS_GREEN(rgb.x, rgb.y, rgb.z);
            trgb.z = CS_BLUE(rgb.x, rgb.y, rgb.z);
            uniques[i] = trgb;
            average_rgb = average_rgb + trgb;
        }
        unique_recip = 1.0f / (CGU_FLOAT)unique_pixels;
        average_rgb = average_rgb * unique_recip;
    }

    // -------------------------------------------------------------------------------------
    // (4) For each component, reflect points about the average so all lie on the same side
    //     of the average, and compute the new average - this gives a second point that
    //     defines the axis. To compute the sign of the axis, sum the positive differences
    //     of G for each of R and B (the G axis is always positive in this implementation).
    //     If the G axis carries no information (both sums are zero) the R/B relationship
    //     is used instead.
    // -------------------------------------------------------------------------------------
    {
        CGU_FLOAT rg_pos, bg_pos, rb_pos;
        v_rgb = 0.0f;
        rg_pos = bg_pos = rb_pos = 0;
        for (i = 0; i < unique_pixels; i++)
        {
            rgb = uniques[i] - average_rgb;
#ifndef ASPM_GPU
            v_rgb.x += (CGU_FLOAT)fabs(rgb.x);
            v_rgb.y += (CGU_FLOAT)fabs(rgb.y);
            v_rgb.z += (CGU_FLOAT)fabs(rgb.z);
#else
            v_rgb = v_rgb + fabs(rgb);
#endif
            if (rgb.x > 0) { rg_pos += rgb.y; rb_pos += rgb.z; }
            if (rgb.z > 0) bg_pos += rgb.y;
        }
        v_rgb = v_rgb*unique_recip;
        if (rg_pos < 0) v_rgb.x = -v_rgb.x;
        if (bg_pos < 0) v_rgb.z = -v_rgb.z;
        if ((rg_pos == bg_pos) && (rg_pos == 0))
            if (rb_pos < 0) v_rgb.z = -v_rgb.z;
    }

    // -------------------------------------------------------------------------------------
    // (5) Normalise the axis; a zero-length axis is left unscaled
    // -------------------------------------------------------------------------------------
    {
        CGU_FLOAT v2_recip;
        v2_recip = (v_rgb.x*v_rgb.x + v_rgb.y*v_rgb.y + v_rgb.z*v_rgb.z);
        if (v2_recip > 0)
            v2_recip = 1.0f / (CGU_FLOAT)sqrt(v2_recip);
        else
            v2_recip = 1.0f;
        v_rgb = v_rgb*v2_recip;
    }

    // -------------------------------------------------------------------------------------
    // (6) Project every pixel onto the axis. With a unit axis v and centre P, the distance
    //     of point R along the axis is (R-P).v. The extremities are tracked at the same time.
    // -------------------------------------------------------------------------------------
    {
        left = 10000.0f;
        right = -10000.0f;
        for (i = 0; i < unique_pixels; i++)
        {
            CMP_Vec3f temp = (uniques[i] - average_rgb);
            pos_on_axis[i] = (temp.x * v_rgb.x) + (temp.y * v_rgb.y) + (temp.z * v_rgb.z);
            if (pos_on_axis[i] < left)
                left = pos_on_axis[i];
            if (pos_on_axis[i] > right)
                right = pos_on_axis[i];
        }
    }

    // -------------------------------------------------------------------------------------
    // (7) Move the average to the midpoint of the extremities and re-express all
    //     positions relative to it, so that left == -right afterwards
    // -------------------------------------------------------------------------------------
    {
        centre = (left + right) / 2;
        average_rgb = average_rgb + (v_rgb*centre);
        for (i = 0; i < unique_pixels; i++)
            pos_on_axis[i] -= centre;
        right -= centre;
        left -= centre;
    }

    // -------------------------------------------------------------------------------------
    // (8) Calculate the high and low output colour values.
    //     ROUND_AND_CLAMP applies a value-dependent rounding before the low bits are
    //     masked off, rather than a plain round-to-nearest.
    // -------------------------------------------------------------------------------------
    {
        CGU_UINT32 c0, c1, t;
        int rd, gd, bd;

        rgb = (average_rgb + (v_rgb * left));
        rd = (CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z);
        gd = (CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z);
        bd = (CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z);
        ROUND_AND_CLAMP(rd, 5);
        ROUND_AND_CLAMP(gd, 6);
        ROUND_AND_CLAMP(bd, 5);
        c0 = ((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3);

        rgb = average_rgb + (v_rgb * right);
        rd = (CGU_INT32)DCS_RED(rgb.x, rgb.y, rgb.z);
        gd = (CGU_INT32)DCS_GREEN(rgb.x, rgb.y, rgb.z);
        bd = (CGU_INT32)DCS_BLUE(rgb.x, rgb.y, rgb.z);
        ROUND_AND_CLAMP(rd, 5);
        ROUND_AND_CLAMP(gd, 6);
        ROUND_AND_CLAMP(bd, 5);
        c1 = (((rd & 0xf8) << 8) + ((gd & 0xfc) << 3) + ((bd & 0xf8) >> 3));

        // Force a 4-colour opaque block, which requires c0 to be greater than c1.
        if (c0 < c1)
        {
            t = c0;
            c0 = c1;
            c1 = t;
            swap = 1;
        }
        else if (c0 == c1)
        {
            // Equal endpoints cannot be ordered, so the block decodes in 3-colour mode.
            // Collapse every pixel onto one position so only one of the two points is
            // used, avoiding accidentally selecting the transparent entry.
            for (i = 0; i < unique_pixels; i++)
                pos_on_axis[i] = left;
            swap = 0;
        }
        else
        {
            swap = 0;
        }
        compressedBlock[0] = c0 | (c1 << 16);
    }

    // -------------------------------------------------------------------------------------
    // (9) Final clustering, creating the 2-bit values that define the output
    // -------------------------------------------------------------------------------------
    {
        CGU_UINT32 bit;
        CGU_FLOAT  division;

        compressedBlock[1] = 0;
        division = right*2.0f / 3.0f;
        centre = (left + right) / 2; // Actually, this code only works if centre is 0 or approximately so
        for (i = 0; i < 16; i++)
        {
            rgb.z = pos_on_axis[i];
            // Endpoints (far from the centre) are 0 and 1, while interpolants are 2 and 3
            if (fabs(rgb.z) >= division)
                bit = 0;
            else
                bit = 2;
            // Positive is in the latter half of the block
            if (rgb.z >= centre)
                bit += 1;
            // Set the output, taking swapping into account
            compressedBlock[1] |= ((bit^swap) << (2 * i));
        }
    }
}
// Writes two 32-bit words out as eight bytes, least-significant byte of each
// word first: data[0] fills u_dstptr[0..3] and data[1] fills u_dstptr[4..7].
INLINE void store_uint8(CMP_GLOBAL CGU_UINT8 u_dstptr[8], CGU_UINT32 data[2])
{
    for (CGU_INT word = 0; word < 2; word++)
    {
        for (CGU_INT byte = 0; byte < 4; byte++)
        {
            u_dstptr[word * 4 + byte] = (data[word] >> (byte * 8))&0xFF;
        }
    }
}
void CompressBlockBC1_Internal(
const CMP_Vec4uc srcBlockTemp[16],
CMP_GLOBAL CGU_UINT32 compressedBlock[2],
CMP_GLOBAL const CMP_BC15Options *BC15options)
{
CGU_UINT8 blkindex = 0;
CGU_UINT8 srcindex = 0;
CGU_UINT8 rgbBlock[64];
for ( CGU_INT32 j = 0; j < 4; j++) {
for ( CGU_INT32 i = 0; i < 4; i++) {
rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z; // B
rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // G
rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // R
rgbBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w; // A
srcindex++;
}
}
CMP_BC15Options internalOptions = *BC15options;
CalculateColourWeightings(rgbBlock, &internalOptions);
CompressRGBBlock(rgbBlock,
compressedBlock,
&internalOptions,
TRUE,
FALSE,
internalOptions.m_nAlphaThreshold);
}
//============================================== USER INTERFACES ========================================================
#ifndef ASPM_GPU
// Allocates a BC1 options object initialised with the default settings.
//
// options : out-parameter receiving the new object; release it with
//           DestroyOptionsBC1.
// Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when `options` is
// null, or CGU_CORE_ERR_NEWMEM (with *options set to NULL) when the allocation
// yields a null pointer.
int CMP_CDECL CreateOptionsBC1(void **options)
{
    // The out-pointer is written on every path below, so reject null up front.
    if (!options) return CGU_CORE_ERR_INVALIDPTR;

    // NOTE(review): a plain new-expression normally reports failure by throwing
    // rather than returning null; the null branch only triggers in builds where
    // operator new can return null.
    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
    if (!BC15optionsDefault) {
        (*options) = NULL;
        return CGU_CORE_ERR_NEWMEM;
    }
    SetDefaultBC15Options(BC15optionsDefault);
    (*options) = BC15optionsDefault;
    return CGU_CORE_OK;
}
// Releases an options object obtained from CreateOptionsBC1.
// Returns CGU_CORE_ERR_INVALIDPTR for a null pointer, CGU_CORE_OK otherwise.
int CMP_CDECL DestroyOptionsBC1(void *options)
{
    if (options == NULL) return CGU_CORE_ERR_INVALIDPTR;
    delete reinterpret_cast <CMP_BC15Options *>(options);
    return CGU_CORE_OK;
}
// Stores the encoder quality in a BC1 options object.
//
// options  : options object to modify.
// fquality : requested quality; values below 0 are stored as 0 and values
//            above 1 are stored as 1.
// Returns CGU_CORE_OK, or CGU_CORE_ERR_INVALIDPTR when `options` is null
// (the same code every other setter in this file, and SetQualityBC2, uses
// for a null options pointer).
int CMP_CDECL SetQualityBC1(void *options,
                            CGU_FLOAT fquality)
{
    if (!options) return CGU_CORE_ERR_INVALIDPTR;
    CMP_BC15Options *BC15optionsDefault = reinterpret_cast <CMP_BC15Options *>(options);
    if (fquality < 0.0f) fquality = 0.0f;
    else
        if (fquality > 1.0f) fquality = 1.0f;
    BC15optionsDefault->m_fquality = fquality;
    return CGU_CORE_OK;
}
// Stores the alpha threshold in a BC1 options object.
// Returns CGU_CORE_ERR_INVALIDPTR for a null `options`, CGU_CORE_OK otherwise.
int CMP_CDECL SetAlphaThresholdBC1(void *options,
                                   CGU_UINT8 alphaThreshold)
{
    if (options == NULL)
    {
        return CGU_CORE_ERR_INVALIDPTR;
    }
    reinterpret_cast <CMP_BC15Options *>(options)->m_nAlphaThreshold = alphaThreshold;
    return CGU_CORE_OK;
}
// Stores the decode channel-mapping flag in an options object.
// Returns CGU_CORE_ERR_INVALIDPTR for a null `options`, CGU_CORE_OK otherwise.
int CMP_CDECL SetDecodeChannelMapping(void *options,
                                      CGU_BOOL mapRGBA)
{
    if (options == NULL)
    {
        return CGU_CORE_ERR_INVALIDPTR;
    }
    reinterpret_cast <CMP_BC15Options *>(options)->m_mapDecodeRGBA = mapRGBA;
    return CGU_CORE_OK;
}
// Enables channel weighting on a BC1 options object and stores the three weights.
//
// Each weight must lie in [0, 1]; the first one out of range (checked in
// red, green, blue order) is reported with its own error code and nothing
// is modified.
// Returns CGU_CORE_ERR_INVALIDPTR for a null `options`, CGU_CORE_OK on success.
int CMP_CDECL SetChannelWeightsBC1(void *options,
                                   CGU_FLOAT WeightRed,
                                   CGU_FLOAT WeightGreen,
                                   CGU_FLOAT WeightBlue) {
    if (options == NULL)
        return CGU_CORE_ERR_INVALIDPTR;
    if ((WeightRed < 0.0f) || (WeightRed > 1.0f))
        return CGU_CORE_ERR_RANGERED;
    if ((WeightGreen < 0.0f) || (WeightGreen > 1.0f))
        return CGU_CORE_ERR_RANGEGREEN;
    if ((WeightBlue < 0.0f) || (WeightBlue > 1.0f))
        return CGU_CORE_ERR_RANGEBLUE;

    CMP_BC15Options *const bc15 = static_cast<CMP_BC15Options *>(options);
    bc15->m_bUseChannelWeighting = true;
    bc15->m_fChannelWeights[0] = WeightRed;
    bc15->m_fChannelWeights[1] = WeightGreen;
    bc15->m_fChannelWeights[2] = WeightBlue;
    return CGU_CORE_OK;
}
// Encodes one 4x4 pixel block to BC1.
//
// srcBlock         : first byte of the block's top-left pixel; each pixel is
//                    four consecutive bytes, copied in order into x, y, z, w.
// srcStrideInBytes : byte distance between the starts of consecutive rows.
// cmpBlock         : receives the 8 encoded bytes. It is written through a
//                    CGU_UINT32 pointer - NOTE(review): this presumably
//                    requires cmpBlock to be suitably aligned; confirm.
// options          : options object, or NULL to encode with default options.
// Returns CGU_CORE_OK, or CGU_CORE_ERR_INVALIDPTR when srcBlock or cmpBlock
// is null.
int CMP_CDECL CompressBlockBC1(const unsigned char *srcBlock,
                               unsigned int srcStrideInBytes,
                               CMP_GLOBAL unsigned char cmpBlock[8],
                               const void *options = NULL) {
    if (!srcBlock || !cmpBlock) return CGU_CORE_ERR_INVALIDPTR;

    //----------------------------------
    // Fill the inBlock with source data
    //----------------------------------
    CMP_Vec4uc inBlock[16];
    CGU_INT dstptr = 0;
    for (CGU_UINT8 row = 0; row < 4; row++)
    {
        // Keep the row offset unsigned: it is added to the pointer directly
        // instead of first being narrowed to a signed int.
        const unsigned char *rowStart = srcBlock + row * srcStrideInBytes;
        for (CGU_UINT8 col = 0; col < 4; col++)
        {
            inBlock[dstptr].x = CGU_UINT8(rowStart[col * 4 + 0]);
            inBlock[dstptr].y = CGU_UINT8(rowStart[col * 4 + 1]);
            inBlock[dstptr].z = CGU_UINT8(rowStart[col * 4 + 2]);
            inBlock[dstptr].w = CGU_UINT8(rowStart[col * 4 + 3]);
            dstptr++;
        }
    }

    // The caller's options are only read, so the const qualifier is kept;
    // defaults are materialised locally when none were supplied.
    CMP_BC15Options BC15optionsDefault;
    const CMP_BC15Options *BC15options = (const CMP_BC15Options *)options;
    if (BC15options == NULL)
    {
        SetDefaultBC15Options(&BC15optionsDefault);
        BC15options = &BC15optionsDefault;
    }
    CompressBlockBC1_Internal(inBlock, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options);
    return CGU_CORE_OK;
}
// Decodes one BC1 block into a 64-byte pixel buffer.
//
// cmpBlock : the 8 encoded bytes; read through a CGU_UINT32 pointer -
//            NOTE(review): presumably requires suitable alignment; confirm.
// srcBlock : receives the decoded 4x4 block (64 bytes).
// options  : options object, or NULL to decode with default options.
// Returns CGU_CORE_OK, or CGU_CORE_ERR_INVALIDPTR when cmpBlock or srcBlock
// is null.
int CMP_CDECL DecompressBlockBC1(const unsigned char cmpBlock[8],
                                 CMP_GLOBAL unsigned char srcBlock[64],
                                 const void *options = NULL) {
    if (!cmpBlock || !srcBlock) return CGU_CORE_ERR_INVALIDPTR;

    // Both inputs are only read, so their const qualifiers are preserved.
    CMP_BC15Options BC15optionsDefault;
    const CMP_BC15Options *BC15options = (const CMP_BC15Options *)options;
    if (BC15options == NULL)
    {
        SetDefaultBC15Options(&BC15optionsDefault);
        BC15options = &BC15optionsDefault;
    }
    DecompressDXTRGB_Internal(srcBlock, (const CGU_UINT32 *)cmpBlock, BC15options);
    return CGU_CORE_OK;
}
#endif
//============================================== OpenCL USER INTERFACE ========================================================
#ifdef ASPM_GPU
// Kernel entry point: each work-item encodes one 4x4 block of the source
// image to BC1.
//
// ImageSource      : source pixels, one CMP_Vec4uc per pixel, rows m_src_width apart.
// ImageDestination : output bytes; block (xID, yID) is written at
//                    BC1CompBlockSize bytes per block, rows of blocks consecutive.
// SourceInfo       : supplies m_src_width, m_src_height and m_fquality.
// BC15options      : encoder options used by the non-fast path.
CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
    CMP_GLOBAL const CMP_Vec4uc* ImageSource,
    CMP_GLOBAL CGU_UINT8* ImageDestination,
    CMP_GLOBAL Source_Info* SourceInfo,
    CMP_GLOBAL CMP_BC15Options* BC15options
)
{
    // This definition is only compiled when ASPM_GPU is defined, so the
    // work-item IDs are always taken from the runtime.
    CGU_UINT32 xID = get_global_id(0);
    CGU_UINT32 yID = get_global_id(1);

    // Work-items outside the grid of whole blocks have nothing to encode.
    if ((xID >= (SourceInfo->m_src_width / BlockX)) ||
        (yID >= (SourceInfo->m_src_height / BlockX)))
        return;

    int srcWidth = SourceInfo->m_src_width;
    CGU_UINT32 destI = (xID*BC1CompBlockSize) + (yID*(srcWidth / BlockX)*BC1CompBlockSize);

    // Index of the block's top-left pixel in ImageSource.
    int firstPixel = 4 * (yID * srcWidth + xID);

    // Gather the 4x4 block row by row.
    CMP_Vec4uc srcData[16];
    for (CGU_INT32 row = 0; row < 4; row++) {
        int rowStart = firstPixel + row * srcWidth;
        for (CGU_INT32 col = 0; col < 4; col++) {
            srcData[row * 4 + col] = ImageSource[rowStart + col];
        }
    }

    // fast low quality mode that matches v3.1 code
    if (SourceInfo->m_fquality <= 0.04f)
        CompressBlockBC1_Fast(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI]);
    else
        CompressBlockBC1_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
}
#endif

@ -0,0 +1,48 @@
//=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//=====================================================================
#ifndef BC1_ENCODE_KERNEL_H
#define BC1_ENCODE_KERNEL_H
#include "Common_Def.h"
#include "BCn_Common_Kernel.h"
// Forward colour-space transform used by the fast BC1 path: red and green
// pass through unchanged, the third component becomes the mean of blue and green.
// All macro arguments are parenthesized so expression arguments expand correctly.
#define CS_RED(r, g, b) (r)
#define CS_GREEN(r, g, b) (g)
#define CS_BLUE(r, g, b) (((b) + (g)) * 0.5f)
// Inverse of the transform above: recovers blue as 2*b - g.
#define DCS_RED(r, g, b) (r)
#define DCS_GREEN(r, g, b) (g)
#define DCS_BLUE(r, g, b) ((2.0f * (b)) - (g))
// Bytes per source pixel.
#define BYTEPP 4
// Size in bytes of one encoded BC1 block.
#define BC1CompBlockSize 8
// Clamps the integer lvalue v to [0, 255]; values already inside that range
// are instead adjusted by a value-dependent rounding term for a channel that
// keeps `shift` significant bits. Values clamped to 0 or 255 are not adjusted.
#define ROUND_AND_CLAMP(v, shift) \
{\
    if ((v) < 0) (v) = 0;\
    else if ((v) > 255) (v) = 255;\
    else (v) += (0x80 >> (shift)) - ((v) >> (shift));\
}
// Element (x, y) of a 4-wide array named pos_on_axis in the calling scope.
#define POS(x,y) (pos_on_axis[(x)+(y)*4])
#endif

@ -0,0 +1,261 @@
//=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//=====================================================================
#include "BC2_Encode_kernel.h"
//============================================== BC2 INTERFACES =======================================================
void DXTCV11CompressExplicitAlphaBlock(const CGU_UINT8 block_8[16], CMP_GLOBAL CGU_UINT32 block_dxtc[2])
{
CGU_UINT8 i;
block_dxtc[0] = block_dxtc[1] = 0;
for (i = 0; i < 16; i++)
{
int v = block_8[i];
v = (v + 7 - (v >> 4));
v >>= 4;
if (v < 0)
v = 0;
if (v > 0xf)
v = 0xf;
if (i < 8)
block_dxtc[0] |= v << (4 * i);
else
block_dxtc[1] |= v << (4 * (i - 8));
}
}
// Mask and width (in bits) of one packed explicit-alpha value.
#define EXPLICIT_ALPHA_PIXEL_MASK 0xf
#define EXPLICIT_ALPHA_PIXEL_BPP 4
// Packs a 4x4 block of alpha values into two words via
// DXTCV11CompressExplicitAlphaBlock. Always returns CGU_CORE_OK.
CGU_INT CompressExplicitAlphaBlock(const CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4],
                                   CMP_GLOBAL CGU_UINT32 compressedBlock[2])
{
    const CGU_INT status = CGU_CORE_OK;
    DXTCV11CompressExplicitAlphaBlock(alphaBlock, compressedBlock);
    return status;
}
void CompressBlockBC2_Internal(const CMP_Vec4uc srcBlockTemp[16],
CMP_GLOBAL CGU_UINT32 compressedBlock[4],
CMP_GLOBAL const CMP_BC15Options *BC15options)
{
CGU_UINT8 blkindex = 0;
CGU_UINT8 srcindex = 0;
CGU_UINT8 rgbaBlock[64];
for (CGU_INT32 j = 0; j < 4; j++) {
for (CGU_INT32 i = 0; i < 4; i++) {
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z; // B
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // G
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // R
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w; // A
srcindex++;
}
}
CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
for (CGU_INT32 i = 0; i < 16; i++)
alphaBlock[i] = (CGU_UINT8)(((CGU_INT32*)rgbaBlock)[i] >> RGBA8888_OFFSET_A);
// Need a copy, as CalculateColourWeightings sets variables in the BC15options
CMP_BC15Options internalOptions = *BC15options;
CalculateColourWeightings(rgbaBlock, &internalOptions);
CGU_INT err = CompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
if (err != 0)
return;
CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], &internalOptions,FALSE,FALSE,0);
}
//============================================== USER INTERFACES ========================================================
#ifndef ASPM_GPU
// Allocates a BC2 options object initialised with the default settings.
//
// options : out-parameter receiving the new object; release it with
//           DestroyOptionsBC2.
// Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when `options` is
// null, or CGU_CORE_ERR_NEWMEM (with *options set to NULL) when the allocation
// yields a null pointer.
int CMP_CDECL CreateOptionsBC2(void **options)
{
    // The out-pointer is written on every path below, so reject null up front.
    if (!options) return CGU_CORE_ERR_INVALIDPTR;

    // NOTE(review): a plain new-expression normally reports failure by throwing
    // rather than returning null; the null branch only triggers in builds where
    // operator new can return null.
    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
    if (!BC15optionsDefault) {
        (*options) = NULL;
        return CGU_CORE_ERR_NEWMEM;
    }
    SetDefaultBC15Options(BC15optionsDefault);
    (*options) = BC15optionsDefault;
    return CGU_CORE_OK;
}
// Releases an options object obtained from CreateOptionsBC2.
// Returns CGU_CORE_ERR_INVALIDPTR for a null pointer, CGU_CORE_OK otherwise.
int CMP_CDECL DestroyOptionsBC2(void *options)
{
    if (options == NULL) return CGU_CORE_ERR_INVALIDPTR;
    delete reinterpret_cast <CMP_BC15Options *>(options);
    return CGU_CORE_OK;
}
// Stores the encoder quality in a BC2 options object.
// Values below 0 are stored as 0 and values above 1 are stored as 1.
// Returns CGU_CORE_ERR_INVALIDPTR for a null `options`, CGU_CORE_OK otherwise.
int CMP_CDECL SetQualityBC2(void *options,
                            CGU_FLOAT fquality)
{
    if (options == NULL)
    {
        return CGU_CORE_ERR_INVALIDPTR;
    }
    CMP_BC15Options *const bc15 = reinterpret_cast <CMP_BC15Options *>(options);
    bc15->m_fquality = (fquality < 0.0f) ? 0.0f
                     : (fquality > 1.0f) ? 1.0f
                                         : fquality;
    return CGU_CORE_OK;
}
// Enables channel weighting on a BC2 options object and stores the three weights.
//
// Each weight must lie in [0, 1]; the first one out of range (checked in
// red, green, blue order) is reported with its own error code and nothing
// is modified.
// Returns CGU_CORE_ERR_INVALIDPTR for a null `options`, CGU_CORE_OK on success.
int CMP_CDECL SetChannelWeightsBC2(void *options,
                                   CGU_FLOAT WeightRed,
                                   CGU_FLOAT WeightGreen,
                                   CGU_FLOAT WeightBlue) {
    if (options == NULL)
        return CGU_CORE_ERR_INVALIDPTR;
    if ((WeightRed < 0.0f) || (WeightRed > 1.0f))
        return CGU_CORE_ERR_RANGERED;
    if ((WeightGreen < 0.0f) || (WeightGreen > 1.0f))
        return CGU_CORE_ERR_RANGEGREEN;
    if ((WeightBlue < 0.0f) || (WeightBlue > 1.0f))
        return CGU_CORE_ERR_RANGEBLUE;

    CMP_BC15Options *const bc15 = static_cast<CMP_BC15Options *>(options);
    bc15->m_bUseChannelWeighting = true;
    bc15->m_fChannelWeights[0] = WeightRed;
    bc15->m_fChannelWeights[1] = WeightGreen;
    bc15->m_fChannelWeights[2] = WeightBlue;
    return CGU_CORE_OK;
}
// Expands an explicit alpha block (DXT3): sixteen 4-bit values, eight per
// word starting at the lowest nibble, are widened to 8 bits by repeating
// each nibble in both halves of the output byte.
void DecompressExplicitAlphaBlock(CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4],
                                  const CGU_UINT32 compressedBlock[2])
{
    for (int i = 0; i < 16; i++)
    {
        const CGU_UINT32 word  = compressedBlock[i < 8 ? 0 : 1];
        const int        shift = (i % 8) * EXPLICIT_ALPHA_PIXEL_BPP;
        const CGU_UINT8  nibble = (CGU_UINT8)((word >> shift) & EXPLICIT_ALPHA_PIXEL_MASK);
        alphaBlock[i] = (CGU_UINT8)((nibble << EXPLICIT_ALPHA_PIXEL_BPP) | nibble);
    }
}
// Decodes one BC2 block: the colour part is decoded into rgbaBlock, then the
// alpha byte of every pixel is replaced with the explicitly stored alpha.
//
// rgbaBlock       : receives the 16 decoded pixels (accessed as 16 32-bit words).
// compressedBlock : the four encoded words; alpha part at DXTC_OFFSET_ALPHA,
//                   colour part at DXTC_OFFSET_RGB.
// BC15options     : options forwarded to the colour decoder.
void DecompressBC2_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[BLOCK_SIZE_4X4X4],
                            const CGU_UINT32 compressedBlock[4],
                            const CMP_BC15Options *BC15options)
{
    CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
    DecompressExplicitAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
    DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB],BC15options);

    // Build the mask and the alpha bits as unsigned values: shifting the
    // promoted (signed) byte into the top of the word could overflow.
    const CGU_UINT32 alphaMask = ((CGU_UINT32)BYTE_MASK) << RGBA8888_OFFSET_A;
    CMP_GLOBAL CGU_UINT32 *pixels = (CMP_GLOBAL CGU_UINT32*)rgbaBlock;
    for (CGU_UINT32 i = 0; i < 16; i++)
    {
        const CGU_UINT32 alphaBits = ((CGU_UINT32)alphaBlock[i]) << RGBA8888_OFFSET_A;
        pixels[i] = alphaBits | (pixels[i] & ~alphaMask);
    }
}
// Encodes one 4x4 pixel block to BC2.
//
// srcBlock         : first byte of the block's top-left pixel; each pixel is
//                    four consecutive bytes, copied in order into x, y, z, w.
// srcStrideInBytes : byte distance between the starts of consecutive rows.
// cmpBlock         : receives the 16 encoded bytes. It is written through a
//                    CGU_UINT32 pointer - NOTE(review): this presumably
//                    requires cmpBlock to be suitably aligned; confirm.
// options          : options object, or NULL to encode with default options.
// Returns CGU_CORE_OK, or CGU_CORE_ERR_INVALIDPTR when srcBlock or cmpBlock
// is null.
int CMP_CDECL CompressBlockBC2(const unsigned char *srcBlock,
                               unsigned int srcStrideInBytes,
                               CMP_GLOBAL unsigned char cmpBlock[16],
                               CMP_GLOBAL const void *options = NULL) {
    if (!srcBlock || !cmpBlock) return CGU_CORE_ERR_INVALIDPTR;

    //----------------------------------
    // Fill the inBlock with source data
    //----------------------------------
    CMP_Vec4uc inBlock[16];
    CGU_INT dstptr = 0;
    for (CGU_UINT8 row = 0; row < 4; row++)
    {
        // Keep the row offset unsigned: it is added to the pointer directly
        // instead of first being narrowed to a signed int.
        const unsigned char *rowStart = srcBlock + row * srcStrideInBytes;
        for (CGU_UINT8 col = 0; col < 4; col++)
        {
            inBlock[dstptr].x = CGU_UINT8(rowStart[col * 4 + 0]);
            inBlock[dstptr].y = CGU_UINT8(rowStart[col * 4 + 1]);
            inBlock[dstptr].z = CGU_UINT8(rowStart[col * 4 + 2]);
            inBlock[dstptr].w = CGU_UINT8(rowStart[col * 4 + 3]);
            dstptr++;
        }
    }

    // The caller's options are only read, so the const qualifier is kept;
    // defaults are materialised locally when none were supplied.
    CMP_BC15Options BC15optionsDefault;
    const CMP_BC15Options *BC15options = (const CMP_BC15Options *)options;
    if (BC15options == NULL)
    {
        SetDefaultBC15Options(&BC15optionsDefault);
        BC15options = &BC15optionsDefault;
    }
    CompressBlockBC2_Internal(inBlock, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options);
    return CGU_CORE_OK;
}
// Decodes one BC2 block into a 64-byte pixel buffer.
//
// cmpBlock : the 16 encoded bytes; read through a CGU_UINT32 pointer -
//            NOTE(review): presumably requires suitable alignment; confirm.
// srcBlock : receives the decoded 4x4 block (64 bytes).
// options  : options object, or NULL to decode with default options.
// Returns CGU_CORE_OK, or CGU_CORE_ERR_INVALIDPTR when cmpBlock or srcBlock
// is null.
int CMP_CDECL DecompressBlockBC2(const unsigned char cmpBlock[16],
                                 CMP_GLOBAL unsigned char srcBlock[64],
                                 const void *options = NULL) {
    if (!cmpBlock || !srcBlock) return CGU_CORE_ERR_INVALIDPTR;

    // Both inputs are only read, so their const qualifiers are preserved.
    CMP_BC15Options BC15optionsDefault;
    const CMP_BC15Options *BC15options = (const CMP_BC15Options *)options;
    if (BC15options == NULL)
    {
        SetDefaultBC15Options(&BC15optionsDefault);
        BC15options = &BC15optionsDefault;
    }
    DecompressBC2_Internal(srcBlock, (const CGU_UINT32 *)cmpBlock,BC15options);
    return CGU_CORE_OK;
}
#endif
//============================================== OpenCL USER INTERFACE ========================================================
#ifdef ASPM_GPU
// Kernel entry point: each work-item encodes one 4x4 block of the source
// image to BC2.
//
// ImageSource      : source pixels, one CMP_Vec4uc per pixel, rows m_src_width apart.
// ImageDestination : output bytes; block (xID, yID) is written at
//                    BC2CompBlockSize bytes per block, rows of blocks consecutive.
// SourceInfo       : supplies m_src_width and m_src_height.
// BC15options      : encoder options.
CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
    CMP_GLOBAL const CMP_Vec4uc* ImageSource,
    CMP_GLOBAL CGU_UINT8* ImageDestination,
    CMP_GLOBAL Source_Info* SourceInfo,
    CMP_GLOBAL CMP_BC15Options* BC15options
)
{
    // This definition is only compiled when ASPM_GPU is defined, so the
    // work-item IDs are always taken from the runtime.
    CGU_UINT32 xID = get_global_id(0);
    CGU_UINT32 yID = get_global_id(1);

    // Work-items outside the grid of whole blocks have nothing to encode.
    if ((xID >= (SourceInfo->m_src_width / BlockX)) ||
        (yID >= (SourceInfo->m_src_height / BlockX)))
        return;

    int srcWidth = SourceInfo->m_src_width;
    CGU_UINT32 destI = (xID*BC2CompBlockSize) + (yID*(srcWidth / BlockX)*BC2CompBlockSize);

    // Index of the block's top-left pixel in ImageSource.
    int firstPixel = 4 * (yID * srcWidth + xID);

    // Gather the 4x4 block row by row.
    CMP_Vec4uc srcData[16];
    for (CGU_INT32 row = 0; row < 4; row++) {
        int rowStart = firstPixel + row * srcWidth;
        for (CGU_INT32 col = 0; col < 4; col++) {
            srcData[row * 4 + col] = ImageSource[rowStart + col];
        }
    }

    CompressBlockBC2_Internal(srcData,(CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
}
#endif

@ -0,0 +1,34 @@
//=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//=====================================================================
// Declarations and constants shared by the BC2 encoder sources.
#ifndef BC2_ENCODE_KERNEL_H
#define BC2_ENCODE_KERNEL_H
#include "Common_Def.h"
#include "BCn_Common_Kernel.h"
// Size in bytes of one encoded BC2 block; the BC2 kernel entry point uses it
// as the per-block stride when computing its output offset.
#define BC2CompBlockSize 16
// NOTE(review): the two constants below are not referenced by the BC2 source
// that includes this header; presumably colour-channel and endpoint counts
// used elsewhere - confirm before relying on them.
#define NUM_CHANNELS 4
#define NUM_ENDPOINTS 2
#endif

@ -0,0 +1,218 @@
//=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//=====================================================================
#include "BC3_Encode_kernel.h"
//============================================== BC3 INTERFACES =======================================================
void CompressBlockBC3_Internal(const CMP_Vec4uc srcBlockTemp[16],
CMP_GLOBAL CGU_UINT32 compressedBlock[4],
CMP_GLOBAL const CMP_BC15Options *BC15options) {
CGU_UINT8 blkindex = 0;
CGU_UINT8 srcindex = 0;
CGU_UINT8 rgbaBlock[64];
for (CGU_INT32 j = 0; j < 4; j++) {
for (CGU_INT32 i = 0; i < 4; i++) {
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].z; // B
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].y; // G
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].x; // R
rgbaBlock[blkindex++] = (CGU_UINT8)srcBlockTemp[srcindex].w; // A
srcindex++;
}
}
CMP_BC15Options internalOptions = *BC15options;
CalculateColourWeightings(rgbaBlock, &internalOptions);
CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
for (CGU_INT32 i = 0; i < 16; i++)
alphaBlock[i] =
(CGU_UINT8)(((CGU_INT32 *)rgbaBlock)[i] >> RGBA8888_OFFSET_A);
CGU_INT err = CompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
if (err != 0) return;
CompressRGBBlock(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], &internalOptions,
FALSE, FALSE, 0);
}
//============================================== USER INTERFACES ========================================================
#ifndef ASPM_GPU
// Allocates a CMP_BC15Options object initialised with the default settings and
// returns it through *options. Release it with DestroyOptionsBC3.
//   options : out-parameter receiving the new object (set to NULL on
//             allocation failure).
// Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when options itself
// is NULL, or CGU_CORE_ERR_NEWMEM when the allocation yields NULL.
int CMP_CDECL CreateOptionsBC3(void **options)
{
    // Reject a null out-parameter instead of dereferencing it, matching the
    // pointer checks done by the other BC3 option entry points.
    if (!options) return CGU_CORE_ERR_INVALIDPTR;

    // NOTE(review): a plain new-expression throws std::bad_alloc rather than
    // returning NULL, so the failure branch below is only reachable when
    // operator new is replaced or exceptions are disabled.
    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
    if (!BC15optionsDefault) {
        (*options) = NULL;
        return CGU_CORE_ERR_NEWMEM;
    }
    SetDefaultBC15Options(BC15optionsDefault);
    (*options) = BC15optionsDefault;
    return CGU_CORE_OK;
}
// Frees an options object previously handed out by CreateOptionsBC3.
// Returns CGU_CORE_ERR_INVALIDPTR for a NULL pointer, CGU_CORE_OK otherwise.
int CMP_CDECL DestroyOptionsBC3(void *options)
{
    if (options == NULL) {
        return CGU_CORE_ERR_INVALIDPTR;
    }
    delete reinterpret_cast<CMP_BC15Options *>(options);
    return CGU_CORE_OK;
}
// Stores the quality setting in the options object, clamped to [0.0, 1.0].
//   options  : object created by CreateOptionsBC3.
//   fquality : requested quality; values outside [0, 1] are clamped.
// Returns CGU_CORE_ERR_INVALIDPTR for a NULL options pointer, else CGU_CORE_OK.
int CMP_CDECL SetQualityBC3(void *options,
                            CGU_FLOAT fquality)
{
    if (options == NULL) {
        return CGU_CORE_ERR_INVALIDPTR;
    }
    CGU_FLOAT clamped = fquality;
    if (clamped > 1.0f) {
        clamped = 1.0f;
    } else if (clamped < 0.0f) {
        clamped = 0.0f;
    }
    reinterpret_cast<CMP_BC15Options *>(options)->m_fquality = clamped;
    return CGU_CORE_OK;
}
// Enables per-channel weighting and stores the red, green and blue weights in
// the options object.
//   options     : object created by CreateOptionsBC3.
//   WeightRed   : red weight, must lie in [0.0, 1.0].
//   WeightGreen : green weight, must lie in [0.0, 1.0].
//   WeightBlue  : blue weight, must lie in [0.0, 1.0].
// Returns CGU_CORE_ERR_INVALIDPTR for a NULL options pointer,
// CGU_CORE_ERR_RANGERED / _RANGEGREEN / _RANGEBLUE for the first out-of-range
// weight (nothing is modified in that case), or CGU_CORE_OK on success.
int CMP_CDECL SetChannelWeightsBC3(void *options,
                                   CGU_FLOAT WeightRed,
                                   CGU_FLOAT WeightGreen,
                                   CGU_FLOAT WeightBlue) {
    // Use the named error code, as the other BC3 option setters do, instead of
    // a bare literal.
    if (!options) return CGU_CORE_ERR_INVALIDPTR;
    CMP_BC15Options *BC15optionsDefault = (CMP_BC15Options *)options;

    // Validate every weight before touching the options object.
    if ((WeightRed < 0.0f) || (WeightRed > 1.0f)) return CGU_CORE_ERR_RANGERED;
    if ((WeightGreen < 0.0f) || (WeightGreen > 1.0f)) return CGU_CORE_ERR_RANGEGREEN;
    if ((WeightBlue < 0.0f) || (WeightBlue > 1.0f)) return CGU_CORE_ERR_RANGEBLUE;

    BC15optionsDefault->m_bUseChannelWeighting = true;
    BC15optionsDefault->m_fChannelWeights[0] = WeightRed;
    BC15optionsDefault->m_fChannelWeights[1] = WeightGreen;
    BC15optionsDefault->m_fChannelWeights[2] = WeightBlue;
    return CGU_CORE_OK;
}
// Decodes one BC3 block into 16 packed 32-bit texels.
//   rgbaBlock       : receives 64 bytes of decoded data.
//   compressedBlock : alpha part at DXTC_OFFSET_ALPHA, colour part at
//                     DXTC_OFFSET_RGB.
//   BC15options     : forwarded to the colour decoder.
// The colour part is decoded first; the alpha byte of every texel is then
// replaced with the separately decoded alpha value.
void DecompressBC3_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
                            const CGU_UINT32 compressedBlock[4],
                            const CMP_BC15Options *BC15options) {
    CGU_UINT8 alphaBlock[BLOCK_SIZE_4X4];
    DecompressAlphaBlock(alphaBlock, &compressedBlock[DXTC_OFFSET_ALPHA]);
    DecompressDXTRGB_Internal(rgbaBlock, &compressedBlock[DXTC_OFFSET_RGB], BC15options);

    CMP_GLOBAL CGU_UINT32 *texels = (CMP_GLOBAL CGU_UINT32 *)rgbaBlock;
    for (CGU_UINT32 t = 0; t < 16; t++) {
        // Clear the alpha byte, then merge in the decoded alpha.
        const CGU_UINT32 colourBits = texels[t] & ~(BYTE_MASK << RGBA8888_OFFSET_A);
        texels[t] = (alphaBlock[t] << RGBA8888_OFFSET_A) | colourBits;
    }
}
// Compresses a 4x4 block of 4-byte texels into a 16-byte BC3 block.
//   srcBlock         : first byte of the block's top-left texel.
//   srcStrideInBytes : byte distance between the starts of consecutive rows.
//   cmpBlock         : receives the compressed block.
//   options          : options from CreateOptionsBC3, or NULL for defaults.
// Always returns CGU_CORE_OK.
int CMP_CDECL CompressBlockBC3( const unsigned char *srcBlock,
                                unsigned int srcStrideInBytes,
                                CMP_GLOBAL unsigned char cmpBlock[16],
                                const void *options = NULL) {
    // Gather the 16 texels; each one takes four consecutive source bytes.
    CMP_Vec4uc inBlock[16];
    for (CGU_UINT8 row = 0; row < 4; row++) {
        const CGU_INT rowStart = row * srcStrideInBytes;
        for (CGU_UINT8 col = 0; col < 4; col++) {
            const CGU_INT byteAt = rowStart + col * 4;
            const CGU_INT texel  = row * 4 + col;
            inBlock[texel].x = CGU_UINT8(srcBlock[byteAt + 0]);
            inBlock[texel].y = CGU_UINT8(srcBlock[byteAt + 1]);
            inBlock[texel].z = CGU_UINT8(srcBlock[byteAt + 2]);
            inBlock[texel].w = CGU_UINT8(srcBlock[byteAt + 3]);
        }
    }

    // Fall back to default options when the caller supplied none.
    CMP_BC15Options fallbackOptions;
    CMP_BC15Options *activeOptions = (CMP_BC15Options *)options;
    if (!activeOptions) {
        activeOptions = &fallbackOptions;
        SetDefaultBC15Options(activeOptions);
    }

    CompressBlockBC3_Internal(inBlock, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, activeOptions);
    return CGU_CORE_OK;
}
// Decodes a 16-byte BC3 block into 64 bytes of texel data.
//   cmpBlock : compressed input block.
//   srcBlock : receives the decoded block.
//   options  : options from CreateOptionsBC3, or NULL for defaults.
// Always returns CGU_CORE_OK.
int CMP_CDECL DecompressBlockBC3(const unsigned char cmpBlock[16],
                                 CMP_GLOBAL unsigned char srcBlock[64],
                                 const void *options = NULL) {
    CMP_BC15Options fallbackOptions;
    CMP_BC15Options *activeOptions = (CMP_BC15Options *)options;
    if (!activeOptions) {
        activeOptions = &fallbackOptions;
        SetDefaultBC15Options(activeOptions);
    }
    DecompressBC3_Internal(srcBlock, (CGU_UINT32 *)cmpBlock, activeOptions);
    return CGU_CORE_OK;
}
#endif
//============================================== OpenCL USER INTERFACE ====================================================
#ifdef ASPM_GPU
// GPU kernel entry point for BC3: the work item at global id (x, y) compresses
// the 4x4 texel block at block coordinates (x, y).
//   ImageSource      : source texels, indexed row-major using m_src_width.
//   ImageDestination : output buffer; block (x, y) is written at byte offset
//                      (x + y * blocksPerRow) * BC3CompBlockSize.
//   SourceInfo       : supplies m_src_width and m_src_height.
//   BC15options      : encoder options forwarded to CompressBlockBC3_Internal.
CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
    CMP_GLOBAL const CMP_Vec4uc *ImageSource,
    CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo,
    CMP_GLOBAL CMP_BC15Options *BC15options) {
    CGU_UINT32 xID;
    CGU_UINT32 yID;
#ifdef ASPM_GPU
    xID = get_global_id(0);
    yID = get_global_id(1);
#else
    xID = 0;
    yID = 0;
#endif
    // Work items that fall outside the block grid have nothing to do.
    if ((xID >= (SourceInfo->m_src_width / BlockX)) ||
        (yID >= (SourceInfo->m_src_height / BlockX))) {
        return;
    }

    const int imageWidth = SourceInfo->m_src_width;
    const CGU_UINT32 destI =
        (xID * BC3CompBlockSize) + (yID * (imageWidth / BlockX) * BC3CompBlockSize);

    // Index of the block's top-left texel in the source image.
    const int firstTexel = 4 * (yID * imageWidth + xID);

    CMP_Vec4uc srcData[16];
    for (CGU_INT32 row = 0; row < 4; row++) {
        const int rowStart = firstTexel + row * imageWidth;
        for (CGU_INT32 col = 0; col < 4; col++) {
            srcData[row * 4 + col] = ImageSource[rowStart + col];
        }
    }

    CompressBlockBC3_Internal(
        srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
}
#endif

@ -0,0 +1,31 @@
//=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//=====================================================================
#ifndef BC3_ENCODE_KERNEL_H
#define BC3_ENCODE_KERNEL_H
#include "Common_Def.h"
#include "BCn_Common_Kernel.h"
// Size in bytes of one compressed BC3 block; used as the per-block stride into
// the byte-addressed destination buffer of the GPU encoder.
#define BC3CompBlockSize 16
#endif

@ -0,0 +1,200 @@
//=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//=====================================================================
#include "BC4_Encode_kernel.h"
//============================================== BC4 INTERFACES =======================================================
void CompressBlockBC4_Internal(const CMP_Vec4uc srcBlockTemp[16],
CMP_GLOBAL CGU_UINT32 compressedBlock[2],
CMP_GLOBAL const CMP_BC15Options *BC15options) {
if (BC15options->m_fquality) {
// Reserved!
}
CGU_UINT8 blkindex = 0;
CGU_UINT8 srcindex = 0;
CGU_UINT8 alphaBlock[16];
for (CGU_INT32 j = 0; j < 4; j++) {
for (CGU_INT32 i = 0; i < 4; i++) {
alphaBlock[blkindex++] =
(CGU_UINT8)srcBlockTemp[srcindex].x; // Red channel
srcindex++;
}
}
CompressAlphaBlock(alphaBlock, (CMP_GLOBAL CGU_UINT32 *)compressedBlock);
}
// Decodes a BC4 block and replicates each decoded value into all four bytes
// of the corresponding output texel.
//   rgbaBlock       : receives 64 bytes (16 texels x 4 bytes).
//   compressedBlock : encoded BC4 block.
//   BC15options     : unused.
void DecompressBC4_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
                            const CGU_UINT32 compressedBlock[2],
                            const CMP_BC15Options *BC15options) {
    (void)BC15options;  // parameter intentionally unused

    CGU_UINT8 decoded[BLOCK_SIZE_4X4];
    DecompressAlphaBlock(decoded, compressedBlock);

    for (CGU_INT32 texel = 0; texel < 16; texel++) {
        const CGU_UINT8 value = (CGU_UINT8)decoded[texel];
        const CGU_INT32 base  = texel * 4;
        rgbaBlock[base + 0] = value;  // R
        rgbaBlock[base + 1] = value;  // G
        rgbaBlock[base + 2] = value;  // B
        rgbaBlock[base + 3] = value;  // A
    }
}
// Compresses 16 single-channel values into a BC4 block.
//   srcBlockTemp    : the 16 values to encode.
//   compressedBlock : receives the encoded block.
//   BC15options     : unused.
void CompressBlockBC4_SingleChannel(const CGU_UINT8 srcBlockTemp[16],
                                    CMP_GLOBAL CGU_UINT32 compressedBlock[2],
                                    CMP_GLOBAL const CMP_BC15Options *BC15options) {
    (void)BC15options;  // parameter intentionally unused
    CompressAlphaBlock(srcBlockTemp, (CMP_GLOBAL CGU_UINT32 *)compressedBlock);
}
// Decodes a BC4 block into 16 single-channel values.
//   srcBlockTemp    : receives the 16 decoded values.
//   compressedBlock : encoded BC4 block.
//   BC15options     : unused.
void DecompressBlockBC4_SingleChannel(CGU_UINT8 srcBlockTemp[16],
                                      const CGU_UINT32 compressedBlock[2],
                                      const CMP_BC15Options *BC15options) {
    (void)BC15options;  // parameter intentionally unused
    DecompressAlphaBlock(srcBlockTemp, compressedBlock);
}
//============================================== USER INTERFACES ========================================================
#ifndef ASPM_GPU
// Allocates a CMP_BC15Options object initialised with the default settings and
// returns it through *options. Release it with DestroyOptionsBC4.
//   options : out-parameter receiving the new object (set to NULL on
//             allocation failure).
// Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when options itself
// is NULL, or CGU_CORE_ERR_NEWMEM when the allocation yields NULL.
int CMP_CDECL CreateOptionsBC4(void **options)
{
    // Reject a null out-parameter instead of dereferencing it, matching the
    // pointer checks done by the other BC4 option entry points.
    if (!options) return CGU_CORE_ERR_INVALIDPTR;

    // NOTE(review): a plain new-expression throws std::bad_alloc rather than
    // returning NULL, so the failure branch below is only reachable when
    // operator new is replaced or exceptions are disabled.
    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
    if (!BC15optionsDefault) {
        (*options) = NULL;
        return CGU_CORE_ERR_NEWMEM;
    }
    SetDefaultBC15Options(BC15optionsDefault);
    (*options) = BC15optionsDefault;
    return CGU_CORE_OK;
}
// Frees an options object previously handed out by CreateOptionsBC4.
// Returns CGU_CORE_ERR_INVALIDPTR for a NULL pointer, CGU_CORE_OK otherwise.
int CMP_CDECL DestroyOptionsBC4(void *options)
{
    if (options == NULL) {
        return CGU_CORE_ERR_INVALIDPTR;
    }
    delete reinterpret_cast<CMP_BC15Options *>(options);
    return CGU_CORE_OK;
}
// Stores the quality setting in the options object, clamped to [0.0, 1.0].
//   options  : object created by CreateOptionsBC4.
//   fquality : requested quality; values outside [0, 1] are clamped.
// Returns CGU_CORE_ERR_INVALIDPTR for a NULL options pointer, else CGU_CORE_OK.
int CMP_CDECL SetQualityBC4(void *options,
                            CGU_FLOAT fquality)
{
    if (options == NULL) {
        return CGU_CORE_ERR_INVALIDPTR;
    }
    CGU_FLOAT clamped = fquality;
    if (clamped > 1.0f) {
        clamped = 1.0f;
    } else if (clamped < 0.0f) {
        clamped = 0.0f;
    }
    reinterpret_cast<CMP_BC15Options *>(options)->m_fquality = clamped;
    return CGU_CORE_OK;
}
// Compresses a 4x4 block of single-byte values into an 8-byte BC4 block.
//   srcBlock         : first byte of the block's top-left value.
//   srcStrideInBytes : byte distance between the starts of consecutive rows.
//   cmpBlock         : receives the compressed block.
//   options          : options from CreateOptionsBC4, or NULL for defaults.
// Always returns CGU_CORE_OK.
int CMP_CDECL CompressBlockBC4(const unsigned char *srcBlock,
                               unsigned int srcStrideInBytes,
                               CMP_GLOBAL unsigned char cmpBlock[8],
                               const void *options = NULL) {
    unsigned char inBlock[16];
    //----------------------------------
    // Fill the inBlock with source data
    //----------------------------------
    CGU_INT srcpos = 0;
    CGU_INT dstptr = 0;
    for (CGU_UINT8 row = 0; row < 4; row++)
    {
        srcpos = row * srcStrideInBytes;
        for (CGU_UINT8 col = 0; col < 4; col++)
        {
            inBlock[dstptr++] = CGU_UINT8(srcBlock[srcpos++]);
        }
    }

    CMP_BC15Options *BC15options = (CMP_BC15Options *)options;
    // The fallback object must live at function scope: its address is handed
    // to the encoder below, so declaring it inside the if-block would leave
    // BC15options pointing at an object whose lifetime has already ended.
    CMP_BC15Options BC15optionsDefault;
    if (BC15options == NULL) {
        BC15options = &BC15optionsDefault;
        SetDefaultBC15Options(BC15options);
    }

    CompressBlockBC4_SingleChannel(inBlock, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, BC15options);
    return CGU_CORE_OK;
}
// Decodes an 8-byte BC4 block into 16 single-channel values.
//   cmpBlock : compressed input block.
//   srcBlock : receives the 16 decoded values.
//   options  : options from CreateOptionsBC4, or NULL for defaults.
// Always returns CGU_CORE_OK.
int CMP_CDECL DecompressBlockBC4(const unsigned char cmpBlock[8],
                                 CMP_GLOBAL unsigned char srcBlock[16],
                                 const void *options = NULL) {
    CMP_BC15Options fallbackOptions;
    CMP_BC15Options *activeOptions = (CMP_BC15Options *)options;
    if (!activeOptions) {
        activeOptions = &fallbackOptions;
        SetDefaultBC15Options(activeOptions);
    }
    DecompressBlockBC4_SingleChannel(srcBlock, (CGU_UINT32 *)cmpBlock, activeOptions);
    return CGU_CORE_OK;
}
#endif
//============================================== OpenCL USER INTERFACE ====================================================
#ifdef ASPM_GPU
// GPU kernel entry point for BC4: the work item at global id (x, y) compresses
// the 4x4 texel block at block coordinates (x, y).
//   ImageSource      : source texels, indexed row-major using m_src_width.
//   ImageDestination : output buffer; block (x, y) is written at byte offset
//                      (x + y * blocksPerRow) * BC4CompBlockSize.
//   SourceInfo       : supplies m_src_width and m_src_height.
//   BC15options      : encoder options forwarded to CompressBlockBC4_Internal.
CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(
    CMP_GLOBAL const CMP_Vec4uc *ImageSource,
    CMP_GLOBAL CGU_UINT8 *ImageDestination, CMP_GLOBAL Source_Info *SourceInfo,
    CMP_GLOBAL CMP_BC15Options *BC15options) {
    CGU_UINT32 xID;
    CGU_UINT32 yID;
#ifdef ASPM_GPU
    xID = get_global_id(0);
    yID = get_global_id(1);
#else
    xID = 0;
    yID = 0;
#endif
    // Work items that fall outside the block grid have nothing to do.
    if ((xID >= (SourceInfo->m_src_width / BlockX)) ||
        (yID >= (SourceInfo->m_src_height / BlockX))) {
        return;
    }

    const int imageWidth = SourceInfo->m_src_width;
    const CGU_UINT32 destI =
        (xID * BC4CompBlockSize) + (yID * (imageWidth / BlockX) * BC4CompBlockSize);

    // Index of the block's top-left texel in the source image.
    const int firstTexel = 4 * (yID * imageWidth + xID);

    CMP_Vec4uc srcData[16];
    for (CGU_INT32 row = 0; row < 4; row++) {
        const int rowStart = firstTexel + row * imageWidth;
        for (CGU_INT32 col = 0; col < 4; col++) {
            srcData[row * 4 + col] = ImageSource[rowStart + col];
        }
    }

    CompressBlockBC4_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
}
#endif

@ -0,0 +1,31 @@
//=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//=====================================================================
#ifndef BC4_ENCODE_KERNEL_H
#define BC4_ENCODE_KERNEL_H
#include "Common_Def.h"
#include "BCn_Common_Kernel.h"
// Size in bytes of one compressed BC4 block; used as the per-block stride into
// the byte-addressed destination buffer of the GPU encoder.
#define BC4CompBlockSize 8
#endif

@ -0,0 +1,264 @@
//=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//=====================================================================
#include "BC5_Encode_kernel.h"
//============================================== BC5 INTERFACES =======================================================
// Compresses the x and y components of 16 texels into a BC5 block.
//   srcBlockTemp    : 16 source texels; .x and .y are used.
//   compressedBlock : words [0..1] receive the encoded x channel, words
//                     [2..3] the encoded y channel.
//   BC15options     : m_fquality is read but currently has no effect.
void CompressBlockBC5_Internal(CMP_Vec4uc srcBlockTemp[16],
                               CMP_GLOBAL CGU_UINT32 compressedBlock[4],
                               CMP_GLOBAL CMP_BC15Options *BC15options)
{
    if (BC15options->m_fquality) {
        // Reserved
    }

    // Split the two channels into separate 16-entry arrays in one pass.
    CGU_UINT8 redChannel[16];
    CGU_UINT8 greenChannel[16];
    for (CGU_INT32 texel = 0; texel < 16; texel++) {
        redChannel[texel]   = (CGU_UINT8)srcBlockTemp[texel].x;  // Red channel
        greenChannel[texel] = (CGU_UINT8)srcBlockTemp[texel].y;  // Green channel
    }

    CompressAlphaBlock(redChannel, &compressedBlock[0]);
    CompressAlphaBlock(greenChannel, &compressedBlock[2]);
}
// Decodes a BC5 block into 16 four-byte texels.
//   rgbaBlock       : receives 64 bytes of decoded data.
//   compressedBlock : words [0..1] hold the first channel, [2..3] the second.
//   BC15options     : m_mapDecodeRGBA selects the output byte order.
// With m_mapDecodeRGBA set each texel is {first, second, 0, 255}; otherwise it
// is {0, second, first, 255}.
void DecompressBC5_Internal(CMP_GLOBAL CGU_UINT8 rgbaBlock[64],
                            CGU_UINT32 compressedBlock[4],
                            CMP_BC15Options *BC15options)
{
    CGU_UINT8 decodedR[BLOCK_SIZE_4X4];
    CGU_UINT8 decodedG[BLOCK_SIZE_4X4];
    DecompressAlphaBlock(decodedR, &compressedBlock[0]);
    DecompressAlphaBlock(decodedG, &compressedBlock[2]);

    // Decide once which byte of a texel takes the first channel and which one
    // is zero-filled; bytes 1 and 3 are the same in both layouts.
    CGU_UINT8 firstSlot;
    CGU_UINT8 zeroSlot;
    if (BC15options->m_mapDecodeRGBA) {
        firstSlot = 0;
        zeroSlot  = 2;
    } else {
        firstSlot = 2;
        zeroSlot  = 0;
    }

    for (CGU_INT32 texel = 0; texel < 16; texel++) {
        const CGU_INT32 base = texel * 4;
        rgbaBlock[base + firstSlot] = (CGU_UINT8)decodedR[texel];
        rgbaBlock[base + 1]         = (CGU_UINT8)decodedG[texel];
        rgbaBlock[base + zeroSlot]  = 0;
        rgbaBlock[base + 3]         = 255;
    }
}
// Compresses two independent 16-value channels into one BC5 block.
//   srcBlockR       : first channel, encoded into words [0..1].
//   srcBlockG       : second channel, encoded into words [2..3].
//   compressedBlock : receives the encoded block.
//   BC15options     : unused.
void CompressBlockBC5_DualChannel_Internal(const CGU_UINT8 srcBlockR[16],
                                           const CGU_UINT8 srcBlockG[16],
                                           CMP_GLOBAL CGU_UINT32 compressedBlock[4],
                                           CMP_GLOBAL const CMP_BC15Options *BC15options)
{
    (void)BC15options;  // parameter intentionally unused
    CompressAlphaBlock(srcBlockR, &compressedBlock[0]);
    CompressAlphaBlock(srcBlockG, &compressedBlock[2]);
}
// Decodes a BC5 block into two independent 16-value channels.
//   srcBlockR       : receives the channel stored in words [0..1].
//   srcBlockG       : receives the channel stored in words [2..3].
//   compressedBlock : encoded BC5 block.
//   BC15options     : unused.
void DecompressBC5_DualChannel_Internal(CMP_GLOBAL CGU_UINT8 srcBlockR[16],
                                        CMP_GLOBAL CGU_UINT8 srcBlockG[16],
                                        const CGU_UINT32 compressedBlock[4],
                                        const CMP_BC15Options *BC15options)
{
    (void)BC15options;  // parameter intentionally unused
    DecompressAlphaBlock(srcBlockR, &compressedBlock[0]);
    DecompressAlphaBlock(srcBlockG, &compressedBlock[2]);
}
//============================================== USER INTERFACES ========================================================
#ifndef ASPM_GPU
// Allocates a CMP_BC15Options object initialised with the default settings and
// returns it through *options. Release it with DestroyOptionsBC5.
//   options : out-parameter receiving the new object (set to NULL on
//             allocation failure).
// Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when options itself
// is NULL, or CGU_CORE_ERR_NEWMEM when the allocation yields NULL.
int CMP_CDECL CreateOptionsBC5(void **options)
{
    // Reject a null out-parameter instead of dereferencing it, matching the
    // pointer checks done by the other BC5 option entry points.
    if (!options) return CGU_CORE_ERR_INVALIDPTR;

    // NOTE(review): a plain new-expression throws std::bad_alloc rather than
    // returning NULL, so the failure branch below is only reachable when
    // operator new is replaced or exceptions are disabled.
    CMP_BC15Options *BC15optionsDefault = new CMP_BC15Options;
    if (!BC15optionsDefault) {
        (*options) = NULL;
        return CGU_CORE_ERR_NEWMEM;
    }
    SetDefaultBC15Options(BC15optionsDefault);
    (*options) = BC15optionsDefault;
    return CGU_CORE_OK;
}
// Frees an options object previously handed out by CreateOptionsBC5.
// Returns CGU_CORE_ERR_INVALIDPTR for a NULL pointer, CGU_CORE_OK otherwise.
int CMP_CDECL DestroyOptionsBC5(void *options)
{
    if (options == NULL) {
        return CGU_CORE_ERR_INVALIDPTR;
    }
    delete reinterpret_cast<CMP_BC15Options *>(options);
    return CGU_CORE_OK;
}
// Stores the quality setting in the options object, clamped to [0.0, 1.0].
//   options  : object created by CreateOptionsBC5.
//   fquality : requested quality; values outside [0, 1] are clamped.
// Returns CGU_CORE_ERR_INVALIDPTR for a NULL options pointer, else CGU_CORE_OK.
int CMP_CDECL SetQualityBC5(void *options,
                            CGU_FLOAT fquality)
{
    if (options == NULL) {
        return CGU_CORE_ERR_INVALIDPTR;
    }
    CGU_FLOAT clamped = fquality;
    if (clamped > 1.0f) {
        clamped = 1.0f;
    } else if (clamped < 0.0f) {
        clamped = 0.0f;
    }
    reinterpret_cast<CMP_BC15Options *>(options)->m_fquality = clamped;
    return CGU_CORE_OK;
}
// Compresses two 4x4 single-byte channels into a 16-byte BC5 block.
//   srcBlockR         : first byte of the first channel's top-left value.
//   srcStrideInBytes1 : row stride, in bytes, of the first channel.
//   srcBlockG         : first byte of the second channel's top-left value.
//   srcStrideInBytes2 : row stride, in bytes, of the second channel.
//   cmpBlock          : receives the compressed block.
//   options           : options from CreateOptionsBC5, or NULL for defaults.
// Always returns CGU_CORE_OK.
int CMP_CDECL CompressBlockBC5(const CGU_UINT8 *srcBlockR,
                               unsigned int srcStrideInBytes1,
                               const CGU_UINT8 *srcBlockG,
                               unsigned int srcStrideInBytes2,
                               CMP_GLOBAL CGU_UINT8 cmpBlock[16],
                               const void *options = NULL) {
    // Gather both channels row by row; each has its own stride.
    CGU_UINT8 inBlockR[16];
    CGU_UINT8 inBlockG[16];
    for (CGU_UINT8 row = 0; row < 4; row++) {
        const CGU_INT rowStartR = row * srcStrideInBytes1;
        const CGU_INT rowStartG = row * srcStrideInBytes2;
        for (CGU_UINT8 col = 0; col < 4; col++) {
            const CGU_INT slot = row * 4 + col;
            inBlockR[slot] = CGU_UINT8(srcBlockR[rowStartR + col]);
            inBlockG[slot] = CGU_UINT8(srcBlockG[rowStartG + col]);
        }
    }

    // Fall back to default options when the caller supplied none.
    CMP_BC15Options fallbackOptions;
    CMP_BC15Options *activeOptions = (CMP_BC15Options *)options;
    if (!activeOptions) {
        activeOptions = &fallbackOptions;
        SetDefaultBC15Options(activeOptions);
    }

    CompressBlockBC5_DualChannel_Internal(inBlockR, inBlockG, (CMP_GLOBAL CGU_UINT32 *)cmpBlock, activeOptions);
    return CGU_CORE_OK;
}
// Decodes a 16-byte BC5 block into two 16-value channels.
//   cmpBlock  : compressed input block.
//   srcBlockR : receives the first decoded channel.
//   srcBlockG : receives the second decoded channel.
//   options   : options from CreateOptionsBC5, or NULL for defaults.
// Always returns CGU_CORE_OK.
int CMP_CDECL DecompressBlockBC5(const CGU_UINT8 cmpBlock[16],
                                 CMP_GLOBAL CGU_UINT8 srcBlockR[16],
                                 CMP_GLOBAL CGU_UINT8 srcBlockG[16],
                                 const void *options = NULL) {
    CMP_BC15Options fallbackOptions;
    CMP_BC15Options *activeOptions = (CMP_BC15Options *)options;
    if (!activeOptions) {
        activeOptions = &fallbackOptions;
        SetDefaultBC15Options(activeOptions);
    }
    DecompressBC5_DualChannel_Internal(srcBlockR, srcBlockG, (CGU_UINT32 *)cmpBlock, activeOptions);
    return CGU_CORE_OK;
}
#endif
//============================================== OpenCL USER INTERFACE ====================================================
#ifdef ASPM_GPU
// GPU kernel entry point for BC5: the work item at global id (x, y) compresses
// the 4x4 texel block at block coordinates (x, y).
//   ImageSource      : source texels, indexed row-major using m_src_width.
//   ImageDestination : output buffer; block (x, y) is written at byte offset
//                      (x + y * blocksPerRow) * BC5CompBlockSize.
//   SourceInfo       : supplies m_src_width and m_src_height.
//   BC15options      : encoder options forwarded to CompressBlockBC5_Internal.
CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(CMP_GLOBAL const CMP_Vec4uc* ImageSource,
                                          CMP_GLOBAL CGU_UINT8* ImageDestination,
                                          CMP_GLOBAL Source_Info* SourceInfo,
                                          CMP_GLOBAL CMP_BC15Options* BC15options
)
{
    CGU_UINT32 xID;
    CGU_UINT32 yID;
#ifdef ASPM_GPU
    xID = get_global_id(0);
    yID = get_global_id(1);
#else
    xID = 0;
    yID = 0;
#endif
    // Work items that fall outside the block grid have nothing to do.
    if ((xID >= (SourceInfo->m_src_width / BlockX)) ||
        (yID >= (SourceInfo->m_src_height / BlockX))) {
        return;
    }

    const int imageWidth = SourceInfo->m_src_width;
    const CGU_UINT32 destI =
        (xID * BC5CompBlockSize) + (yID * (imageWidth / BlockX) * BC5CompBlockSize);

    // Index of the block's top-left texel in the source image.
    const int firstTexel = 4 * (yID * imageWidth + xID);

    CMP_Vec4uc srcData[16];
    for (CGU_INT32 row = 0; row < 4; row++) {
        const int rowStart = firstTexel + row * imageWidth;
        for (CGU_INT32 col = 0; col < 4; col++) {
            srcData[row * 4 + col] = ImageSource[rowStart + col];
        }
    }

    CompressBlockBC5_Internal(srcData, (CMP_GLOBAL CGU_UINT32 *)&ImageDestination[destI], BC15options);
}
#endif

@ -0,0 +1,31 @@
//=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//=====================================================================
#ifndef BC5_ENCODE_KERNEL_H
#define BC5_ENCODE_KERNEL_H
#include "Common_Def.h"
#include "BCn_Common_Kernel.h"
// Size in bytes of one compressed BC5 block; used as the per-block stride into
// the byte-addressed destination buffer of the GPU encoder.
#define BC5CompBlockSize 16
#endif

File diff suppressed because it is too large Load Diff

@ -0,0 +1,480 @@
//=====================================================================
// Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//=====================================================================
#ifndef BC6_ENCODE_KERNEL_H
#define BC6_ENCODE_KERNEL_H
#include "Common_Def.h"
// Compile-time limits and helper macros for the BC6 encoder.
#define MAX_TRACE 10
#define MAX_ENTRIES_QUANT_TRACE 16
// Block dimensions used to divide the image into blocks.
#define BlockX 4
#define BlockY 4
#define BYTEPP 4
#define COMPRESSED_BLOCK_SIZE 16 // Size of a compressed block in bytes
#define MAX_DIMENSION_BIG 4
#define MAX_SUBSET_SIZE 16 // Largest possible size for an individual subset
#define NUM_BLOCK_TYPES 8 // Number of block types in the format
#define MAX_SUBSETS 3 // Maximum number of possible subsets
#define MAX_PARTITIONS 64 // Maximum number of partition types
#define MAX_ENTRIES 64
#define MAX_TRY 20
#define MAX_PARTITIONS_TABLE (1+64+64)
#define DIMENSION 4
#define MAX_CLUSTERS_BIG 16
#define EPSILON 0.000001
#define MAX_CLUSTERS_QUANT_TRACE 8
//# Image Quality will increase as this number gets larger and end-to-end performance time will reduce
#define MAX_INDEX_BITS 4
#define HIGHQULITY_THRESHOLD 0.7F
#define qFAST_THRESHOLD 0.5F
#define F16NEGPREC_LIMIT_VAL -2048.0f // f16 negative precision limit value
#define LOG_CL_RANGE 5
#define LOG_CL_BASE 2
#define BIT_BASE 5
#define BIT_RANGE 9
#define MAX_CLUSTERS 8
// BTT(bits): bits minus BIT_BASE. CLT(cl): cl minus LOG_CL_BASE.
// The argument is not parenthesised in the expansion, so pass a simple
// identifier or an already parenthesised expression.
#define BTT(bits) (bits-BIT_BASE)
#define CLT(cl) (cl-LOG_CL_BASE)
// MASK(n): integer with the n lowest bits set.
#define MASK(n) ((1<<(n))-1)
// SIGN_EXTEND_TYPELESS(x,nb): if bit (nb-1) of x is set, ORs x with all bits
// from position nb upward; otherwise yields x unchanged.
#define SIGN_EXTEND_TYPELESS(x,nb) ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x))
#define CMP_HALF_MAX 65504.0f // positive half max
#ifndef ASPM_GPU
#include <bitset>
#include <assert.h>
//typedef uint8_t byte;
#else
//typedef bitset uint8_t;
//typedef uint8 byte;
#endif
// BC6 block constants: a block size of 16 and block dimensions of 4 x 4.
// NOTE(review): presumably BC6CompBlockSize is in bytes, like
// COMPRESSED_BLOCK_SIZE; its users are not visible here - confirm.
#define BC6CompBlockSize 16
#define BC6BlockX 4
#define BC6BlockY 4
// Record pairing an integer k with a float d.
// NOTE(review): the meaning of the two fields is defined by the code that
// uses this type, which is not visible here - confirm before relying on it.
typedef struct
{
CGU_INT k;
CGU_FLOAT d;
} BC6H_TRACE;
// BC6H mode and layout constants.
#define NCHANNELS 3
#define MAX_END_POINTS 2
#define MAX_BC6H_MODES 14
#define MAX_BC6H_PARTITIONS 32
#define MAX_TWOREGION_MODES 10
// Same value as the earlier definition of COMPRESSED_BLOCK_SIZE in this header.
#define COMPRESSED_BLOCK_SIZE 16 // Size of a compressed block in bytes
#define ONE_REGION_INDEX_OFFSET 65 // bit location to start saving color index values for single region shape
#define TWO_REGION_INDEX_OFFSET 82 // bit location to start saving color index values for two region shapes
#define MIN_MODE_FOR_ONE_REGION 11 // Two region shapes use modes 1..10 and single region shapes use 11..14
// Endpoint accessors: R_n(ep) selects ep[n / 2][n % 2][i]. They rely on a
// variable named i being in scope at the point of use.
#define R_0(ep) (ep)[0][0][i]
#define R_1(ep) (ep)[0][1][i]
#define R_2(ep) (ep)[1][0][i]
#define R_3(ep) (ep)[1][1][i]
#define FLT16_MAX 0x7bff
// Feature switches: USE_SHAKERHD is defined only for non-GPU builds,
// USE_NEWRAMP for every build.
#ifndef ASPM_GPU
#define USE_SHAKERHD
#endif
#define USE_NEWRAMP
// A pair of endpoints, A and B, each holding one float per channel
// (NCHANNELS entries).
typedef struct
{
CGU_FLOAT A[NCHANNELS];
CGU_FLOAT B[NCHANNELS];
} END_Points;
// Three-component float vector.
typedef struct
{
CGU_FLOAT x, y, z;
} BC6H_Vec3f;
// Describes one BC6H encoding mode; one row of the ModePartition table.
typedef struct
{
CGU_INT nbits; // Number of bits
CGU_INT prec[3]; // Precision of the quantized RGB endpoints
CGU_INT transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed
CGU_INT modebits; // number of mode bits
CGU_INT IndexPrec; // Index Precision
CGU_INT mode; // Mode value to save
CGU_INT lowestPrec; // Step size of each precision increment
} ModePartitions;
// Per-mode parameters, indexed by mode number (entry 0 is a placeholder).
// Each row lists, in ModePartitions field order:
//   nbits, prec[0],prec[1],prec[2], transformed, modebits, IndexPrec, mode, lowestPrec
__constant ModePartitions ModePartition[MAX_BC6H_MODES + 1] =
{
0, 0,0,0, 0, 0, 0, 0, 0, // Mode = Invalid
// Two region Partition
10, 5,5,5, 1, 2, 3, 0x00, 31, // Mode = 1
7, 6,6,6, 1, 2, 3, 0x01, 248, // Mode = 2
11, 5,4,4, 1, 5, 3, 0x02, 15, // Mode = 3
11, 4,5,4, 1, 5, 3, 0x06, 15, // Mode = 4
11, 4,4,5, 1, 5, 3, 0x0a, 15, // Mode = 5
9, 5,5,5, 1, 5, 3, 0x0e, 62, // Mode = 6
8, 6,5,5, 1, 5, 3, 0x12, 124, // Mode = 7
8, 5,6,5, 1, 5, 3, 0x16, 124, // Mode = 8
8, 5,5,6, 1, 5, 3, 0x1a, 124, // Mode = 9
6, 6,6,6, 0, 5, 3, 0x1e, 496, // Mode = 10
// One region Partition
10, 10,10,10, 0, 5, 4, 0x03, 31, // Mode = 11
11, 9,9,9, 1, 5, 4, 0x07, 15, // Mode = 12
12, 8,8,8, 1, 5, 4, 0x0b, 7, // Mode = 13
16, 4,4,4, 1, 5, 4, 0x0f, 1, // Mode = 14
};
//================================================
// Mode pattern order to try on endpoints.
// The order can be rearranged to set which modes get processed first;
// for now it is set in ascending mode order.
// The trailing comment on each entry lists nbits and prec[0..2] of that mode
// as given in the ModePartition table.
//================================================
__constant CGU_INT8 ModeFitOrder[MAX_BC6H_MODES + 1] =
{
0, //0: N/A
// ---- 2 region lower bits ---
1, // 10 5 5 5
2, // 7 6 6 6
3, // 11 5 4 4
4, // 11 4 5 4
5, // 11 4 4 5
6, // 9 5 5 5
7, // 8 6 5 5
8, // 8 5 6 5
9, // 8 5 5 6
10, // 6 6 6 6
//------ 1 region high bits ---
11, // 10 10 10 10
12, // 11 9 9 9
13, // 12 8 8 8
14 // 16 4 4 4
};
// The Region2FixUps are for our index[subset = 2][16][3] locations,
// indexed by shape region 2.
// The table has 32 entries, laid out four per line.
__constant CGU_INT g_Region2FixUp[32] =
{
7 , 3 , 11, 7,
3 , 11, 9 , 5,
2 , 12, 7 , 3,
11, 7 , 11, 3,
7 , 1 , 0 , 1,
0 , 1 , 0 , 7,
0 , 1 , 1 , 0,
4 , 4 , 1 , 0,
};
// Indexed by all shape regions.
// Partition set fix-ups for region 1; note that region 0 is always at 0.
// Normally 3 bits are used to define an index value; if the index is at the
// fix-up location it uses one bit less.
__constant CGU_INT g_indexfixups[32] =
{
15,15,15,15,
15,15,15,15,
15,15,15,15,
15,15,15,15,
15, 2, 8, 2,
2, 8, 8,15,
2, 8, 2, 2,
8, 8, 2, 2,
};
// Working state for encoding a single BC6H block: the selected mode and shape,
// the quantized endpoint values, the per-texel indices, the original input and
// the current and best-so-far partition data.
typedef struct
{
CGU_INT8 region; // one or two
CGU_INT8 m_mode; // m
CGU_INT8 d_shape_index; // d
CGU_INT rw; // endpt[0].A[0]
CGU_INT rx; // endpt[0].B[0]
CGU_INT ry; // endpt[1].A[0]
CGU_INT rz; // endpt[1].B[0]
CGU_INT gw; // endpt[0].A[1]
CGU_INT gx; // endpt[0].B[1]
CGU_INT gy; // endpt[1].A[1]
CGU_INT gz; // endpt[1].B[1]
CGU_INT bw; // endpt[0].A[2]
CGU_INT bx; // endpt[0].B[2]
CGU_INT by; // endpt[1].A[2]
CGU_INT bz; // endpt[1].B[2]
// The same 16 index bytes, viewable as a 4x4 grid or as a flat array.
union
{
CGU_UINT8 indices[4][4]; // Indices data after header block
CGU_UINT8 indices16[16];
};
// The input texels, viewable as floats or as raw bytes.
union
{
CGU_FLOAT din[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG]; // Original data input as floats
unsigned char cdin[256]; // as uchar to match float (assumes a 4-byte CGU_FLOAT - TODO confirm)
};
END_Points EC[MAX_END_POINTS]; // compressed endpoints expressed as endpt[0].A[] and endpt[1].B[]
END_Points E[MAX_END_POINTS]; // decompressed endpoints
CGU_BOOL issigned; // Format is 16 bit signed floating point
CGU_BOOL istransformed; // region two: all modes = true except mode=10
short wBits; // number of bits for the root endpoint
short tBits[NCHANNELS]; // number of bits used for the transformed endpoints
CGU_INT format; // floating point format used for decompression
BC6H_Vec3f Paletef[2][16];
CGU_INT index; // for debugging
// Each cur_best_* member below has the same type and dimensions as the member
// declared just before it.
CGU_FLOAT fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
CGU_FLOAT cur_best_fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
CGU_INT shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE];
CGU_INT cur_best_shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE];
CGU_INT entryCount[MAX_SUBSETS];
CGU_INT cur_best_entryCount[MAX_SUBSETS];
CGU_FLOAT partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
CGU_FLOAT cur_best_partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
CGU_BOOL optimized; // were end points optimized during final encoding
} BC6H_Encode_local;
#ifndef ASPM_GPU
using namespace std;
class BitHeader
{
public:
BitHeader(const CGU_UINT8 in[], CGU_INT sizeinbytes)
{
m_bits.reset();
m_sizeinbytes = sizeinbytes;
if ((in != NULL) && (sizeinbytes <= 16))
{
// Init bits set with given data
CGU_INT bitpos = 0;
for (CGU_INT i = 0; i < sizeinbytes; i++)
{
CGU_INT bit = 1;
for (CGU_INT j = 0; j < 8; j++)
{
m_bits[bitpos] = in[i] & bit ? 1 : 0;
bit = bit << 1;
bitpos++;
}
}
}
}
~BitHeader()
{
}
void transferbits(CGU_UINT8 in[], CGU_INT sizeinbytes)
{
if ((sizeinbytes <= m_sizeinbytes) && (in != NULL))
{
// Init bits set with given data
memset(in, 0, sizeinbytes);
CGU_INT bitpos = 0;
for (CGU_INT i = 0; i < sizeinbytes; i++)
{
CGU_INT bit = 1;
for (CGU_INT j = 0; j < 8; j++)
{
if (m_bits[bitpos]) in[i] |= bit;
bit = bit << 1;
bitpos++;
}
}
}
}
CGU_INT getvalue(CGU_INT start, CGU_INT bitsize)
{
CGU_INT value = 0;
CGU_INT end = start + bitsize - 1;
for (; end >= start; end--)
{
value |= m_bits[end] ? 1 : 0;
if (end > start) value <<= 1;
}
return value;
}
void setvalue(CGU_INT start, CGU_INT bitsize, CGU_INT value, CGU_INT maskshift = 0)
{
CGU_INT end = start + bitsize - 1;
CGU_INT mask = 0x1 << maskshift;
for (; start <= end; start++)
{
m_bits[start] = (value&mask) ? 1 : 0;
mask <<= 1;
}
}
bitset<128> m_bits; // 16 bytes max
CGU_INT m_sizeinbytes;
};
//==================== DECODER CODE ======================
// Number of END_Points entries in the decoder structure's EC[] and E[] arrays.
#define MAXENDPOINTS 2
// Largest unsigned and signed 16-bit values.
#define U16MAX 0xffff
#define S16MAX 0x7fff
// Sign-extends the low `tbits` bits of `w`: when bit (tbits - 1) is set, every
// bit from position `tbits` upward is set as well; otherwise `w` is returned as is.
// NOTE(review): `w` is evaluated twice, so arguments must be free of side effects.
// NOTE(review): `(~0) << (tbits)` left-shifts a negative int, which is undefined
// behavior before C++20 even though common compilers produce the expected mask.
#define SIGN_EXTEND(w,tbits) ((((signed(w))&(1<<((tbits)-1)))?((~0)<<(tbits)):0)|(signed(w)))
// 16-bit float flavours.
// NOTE(review): presumably the values stored in the `format` field of
// AMD_BC6H_Format -- confirm against the decoder.
enum
{
UNSIGNED_F16 = 1,
SIGNED_F16 = 2
};
// Region selectors: BC6_ONE is 0 and BC6_TWO is 1.
// NOTE(review): the `region` struct fields are commented "one or two", so check
// whether callers store these zero-based values or a literal count there.
enum
{
BC6_ONE = 0,
BC6_TWO
};
// Colour channel indices: red = 0, green = 1, blue = 2.
enum
{
C_RED = 0,
C_GREEN,
C_BLUE
};
// Plain aggregate holding three integer components in x, y, z order.
struct BC6H_Vec3
{
    int x;
    int y;
    int z;
};
// Host-side (non ASPM_GPU) description of one BC6H block: header fields,
// per-texel indices, endpoints and palettes.
// The field list parallels BC6H_Encode_local but uses plain C++ types and
// additionally carries an integer palette (Palete).
struct AMD_BC6H_Format
{
unsigned short region; // one or two
unsigned short m_mode; // m
int d_shape_index; // d
// Header endpoint fields; the r/g/b prefix is the channel and the w/x/y/z
// suffix selects endpt[0].A, endpt[0].B, endpt[1].A and endpt[1].B respectively.
int rw; // endpt[0].A[0]
int rx; // endpt[0].B[0]
int ry; // endpt[1].A[0]
int rz; // endpt[1].B[0]
int gw; // endpt[0].A[1]
int gx; // endpt[0].B[1]
int gy; // endpt[1].A[1]
int gz; // endpt[1].B[1]
int bw; // endpt[0].A[2]
int bx; // endpt[0].B[2]
int by; // endpt[1].A[2]
int bz; // endpt[1].B[2]
// The same 16 index bytes, viewed either as a 4x4 grid or as a flat array.
union
{
CGU_UINT8 indices[4][4]; // Indices data after header block
CGU_UINT8 indices16[16];
};
float din[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG]; // Original data input
END_Points EC[MAXENDPOINTS]; // compressed endpoints expressed as endpt[0].A[] and endpt[1].B[]
END_Points E[MAXENDPOINTS]; // decompressed endpoints
bool issigned; // Format is 16 bit signed floating point
bool istransformed; // region two: all modes = true except mode=10
short wBits; // number of bits for the root endpoint
short tBits[NCHANNELS]; // number of bits used for the transformed endpoints
int format; // floating point format are we using for decompression
BC6H_Vec3 Palete[2][16]; // integer palette, dimensioned [2][16]
BC6H_Vec3f Paletef[2][16]; // float palette, dimensioned [2][16]
int index; // for debugging
// Each of the following comes as a current value plus a "cur_best_" copy
// (presumably the best candidate found so far -- TODO confirm).
float fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
float cur_best_fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
int shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE];
int cur_best_shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE];
int entryCount[MAX_SUBSETS];
int cur_best_entryCount[MAX_SUBSETS];
float partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
float cur_best_partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
bool optimized; // were end points optimized during final encoding
};
// =================================== END OF DECODER CODE ========================================================
#endif
//-------------------------------------------------
// Set by Host : Read only in kernel
//-------------------------------------------------
// BC6H option block holding only the initialization-time settings.
// The same eight fields, in the same order, also appear inside BC6H_Encode.
typedef struct
{
// Setup at initialization time
CGU_FLOAT m_quality;
CGU_FLOAT m_performance;
CGU_FLOAT m_errorThreshold;
CGU_DWORD m_validModeMask; // NOTE(review): presumably a bit mask of enabled modes -- confirm
CGU_BOOL m_imageNeedsAlpha;
CGU_BOOL m_colourRestrict;
CGU_BOOL m_alphaRestrict;
CGU_BOOL m_isSigned;
} CMP_BC6HOptions;
// Full BC6H encoder settings: tuning thresholds, the initialization-time
// options and the source image dimensions.
// SetDefaultBC6Options() assigns a default to every field of this structure.
typedef struct
{
// These are quality parameters used to select when to use the high precision quantizer
// and shaker paths
CGU_FLOAT m_quantizerRangeThreshold; // default 0.0f
CGU_FLOAT m_shakerRangeThreshold; // default 0.0f
CGU_FLOAT m_partitionSearchSize; // default 0.20f
// Setup at initialization time
CGU_FLOAT m_quality; // default 1.0f
CGU_FLOAT m_performance; // default 0.0f
CGU_FLOAT m_errorThreshold; // default 0.0f
CGU_DWORD m_validModeMask; // default 0
CGU_BOOL m_imageNeedsAlpha; // default 0
CGU_BOOL m_colourRestrict; // default 0
CGU_BOOL m_alphaRestrict; // default 0
CGU_BOOL m_isSigned; // default 0
// Source image info : must be set prior to use in kernel
CGU_UINT32 m_src_width; // default 4
CGU_UINT32 m_src_height; // default 4
CGU_UINT32 m_src_stride; // default 0
} BC6H_Encode;
// Resets every field of *BC6Encode to its default value.
// A null pointer is accepted and silently ignored.
CMP_STATIC void SetDefaultBC6Options(BC6H_Encode *BC6Encode)
{
    // Nothing to initialise without a target structure.
    if (!BC6Encode)
        return;

    // Quantizer / shaker / partition search tuning
    BC6Encode->m_quantizerRangeThreshold = 0.0f;
    BC6Encode->m_shakerRangeThreshold    = 0.0f;
    BC6Encode->m_partitionSearchSize     = 0.20f;

    // Initialization-time options
    BC6Encode->m_quality         = 1.0f;
    BC6Encode->m_performance     = 0.0f;
    BC6Encode->m_errorThreshold  = 0.0f;
    BC6Encode->m_validModeMask   = 0;
    BC6Encode->m_imageNeedsAlpha = 0;
    BC6Encode->m_colourRestrict  = 0;
    BC6Encode->m_alphaRestrict   = 0;
    BC6Encode->m_isSigned        = 0;

    // Source image info: a single 4x4 block with no stride set
    BC6Encode->m_src_width  = 4;
    BC6Encode->m_src_height = 4;
    BC6Encode->m_src_stride = 0;
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,300 @@
#ifndef _COMMON_DEFINITIONS_H
#define _COMMON_DEFINITIONS_H
//===============================================================================
// Copyright (c) 2007-2019 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2004-2006 ATI Technologies Inc.
//===============================================================================
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//
// File Name: Common_Def.h
// Description: common definitions used for CPU/HPC/GPU
//
//////////////////////////////////////////////////////////////////////////////
// Features
// USE_ASPM_CODE is currently disabled (the define below is commented out).
#ifdef _WIN32
//#define USE_ASPM_CODE
#endif
// Proxy ISPC compiler (Warning! Not all ASPM features will be available : expect build errors for specialized ASPM code!)
#ifdef ISPC
#define ASPM
#endif
// Using OpenCL Compiler
#ifdef __OPENCL_VERSION__
#define ASPM_GPU
#endif
// _LINUX builds always take the CPU path: ASPM_GPU is undefined here even when
// it was defined earlier or passed on the command line, and the headers the CPU
// code relies on are pulled in.
#ifdef _LINUX
#undef ASPM_GPU
#include <cstring>
#include <cmath>
#include <stdio.h>
#include "cmp_math_vec4.h"
#endif
// Larger of x and y; when neither compares greater the result is y.
// Only defined if the including code has not supplied its own CMP_MAX.
// Function-like macro: each argument may be evaluated twice, so avoid side effects.
#ifndef CMP_MAX
#define CMP_MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif
// Smaller of x and y; when neither compares less the result is y.
// Only defined if the including code has not supplied its own CMP_MIN.
// Function-like macro: each argument may be evaluated twice, so avoid side effects.
#ifndef CMP_MIN
#define CMP_MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
#define CMP_SET_BC13_DECODER_RGBA // Sets mapping BC1, BC2 & BC3 to decode Red,Green,Blue and Alpha
// RGBA to channels [0,1,2,3] else BGRA maps to [0,1,2,3]
// BC4 alpha always maps as AAAA to channels [0,1,2,3]
// BC5 decoded (Red&Green) maps R,G,B=0,A=255 to [0,1,2,3] else maps [B=0,G,R,A=255] to [0,1,2,3]
//#define USE_BLOCK_LINEAR
#define CMP_FLOAT_MAX 3.402823466e+38F // max value used to detect an error in processing
#define CMP_FLOAT_MAX_EXP 38 // decimal exponent of CMP_FLOAT_MAX
#define USE_PROCESS_SEPERATE_ALPHA // Enable this to use higher quality code using CompressDualIndexBlock
#define COMPRESSED_BLOCK_SIZE 16 // Size of a compressed block in bytes
#define MAX_DIMENSION_BIG 4 // Max number of channels (RGBA)
#define MAX_SUBSETS 3 // Maximum number of possible subsets
#define MAX_SUBSET_SIZE 16 // Largest possible size for an individual subset
#define BLOCK_SIZE_4X4X4 64 // 4 * 4 * 4
#define BLOCK_SIZE_4X4 16 // 4 * 4
#define BlockX 4 // block width
#define BlockY 4 // block height
//#define USE_BLOCK_LINEAR // Source Data is organized in linear form for each block : Experimental Code not fully developed
//#define USE_DOUBLE // Default is to use float, enable to use double data types only for float definitions
// Status codes for the Core API: CGU_CORE_OK is 0 and each following error
// code takes the next consecutive value.
typedef enum {
CGU_CORE_OK = 0, // No errors, call was successful
CGU_CORE_ERR_UNKOWN, // An unknown error occurred
CGU_CORE_ERR_NEWMEM, // New memory allocation failed
CGU_CORE_ERR_INVALIDPTR, // The pointer value used is invalid or null
CGU_CORE_ERR_RANGERED, // Values for the red channel are out of range (too high or too low)
CGU_CORE_ERR_RANGEGREEN, // Values for the green channel are out of range (too high or too low)
CGU_CORE_ERR_RANGEBLUE, // Values for the blue channel are out of range (too high or too low)
} CGU_ERROR_CODES;
//---------------------------------------------
// Predefinitions for GPU and CPU compiled code
//---------------------------------------------
#ifdef ASPM_GPU // GPU Based code
// ==== Vectors ====
// Vector aliases map straight onto the compiler's floatN / ucharN types.
// The CGU_ (uniform) and CGV_ (varying) spellings resolve to the same type here.
typedef float2 CGU_Vec2f;
typedef float2 CGV_Vec2f;
typedef float3 CMP_Vec3f;
typedef float3 CGU_Vec3f;
typedef float3 CGV_Vec3f;
typedef uchar3 CGU_Vec3uc;
typedef uchar3 CGV_Vec3uc;
typedef uchar4 CMP_Vec4uc;
typedef uchar4 CGU_Vec4uc;
typedef uchar4 CGV_Vec4uc;
#define USE_BC7_SP_ERR_IDX
// ASPM_PRINT takes a parenthesised argument list, e.g. ASPM_PRINT(("x=%d", x)).
#define ASPM_PRINT(args) printf args
// The class prefix, export marker, INLINE and the uniform/varying qualifiers
// all expand to nothing in this branch.
#define BC7_ENCODECLASS
#define CMP_EXPORT
#define INLINE
#define uniform
#define varying
#define CMP_GLOBAL __global
#define CMP_KERNEL __kernel
#define CMP_CONSTANT __constant
#define CMP_STATIC
// ==== Scalars ====
typedef unsigned int CGU_DWORD; //32bits
typedef int CGU_INT; //32bits
typedef int CGU_BOOL; // NOTE(review): the standard CPU branch below uses int8 for CGU_BOOL
typedef unsigned short CGU_SHORT; //16bits; NOTE(review): the standard CPU branch below uses uint8
typedef float CGU_FLOAT;
typedef unsigned int uint32; // need to remove this def
typedef int CGV_INT;
typedef unsigned int CGU_UINT;
typedef int CGUV_INT;
typedef int CGV_BOOL;
typedef char CGU_INT8;
typedef unsigned char CGU_UINT8;
typedef short CGU_INT16;
typedef unsigned short CGU_UINT16;
typedef int CGU_INT32;
typedef unsigned int CGU_UINT32;
typedef unsigned long CGU_UINT64;
typedef char CGV_INT8;
typedef unsigned char CGV_UINT8;
typedef short CGV_INT16;
typedef unsigned short CGV_UINT16;
typedef int CGV_INT32;
typedef unsigned int CGV_UINT32;
typedef unsigned long CGV_UINT64;
typedef float CGV_FLOAT;
#define TRUE 1
#define FALSE 0
#define CMP_CDECL
#else
// CPU & ASPM definitions
#ifdef ASPM // SPMD ,SIMD CPU code
// using hybrid (CPU/GPU) aspm compiler
// ASPM_PRINT takes a parenthesised argument list and forwards it to print.
#define ASPM_PRINT(args) print args
#define CMP_USE_FOREACH_ASPM
#define __ASPM__
#define BC7_ENCODECLASS
#define USE_BC7_SP_ERR_IDX
//#define USE_BC7_RAMP
#define CMP_EXPORT export
#define TRUE true
#define FALSE false
// In this branch `uniform` and `varying` are real qualifiers: the CGU_ types
// are uniform and the CGV_ types are varying.
typedef uniform bool CGU_BOOL;
typedef bool CGV_BOOL;
// Fixed-width unsigned aliases built on the compiler's intN types.
typedef unsigned int8 uint8;
typedef unsigned int16 uint16;
typedef unsigned int32 uint32;
typedef unsigned int64 uint64;
typedef uniform float CGU_FLOAT;
typedef varying float CGV_FLOAT;
typedef uniform uint8 CGU_UINT8;
typedef varying uint8 CGV_UINT8;
// Short vector types written with the <N> vector syntax.
typedef CGV_UINT8<4> CGV_Vec4uc;
typedef CGU_UINT8<4> CGU_Vec4uc;
typedef CGU_FLOAT<3> CGU_Vec3f;
typedef CGV_FLOAT<3> CGV_Vec3f;
typedef CGU_FLOAT<2> CGU_Vec2f;
typedef CGV_FLOAT<2> CGV_Vec2f;
#define CMP_CDECL
#else // standard CPU code
#include <stdio.h>
#include <string>
#include "cmp_math_vec4.h"
// using CPU compiler
// ASPM_PRINT takes a parenthesised argument list, e.g. ASPM_PRINT(("x=%d", x)).
#define ASPM_PRINT(args) printf args
#define USE_BC7_RAMP
#define USE_BC7_SP_ERR_IDX
#define CMP_EXPORT
#define BC7_ENCODECLASS BC7_EncodeClass::
#define TRUE 1
#define FALSE 0
// `uniform` and `varying` expand to nothing, so the "uniform"/"varying"
// typedefs below are plain scalar types.
#define uniform
#define varying
// Sized integer aliases.
// NOTE(review): int64/uint64 are spelled with `long`, which is only 32 bits wide
// on LLP64 targets such as 64-bit Windows -- confirm this is acceptable there.
typedef char int8;
typedef short int16;
typedef int int32;
typedef long int64;
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;
typedef unsigned long uint64;
typedef int8 CGV_BOOL;
typedef int8 CGU_BOOL;
typedef int16 CGU_WORD;
// NOTE(review): CGU_SHORT is 8 bits here but `unsigned short` in the ASPM_GPU branch -- verify which is intended.
typedef uint8 CGU_SHORT;
typedef int64 CGU_LONG;
typedef uint64 CGU_ULONG;
typedef uniform float CGU_FLOAT;
typedef varying float CGV_FLOAT;
typedef uniform uint8 CGU_UINT8;
typedef varying uint8 CGV_UINT8;
// Calling convention marker: __cdecl when WIN32 or _WIN64 is defined, otherwise empty.
#if defined(WIN32) || defined(_WIN64)
#define CMP_CDECL __cdecl
#else
#define CMP_CDECL
#endif
#endif
// Common CPU & ASPM definitions
// CMP_ASSERT expands to nothing, so its argument is never evaluated.
#define CMP_ASSERT(arg)
#define CMP_GLOBAL
#define CMP_KERNEL
// The kernel address-space qualifiers become plain `const` outside the GPU
// build, so tables declared with __constant still compile here.
#define __local const
#define __constant const
#define CMP_CONSTANT const
#define INLINE inline
#define CMP_STATIC static
// CGU_ (uniform) scalar aliases.
// NOTE(review): CGU_DWORD is a signed int32 here but `unsigned int` in the ASPM_GPU branch.
typedef uniform int32 CGU_DWORD;
typedef uniform uint8 CGU_UBYTE;
typedef uniform int CGU_INT;
typedef uniform int8 CGU_INT8;
typedef uniform int16 CGU_INT16;
typedef uniform uint16 CGU_UINT16;
typedef uniform int32 CGU_INT32;
typedef uniform uint32 CGU_UINT32;
typedef uniform uint64 CGU_UINT64;
// CGV_ scalar aliases (declared without a uniform qualifier).
typedef int CGV_INT;
typedef int8 CGV_INT8;
typedef int16 CGV_INT16;
typedef int32 CGV_INT32;
typedef uint16 CGV_UINT16;
typedef uint32 CGV_UINT32;
typedef uint64 CGV_UINT64;
#endif // ASPM_GPU
// Source image description: dimensions, block counts and a quality value.
// NOTE(review): the units are not visible here; presumably width/height are in
// pixels and the *_in_blocks fields count 4x4 blocks -- confirm with the callers.
typedef struct
{
CGU_UINT32 m_src_width;
CGU_UINT32 m_src_height;
CGU_UINT32 m_width_in_blocks;
CGU_UINT32 m_height_in_blocks;
CGU_FLOAT m_fquality;
} Source_Info;
// Ref Compute_CPU_HPC
// Describes a byte-addressed surface: a data pointer plus its width, height,
// stride and channel count.
// NOTE(review): units are not visible here; stride is presumably in bytes -- confirm.
struct texture_surface
{
    CGU_UINT8* ptr;
    CGU_INT    width;
    CGU_INT    height;
    CGU_INT    stride;
    CGU_INT    channels;
};
#endif

@ -0,0 +1,50 @@
REM ====================================
REM Hybrid Codecs: Full support in v4.0
REM ====================================
REM Copies the kernel sources into <output dir>Plugins\Compute.
REM Argument 1 is the build output directory. It is concatenated directly with
REM "Plugins", so it is expected to end with a backslash.
REM gets the output dir
set BUILD_OUTDIR=%1
REM get the batch files dir
SET mypath=%~dp0
echo %mypath:~0,-1%
REM Create the destination folders when they are missing. The existence check
REM uses the same BUILD_OUTDIR path that mkdir and XCopy use.
IF NOT EXIST %BUILD_OUTDIR%Plugins mkdir %BUILD_OUTDIR%Plugins
IF NOT EXIST %BUILD_OUTDIR%Plugins\Compute mkdir %BUILD_OUTDIR%Plugins\Compute
REM Build Vulkan Shader Binary
REM "%VULKAN_SDK%"\bin\glslangvalidator -V %mypath:~0,-1%\BC1.comp -o %BUILD_OUTDIR%\Plugins\Compute\BC1.spv
REM IF %ERRORLEVEL% GTR 0 exit 123
REM Enabled in v4.0
REM
REM del %BUILD_OUTDIR%Plugins\Compute\BC1_Encode_Kernel.cpp.cmp
REM del %BUILD_OUTDIR%Plugins\Compute\BC2_Encode_Kernel.cpp.cmp
REM del %BUILD_OUTDIR%Plugins\Compute\BC3_Encode_Kernel.cpp.cmp
REM del %BUILD_OUTDIR%Plugins\Compute\BC4_Encode_Kernel.cpp.cmp
REM del %BUILD_OUTDIR%Plugins\Compute\BC5_Encode_Kernel.cpp.cmp
REM del %BUILD_OUTDIR%Plugins\Compute\BC6_Encode_Kernel.cpp.cmp
REM del %BUILD_OUTDIR%Plugins\Compute\BC7_Encode_Kernel.cpp.cmp
REM Copy the shared headers and each BCn kernel header/source pair.
REM XCopy switches: /r overwrites read-only files, /d copies only newer files,
REM /y suppresses the overwrite prompt.
XCopy /r /d /y "%mypath:~0,-1%\Common_Def.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BCn_Common_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC1_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC2_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC3_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC4_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC5_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC6_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.h" %BUILD_OUTDIR%Plugins\Compute\
XCopy /r /d /y "%mypath:~0,-1%\BC7_Encode_Kernel.cpp" %BUILD_OUTDIR%Plugins\Compute\
echo "Dependencies copied done"

@ -0,0 +1,153 @@
//=====================================================================
// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
/// \file CMP_Core.h
//
//=====================================================================
#ifndef CMP_CORE_H
#define CMP_CORE_H
#include <stdint.h>
// Calling convention applied to every Core API entry point: __cdecl on
// Windows, nothing elsewhere.
// The definition is skipped when CMP_CDECL already exists (Common_Def.h defines
// the same macro under a different condition), which avoids a conflicting
// macro redefinition when both headers end up in one translation unit.
#ifndef CMP_CDECL
#ifdef _WIN32
#define CMP_CDECL __cdecl
#else
#define CMP_CDECL
#endif
#endif
//====================================================================================
// API Definitions for Core API
//------------------------------------------------------------------------------------
// All API return 0 on success else error codes > 0
// See Common_Def.h CGU_CORE_ values for the error codes
//=====================================================================================
//======================================================================================================
// Block level setting option: Create and Destroy Reference Pointers
//======================================================================================================
// Context create and destroy to use for BCn codec settings, where n is the set [1,2,3,4,5,6,7]
// All codecs will use default max quality settings, users can create multiple contexts to
// set quality levels, masks , channel mapping, etc...
// CreateOptionsBCn takes the address of a caller-owned void* (presumably set to
// the newly created options context on success -- TODO confirm in the implementation).
// Returns 0 on success, otherwise an error code > 0.
int CMP_CDECL CreateOptionsBC1(void **optionsBC1);
int CMP_CDECL CreateOptionsBC2(void **optionsBC2);
int CMP_CDECL CreateOptionsBC3(void **optionsBC3);
int CMP_CDECL CreateOptionsBC4(void **optionsBC4);
int CMP_CDECL CreateOptionsBC5(void **optionsBC5);
int CMP_CDECL CreateOptionsBC6(void **optionsBC6);
int CMP_CDECL CreateOptionsBC7(void **optionsBC7);
// DestroyOptionsBCn takes the options pointer itself; use the variant whose
// BCn suffix matches the CreateOptionsBCn call that produced it.
// Returns 0 on success, otherwise an error code > 0.
int CMP_CDECL DestroyOptionsBC1(void *optionsBC1);
int CMP_CDECL DestroyOptionsBC2(void *optionsBC2);
int CMP_CDECL DestroyOptionsBC3(void *optionsBC3);
int CMP_CDECL DestroyOptionsBC4(void *optionsBC4);
int CMP_CDECL DestroyOptionsBC5(void *optionsBC5);
int CMP_CDECL DestroyOptionsBC6(void *optionsBC6);
int CMP_CDECL DestroyOptionsBC7(void *optionsBC7);
//======================================================================================================
// Block level settings using the options Reference Pointers
//======================================================================================================
// Setting channel Weights : Applies to BC1, BC2 and BC3 valid ranges are [0..1.0f] Default is {1.0f, 1.0f , 1.0f}
// Use channel weightings. With swizzled formats the weighting applies to the data within the specified channel not the channel itself.
// Per-channel weights for BC1, BC2 and BC3. `options` is the context created by
// the matching CreateOptionsBCn call.
int CMP_CDECL SetChannelWeightsBC1(void *options, float WeightRed, float WeightGreen, float WeightBlue);
int CMP_CDECL SetChannelWeightsBC2(void *options, float WeightRed, float WeightGreen, float WeightBlue);
int CMP_CDECL SetChannelWeightsBC3(void *options, float WeightRed, float WeightGreen, float WeightBlue);
// True sets mapping CMP_Core BC1, BC2 & BC3 to decode Red,Green,Blue and Alpha as
// RGBA to channels [0,1,2,3] else BGRA maps to [0,1,2,3]
// Default is set to true.
// NOTE(review): `bool` is used without <stdbool.h>, so this header only compiles
// as C++ (or as C23) -- confirm whether plain C callers are meant to be supported.
int CMP_CDECL SetDecodeChannelMapping(void *options, bool mapRGBA);
// Quality setting per codec.
// NOTE(review): the valid range of fquality is not stated here; the encoder default is 1.0f.
int CMP_CDECL SetQualityBC1(void *options, float fquality);
int CMP_CDECL SetQualityBC2(void *options, float fquality);
int CMP_CDECL SetQualityBC3(void *options, float fquality);
int CMP_CDECL SetQualityBC4(void *options, float fquality);
int CMP_CDECL SetQualityBC5(void *options, float fquality);
int CMP_CDECL SetQualityBC6(void *options, float fquality);
int CMP_CDECL SetQualityBC7(void *options, float fquality);
// Codec-specific settings. Note that the mask is an unsigned int for BC6 but an
// unsigned char for BC7.
int CMP_CDECL SetAlphaThresholdBC1(void *options, unsigned char alphaThreshold);
int CMP_CDECL SetMaskBC6(void *options, unsigned int mask);
int CMP_CDECL SetMaskBC7(void *options, unsigned char mask);
int CMP_CDECL SetAlphaOptionsBC7(void *options, bool imageNeedsAlpha, bool colourRestrict, bool alphaRestrict);
int CMP_CDECL SetErrorThresholdBC7(void *options, float minThreshold, float maxThreshold);
//======================================================================================================
// (4x4) Block level 4 channel source CompressBlock and DecompressBlock API for BCn Codecs
//======================================================================================================
// The options parameter for these API can be set to null in the calls if defaults settings is sufficient
// Example: CompressBlockBC1(srcBlock,16,cmpBlock,NULL); For "C" call
// CompressBlockBC1(srcBlock,16,cmpBlock); For "C++" calls
//
// To use this parameter first create the options context using the CreateOptions call
// then use the Set Options to set various codec settings and pass them to the appropriate
// Compress or Decompress API.
// The source (srcBlock) channel format is expected to be RGBA:8888 by default for LDR Codecs
// for BC6H the format is RGBA Half float (16 bits per channel)
//------------------------------------------------------------------------------------------------------
// CMP_DEFAULTNULL is appended to the trailing `options` parameter of the block
// functions: in C++ it supplies a default argument of NULL, in C it expands to
// nothing and the argument must always be passed.
// NOTE(review): NULL is used here although only <stdint.h> is included above;
// C++ callers presumably get NULL from another header -- confirm.
#ifdef __cplusplus
#define CMP_DEFAULTNULL =NULL
#else
#define CMP_DEFAULTNULL
#endif
//=========================================================================================================
// 4 channel Sources, default format RGBA:8888 is processed as a 4x4 block starting at srcBlock location
// where each row of the block is calculated from srcStride
//=========================================================================================================
// Compress: srcBlock points at the first pixel of the 4x4 block and
// srcStrideInBytes is the row pitch; the result goes to cmpBlock (8 bytes for
// BC1, 16 bytes for BC2, BC3 and BC7).
int CMP_CDECL CompressBlockBC1(const unsigned char *srcBlock, unsigned int srcStrideInBytes, unsigned char cmpBlock[8 ], const void *options CMP_DEFAULTNULL);
int CMP_CDECL CompressBlockBC2(const unsigned char *srcBlock, unsigned int srcStrideInBytes, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL);
int CMP_CDECL CompressBlockBC3(const unsigned char *srcBlock, unsigned int srcStrideInBytes, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL);
int CMP_CDECL CompressBlockBC7(const unsigned char *srcBlock, unsigned int srcStrideInBytes, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL);
// Decompress: cmpBlock is the compressed input.
// NOTE(review): despite its name, the non-const srcBlock[64] parameter is
// presumably the decoded output (64 = 16 pixels * 4 channels) -- confirm.
int CMP_CDECL DecompressBlockBC1(const unsigned char cmpBlock[8 ], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL);
int CMP_CDECL DecompressBlockBC2(const unsigned char cmpBlock[16], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL);
int CMP_CDECL DecompressBlockBC3(const unsigned char cmpBlock[16], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL);
int CMP_CDECL DecompressBlockBC7(const unsigned char cmpBlock[16], unsigned char srcBlock[64], const void *options CMP_DEFAULTNULL);
//================================================
// 1 channel Source 4x4 8 bits per block
//================================================
int CMP_CDECL CompressBlockBC4(const unsigned char *srcBlock, unsigned int srcStrideInBytes, unsigned char cmpBlock[8], const void *options CMP_DEFAULTNULL);
int CMP_CDECL DecompressBlockBC4(const unsigned char cmpBlock[8], unsigned char srcBlock[16], const void *options CMP_DEFAULTNULL);
//================================================
// 2 channel Source 2x(4x4 8 bits)
//================================================
// Each channel has its own source pointer and stride when compressing, and its
// own 16-byte buffer when decompressing.
int CMP_CDECL CompressBlockBC5(const unsigned char *srcBlock1, unsigned int srcStrideInBytes1,
const unsigned char *srcBlock2, unsigned int srcStrideInBytes2,
unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL);
int CMP_CDECL DecompressBlockBC5(const unsigned char cmpBlock[16], unsigned char srcBlock1[16], unsigned char srcBlock2[16], const void *options CMP_DEFAULTNULL);
//========================================================================================
// For 3 channel Source RGB_16, Note srcStride is in unsigned short steps (2 bytes each)
//========================================================================================
// The decompressed block holds 48 unsigned shorts (16 pixels * 3 channels).
// NOTE(review): an earlier comment in this header describes the BC6H source as
// RGBA half float while this section says 3-channel RGB_16 -- confirm which layout
// CompressBlockBC6 actually reads.
int CMP_CDECL CompressBlockBC6(const unsigned short *srcBlock, unsigned int srcStrideInShorts, unsigned char cmpBlock[16], const void *options CMP_DEFAULTNULL);
int CMP_CDECL DecompressBlockBC6(const unsigned char cmpBlock[16], unsigned short srcBlock[48], const void *options CMP_DEFAULTNULL);
#endif // CMP_CORE

@ -0,0 +1,417 @@
//=====================================================================
// Copyright 2019 (c), Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//=====================================================================
#ifndef CMP_MATH_VEC4_H
#define CMP_MATH_VEC4_H
//====================================================
// Vector Class definitions for CPU & Intrinsics
//====================================================
#if defined (_LINUX) || defined (_WIN32)
//============================================= VEC2 ==================================================
/// Generic two-component vector (x, y) with component-wise arithmetic.
/// The components are stored contiguously as x then y, which is what the
/// pointer conversions and the indexing operators rely on.
template<class T>
class Vec2
{
public:
    T x;
    T y;

    // -----------------------------------------
    // Construction
    // -----------------------------------------

    /// Creates a vector with both components set to zero.
    Vec2()
        : x(T(0))
        , y(T(0))
    {
    }

    /// Creates a vector from explicit component values.
    Vec2(const T& vx, const T& vy)
        : x(vx)
        , y(vy)
    {
    }

    /// Creates a copy of another vector.
    Vec2(const Vec2<T>& val)
        : x(val.x)
        , y(val.y)
    {
    }

    /// Creates a vector whose components both equal v.
    Vec2(const T& v)
        : x(v)
        , y(v)
    {
    }

    // -----------------------------------------
    // Conversion, indexing and assignment
    // -----------------------------------------

    /// Read-only view of the components as a T array.
    operator const T* () const
    {
        return reinterpret_cast<const T*>(this);
    }

    /// Mutable view of the components as a T array.
    operator T* ()
    {
        return reinterpret_cast<T*>(this);
    }

    /// Read-only component access: index 0 is x, index 1 is y (no range check).
    const T& operator[](int i) const
    {
        const T* const components = reinterpret_cast<const T*>(this);
        return components[i];
    }

    /// Mutable component access: index 0 is x, index 1 is y (no range check).
    T& operator[](int i)
    {
        T* const components = reinterpret_cast<T*>(this);
        return components[i];
    }

    /// Copies both components from rhs and returns this vector.
    const Vec2<T>& operator=(const Vec2<T>& rhs)
    {
        x = rhs.x;
        y = rhs.y;
        return *this;
    }

    // -----------------------------------------
    // Comparison
    // -----------------------------------------

    /// True when every component equals its counterpart in rhs.
    bool operator==(const Vec2<T>& rhs) const
    {
        return (x == rhs.x && y == rhs.y);
    }

    /// True when at least one component differs from rhs.
    bool operator!=(const Vec2<T>& rhs) const
    {
        return (x != rhs.x || y != rhs.y);
    }

    // -----------------------------------------
    // Arithmetic
    // -----------------------------------------

    /// Component-wise sum.
    const Vec2<T> operator+(const Vec2<T>& rhs) const
    {
        return Vec2<T>(x + rhs.x, y + rhs.y);
    }

    /// Component-wise difference.
    const Vec2<T> operator-(const Vec2<T>& rhs) const
    {
        return Vec2<T>(x - rhs.x, y - rhs.y);
    }

    /// Every component multiplied by the scalar v.
    const Vec2<T> operator*(const T& v) const
    {
        return Vec2<T>(x * v, y * v);
    }

    /// Every component divided by the scalar v.
    const Vec2<T> operator/(const T& v) const
    {
        return Vec2<T>(x / v, y / v);
    }

    /// Adds rhs to this vector in place.
    Vec2<T>& operator+=(const Vec2<T>& rhs)
    {
        x += rhs.x;
        y += rhs.y;
        return *this;
    }

    /// Subtracts rhs from this vector in place.
    Vec2<T>& operator-=(const Vec2<T>& rhs)
    {
        x -= rhs.x;
        y -= rhs.y;
        return *this;
    }

    /// Multiplies this vector by the scalar v in place.
    Vec2<T>& operator*=(const T& v)
    {
        x *= v;
        y *= v;
        return *this;
    }

    /// Divides this vector by the scalar v in place.
    Vec2<T>& operator/=(const T& v)
    {
        x /= v;
        y /= v;
        return *this;
    }
};

// Two-component aliases used by the codecs.
typedef Vec2<float>  CMP_Vec2f;
typedef Vec2<float>  CGU_Vec2f;
typedef Vec2<float>  CGV_Vec2f;
typedef Vec2<double> CMP_Vec2d;
typedef Vec2<int>    CMP_Vec2i;
//}
//============================================= VEC3 ==================================================
/// Three-component vector of T with public x, y, z members and component-wise arithmetic.
template<class T>
class Vec3
{
public:
    T x;
    T y;
    T z;

    // ---------------------------------------------------------------
    // Construction
    // ---------------------------------------------------------------

    /// Builds a vector with every component set to zero.
    Vec3()
        : x(T(0)), y(T(0)), z(T(0))
    {
    }

    /// Builds a vector from three individual component values.
    Vec3(const T& vx, const T& vy, const T& vz)
        : x(vx), y(vy), z(vz)
    {
    }

    /// Builds a component-wise copy of another vector.
    Vec3(const Vec3& val)
        : x(val.x), y(val.y), z(val.z)
    {
    }

    /// Builds a vector whose three components all equal v.
    Vec3(const T& v)
        : x(v), y(v), z(v)
    {
    }

    /// Builds a vector from the first three elements of the array v.
    Vec3(const T* v)
        : x(v[0]), y(v[1]), z(v[2])
    {
    }

    // ---------------------------------------------------------------
    // Conversion and assignment
    // ---------------------------------------------------------------

    /// Read-only view of this object as a pointer to T.
    operator const T* () const
    {
        return reinterpret_cast<const T*>(this);
    }

    /// Mutable view of this object as a pointer to T.
    operator T* ()
    {
        return reinterpret_cast<T*>(this);
    }

    /// Copies every component from rhs and returns this vector.
    const Vec3& operator=(const Vec3& rhs)
    {
        x = rhs.x;
        y = rhs.y;
        z = rhs.z;
        return *this;
    }

    // ---------------------------------------------------------------
    // Comparison
    // ---------------------------------------------------------------

    /// True when all three components compare equal.
    bool operator==(const Vec3& rhs) const
    {
        return (x == rhs.x
             && y == rhs.y
             && z == rhs.z);
    }

    /// True when at least one component differs.
    bool operator!=(const Vec3& rhs) const
    {
        return (x != rhs.x
             || y != rhs.y
             || z != rhs.z);
    }

    // ---------------------------------------------------------------
    // Arithmetic returning a new vector
    // ---------------------------------------------------------------

    /// Component-wise sum.
    const Vec3 operator+(const Vec3& rhs) const
    {
        return Vec3(x + rhs.x, y + rhs.y, z + rhs.z);
    }

    /// Component-wise difference.
    const Vec3 operator-(const Vec3& rhs) const
    {
        return Vec3(x - rhs.x, y - rhs.y, z - rhs.z);
    }

    /// Every component multiplied by the scalar v.
    const Vec3 operator*(const T& v) const
    {
        return Vec3(x * v, y * v, z * v);
    }

    /// Every component divided by the scalar v.
    const Vec3 operator/(const T& v) const
    {
        return Vec3(x / v, y / v, z / v);
    }

    /// Component-wise quotient.
    const Vec3 operator/(const Vec3& rhs) const
    {
        return Vec3(x / rhs.x, y / rhs.y, z / rhs.z);
    }

    // ---------------------------------------------------------------
    // Arithmetic modifying this vector
    // ---------------------------------------------------------------

    /// Adds rhs to this vector, component by component.
    Vec3& operator+= (const Vec3& rhs)
    {
        x += rhs.x;
        y += rhs.y;
        z += rhs.z;
        return *this;
    }

    /// Subtracts rhs from this vector, component by component.
    Vec3& operator-= (const Vec3& rhs)
    {
        x -= rhs.x;
        y -= rhs.y;
        z -= rhs.z;
        return *this;
    }

    /// Multiplies every component by the scalar v.
    Vec3& operator*= (const T& v)
    {
        x *= v;
        y *= v;
        z *= v;
        return *this;
    }

    /// Divides every component by the scalar v.
    Vec3& operator/= (const T& v)
    {
        x /= v;
        y /= v;
        z /= v;
        return *this;
    }
};
// Convenience aliases for the Vec3 instantiations used in this header.
// Aliases that share a suffix (f, uc) name the same instantiation regardless of their
// CGU_/CGV_/CMP_ prefix: f = float, d = double, i = int, uc = unsigned char.
typedef Vec3<float> CGU_Vec3f;
typedef Vec3<float> CGV_Vec3f;
typedef Vec3<unsigned char> CGU_Vec3uc;
typedef Vec3<unsigned char> CGV_Vec3uc;
typedef Vec3<float> CMP_Vec3f;
typedef Vec3<double> CMP_Vec3d;
typedef Vec3<int> CMP_Vec3i;
typedef Vec3<unsigned char> CMP_Vec3uc;
//============================================= VEC4 ==================================================
/// Four-component vector of T with public x, y, z, w members and component-wise arithmetic.
template<class T>
class Vec4
{
public:
    T x;
    T y;
    T z;
    T w;

    // ---------------------------------------------------------------
    // Construction
    // ---------------------------------------------------------------

    /// Builds a vector with every component set to zero.
    Vec4()
        : x(T(0)), y(T(0)), z(T(0)), w(T(0))
    {
    }

    /// Builds a vector from four individual component values.
    Vec4(const T& vx, const T& vy, const T& vz, const T& vw)
        : x(vx), y(vy), z(vz), w(vw)
    {
    }

    /// Builds a component-wise copy of another vector.
    Vec4(const Vec4& val)
        : x(val.x), y(val.y), z(val.z), w(val.w)
    {
    }

    /// Builds a vector whose four components all equal v.
    Vec4(const T& v)
        : x(v), y(v), z(v), w(v)
    {
    }

    /// Builds a vector from the first four elements of the array v.
    Vec4(const T* v)
        : x(v[0]), y(v[1]), z(v[2]), w(v[3])
    {
    }

    // ---------------------------------------------------------------
    // Conversion and assignment
    // ---------------------------------------------------------------

    /// Read-only view of this object as a pointer to T.
    operator const T* () const
    {
        return reinterpret_cast<const T*>(this);
    }

    /// Mutable view of this object as a pointer to T.
    operator T* ()
    {
        return reinterpret_cast<T*>(this);
    }

    /// Copies every component from rhs and returns this vector.
    const Vec4& operator=(const Vec4& rhs)
    {
        x = rhs.x;
        y = rhs.y;
        z = rhs.z;
        w = rhs.w;
        return *this;
    }

    // ---------------------------------------------------------------
    // Comparison
    // ---------------------------------------------------------------

    /// True when all four components compare equal.
    bool operator==(const Vec4& rhs) const
    {
        return (x == rhs.x
             && y == rhs.y
             && z == rhs.z
             && w == rhs.w);
    }

    /// True when at least one component differs.
    bool operator!=(const Vec4& rhs) const
    {
        return (x != rhs.x
             || y != rhs.y
             || z != rhs.z
             || w != rhs.w);
    }

    // ---------------------------------------------------------------
    // Arithmetic returning a new vector
    // ---------------------------------------------------------------

    /// Component-wise sum.
    const Vec4 operator+(const Vec4& rhs) const
    {
        return Vec4(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w);
    }

    /// Component-wise difference.
    const Vec4 operator-(const Vec4& rhs) const
    {
        return Vec4(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w);
    }

    /// Every component multiplied by the scalar v.
    const Vec4 operator*(const T& v) const
    {
        return Vec4(x * v, y * v, z * v, w * v);
    }

    /// Every component divided by the scalar v.
    const Vec4 operator/(const T& v) const
    {
        return Vec4(x / v, y / v, z / v, w / v);
    }

    /// Component-wise quotient.
    const Vec4 operator/(const Vec4& rhs) const
    {
        return Vec4(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w);
    }

    // ---------------------------------------------------------------
    // Arithmetic modifying this vector
    // ---------------------------------------------------------------

    /// Adds rhs to this vector, component by component.
    Vec4& operator+= (const Vec4& rhs)
    {
        x += rhs.x;
        y += rhs.y;
        z += rhs.z;
        w += rhs.w;
        return *this;
    }

    /// Subtracts rhs from this vector, component by component.
    Vec4& operator-= (const Vec4& rhs)
    {
        x -= rhs.x;
        y -= rhs.y;
        z -= rhs.z;
        w -= rhs.w;
        return *this;
    }

    /// Multiplies every component by the scalar v.
    Vec4& operator*= (const T& v)
    {
        x *= v;
        y *= v;
        z *= v;
        w *= v;
        return *this;
    }

    /// Divides every component by the scalar v.
    Vec4& operator/= (const T& v)
    {
        x /= v;
        y /= v;
        z /= v;
        w /= v;
        return *this;
    }
};
#include <stdio.h>
#include "xmmintrin.h"
#include <math.h>
#include <float.h>
// CMP_SSEVec4f: four packed floats held in a single SSE __m128 value.
// Each arithmetic, comparison and bitwise operator below forwards to the matching
// _mm_*_ps intrinsic, so it acts on all four lanes at once.
// When _LINUX is defined the class is declared without an alignment attribute and lanes are
// indexed through the f32[] union member; otherwise "intrin.h" is included, the class is
// declared with __declspec(align(16)) and lanes are indexed through vec128.m128_f32.
#ifdef _LINUX
class CMP_SSEVec4f
#else
#include "intrin.h"
class __declspec(align(16)) CMP_SSEVec4f
#endif
{
public:
union
{
__m128 vec128; // float Vector 128 bits in total (16 Bytes) = array of 4 floats
#ifdef _LINUX
float f32[4]; // per-lane view of vec128; used by operator[] in this configuration
#endif
};
// constructors
// Default constructor: vec128 is left uninitialised.
inline CMP_SSEVec4f() {};
// Per-lane constructor: uses _mm_setr_ps, so x is stored in the lowest lane and w in the highest.
inline CMP_SSEVec4f(float x, float y, float z, float w) : vec128(_mm_setr_ps(x, y, z, w)) {};
// Wraps an existing __m128 value.
inline CMP_SSEVec4f(__m128 vec) : vec128(vec) {}
// Loads four consecutive floats with _mm_load_ps, which requires data to be 16-byte aligned.
inline CMP_SSEVec4f(const float* data) : vec128(_mm_load_ps(data)) {};
// Broadcasts scalar into all four lanes.
inline CMP_SSEVec4f(float scalar) : vec128(_mm_load1_ps(&scalar)) {};
// copy and assignment
inline CMP_SSEVec4f(const CMP_SSEVec4f& init) : vec128(init.vec128) {};
// NOTE: the parameter is named lhs, but it is the right-hand side of the assignment.
inline const CMP_SSEVec4f& operator=(const CMP_SSEVec4f& lhs) { vec128 = lhs.vec128; return *this; };
// conversion to m128 type for direct use in _mm intrinsics
inline operator __m128() { return vec128; };
inline operator const __m128() const { return vec128; };
// indexing
// No bounds checking is performed on i.
#ifdef _LINUX
inline const float& operator[](int i) const { return f32[i]; };
inline float& operator[](int i) { return f32[i]; };
#else
inline const float& operator[](int i) const { return vec128.m128_f32[i]; };
inline float& operator[](int i) { return vec128.m128_f32[i]; };
#endif
// addition
inline CMP_SSEVec4f operator+(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_add_ps(vec128, rhs.vec128)); };
inline CMP_SSEVec4f& operator+=(const CMP_SSEVec4f& rhs) { vec128 = _mm_add_ps(vec128, rhs.vec128); return *this; };
// multiplication
inline CMP_SSEVec4f operator*(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_mul_ps(vec128, rhs.vec128)); };
inline CMP_SSEVec4f& operator*=(const CMP_SSEVec4f& rhs) { vec128 = _mm_mul_ps(vec128, rhs.vec128); return *this; };
// scalar multiplication
//inline CMP_SSEVec4f operator*( float rhs ) const { return CMP_SSEVec4f( _mm_mul_ps(vec128, _mm_load1_ps(&rhs)) ); };
//inline CMP_SSEVec4f& operator*=( float rhs ) { vec128 = _mm_mul_ps(vec128, _mm_load1_ps(&rhs)); return *this; };
// subtraction
inline CMP_SSEVec4f operator-(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_sub_ps(vec128, rhs.vec128)); };
inline CMP_SSEVec4f& operator-= (const CMP_SSEVec4f& rhs) { vec128 = _mm_sub_ps(vec128, rhs.vec128); return *this; };
// division
inline CMP_SSEVec4f operator/(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_div_ps(vec128, rhs.vec128)); };
inline CMP_SSEVec4f& operator/= (const CMP_SSEVec4f& rhs) { vec128 = _mm_div_ps(vec128, rhs.vec128); return *this; };
// scalar division
// rhs is broadcast to all four lanes before the divide.
inline CMP_SSEVec4f operator/(float rhs) const { return CMP_SSEVec4f(_mm_div_ps(vec128, _mm_load1_ps(&rhs))); };
inline CMP_SSEVec4f& operator/=(float rhs) { vec128 = _mm_div_ps(vec128, _mm_load1_ps(&rhs)); return *this; };
// comparison
// these return 0 or 0xffffffff in each component
// (the result is a per-lane mask wrapped in a CMP_SSEVec4f, not a bool)
inline CMP_SSEVec4f operator< (const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmplt_ps(vec128, rhs.vec128)); };
inline CMP_SSEVec4f operator> (const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmpgt_ps(vec128, rhs.vec128)); };
inline CMP_SSEVec4f operator<=(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmple_ps(vec128, rhs.vec128)); };
inline CMP_SSEVec4f operator>=(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmpge_ps(vec128, rhs.vec128)); };
inline CMP_SSEVec4f operator==(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_cmpeq_ps(vec128, rhs.vec128)); };
// bitwise operators
inline CMP_SSEVec4f operator|(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_or_ps(vec128, rhs.vec128)); };
inline CMP_SSEVec4f operator&(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_and_ps(vec128, rhs.vec128)); };
inline CMP_SSEVec4f operator^(const CMP_SSEVec4f& rhs) const { return CMP_SSEVec4f(_mm_xor_ps(vec128, rhs.vec128)); };
inline const CMP_SSEVec4f& operator|=(const CMP_SSEVec4f& rhs) { vec128 = _mm_or_ps(vec128, rhs.vec128); return *this; };
inline const CMP_SSEVec4f& operator&=(const CMP_SSEVec4f& rhs) { vec128 = _mm_and_ps(vec128, rhs.vec128); return *this; };
// Bitwise NOT: no dedicated intrinsic is used here, so it is emulated by XOR-ing
// with an all-ones mask (0xffffffff in every lane).
// The all-ones mask is obtained by comparing zero with itself (0 == 0 is true in every lane).
inline CMP_SSEVec4f operator~() const
{
__m128 zero = _mm_setzero_ps();
__m128 is_true = _mm_cmpeq_ps(zero, zero);
return _mm_xor_ps(is_true, vec128);
};
};
// Convenience aliases for the Vec4 instantiations used in this header.
// CMP_Vec4uc, CGU_Vec4uc and CGV_Vec4uc all name the same type, Vec4<unsigned char>.
typedef Vec4<float> CMP_Vec4f;
typedef Vec4<double> CMP_Vec4d;
typedef Vec4<int> CMP_Vec4i;
typedef Vec4<unsigned int> CMP_Vec4ui; // unsigned int x,y,z,w
typedef Vec4<unsigned char> CMP_Vec4uc; // unsigned char x,y,z,w
typedef Vec4<unsigned char> CGU_Vec4uc; // unsigned char x,y,z,w
typedef Vec4<unsigned char> CGV_Vec4uc; // unsigned char x,y,z,w
#endif // not ASPM_GPU
#endif // Header Guard

@ -0,0 +1,228 @@
#ifndef BLOCKCONSTANTS_H
#define BLOCKCONSTANTS_H
#include <string>
#include <unordered_map>
// Pairs two non-owning byte pointers describing one test block.
struct Block
{
    const unsigned char* data;   // first pointer; the table entries in this header point it at a byte array
    const unsigned char* color;  // second pointer; the table entries in this header initialise it to nullptr
};
// Raw byte tables, one per named test case. Names follow the pattern
// <format>_<colour>_<alpha mode>, with format BC1/BC2/BC3 and alpha mode
// Ignore_Alpha / Half_Alpha / Full_Alpha.
// NOTE(review): presumably these are the encoded blocks the tests work with - confirm
// against the test source.

// BC1 tables: 8 bytes each.
// Every BC1 *_Half_Alpha and *_Full_Alpha table below holds the same byte pattern.
static const unsigned char BC1_Red_Ignore_Alpha [] {0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC1_Blue_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_White_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Black_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Red_Blue_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Red_Green_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Green_Blue_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Red_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Green_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Blue_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_White_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Green_Ignore_Alpha [] {0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC1_Black_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Red_Blue_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Red_Green_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Green_Blue_Full_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Blue_Ignore_Alpha [] {0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC1_White_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC1_Black_Ignore_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC1_Red_Blue_Ignore_Alpha [] {0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC1_Red_Green_Ignore_Alpha [] {0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC1_Green_Blue_Ignore_Alpha [] {0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC1_Red_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
static const unsigned char BC1_Green_Half_Alpha [] {0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
// BC2 tables: 16 bytes each. The first 8 bytes depend only on the alpha mode
// (0xff.. for Ignore, 0x77.. for Half, 0x00.. for Full); the last 8 bytes depend only on the colour.
static const unsigned char BC2_Red_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Blue_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_White_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Black_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Red_Blue_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Red_Green_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Green_Blue_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Red_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Green_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Blue_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_White_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Green_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Black_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Red_Blue_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Red_Green_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Green_Blue_Full_Alpha [] {0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Blue_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_White_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Black_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Red_Blue_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Red_Green_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Green_Blue_Ignore_Alpha [] {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Red_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC2_Green_Half_Alpha [] {0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
// BC3 tables: 16 bytes each. As with BC2, the first 8 bytes depend only on the alpha mode
// and the last 8 bytes depend only on the colour.
static const unsigned char BC3_Red_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Blue_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_White_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Black_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Red_Blue_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Red_Green_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Green_Blue_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Red_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Green_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_White_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Green_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Black_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Red_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Red_Green_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Green_Blue_Full_Alpha [] {0xff, 0x0 , 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Blue_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0x0 , 0x1f, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_White_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0xff, 0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Black_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Red_Blue_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x1f, 0xf8, 0x1f, 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Red_Green_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0xff, 0xe0, 0xff, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Green_Blue_Ignore_Alpha [] {0xff, 0xff, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xff, 0x7 , 0xff, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Red_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xf8, 0x0 , 0xf8, 0x0 , 0x0 , 0x0 , 0x0 };
static const unsigned char BC3_Green_Half_Alpha [] {0x7b, 0x7b, 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0xe0, 0x7 , 0xe0, 0x7 , 0x0 , 0x0 , 0x0 , 0x0 };
// One Block object per byte table: data points at the table with the same name (minus the
// _Block suffix) and color starts out as nullptr.
// These objects are defined in a header, so they are given internal linkage (static), matching
// the byte tables and the lookup map in this file. Without it, including this header from a
// second translation unit would produce multiple-definition link errors.
static Block BC1_Red_Ignore_Alpha_Block = {BC1_Red_Ignore_Alpha, nullptr};
static Block BC1_Blue_Half_Alpha_Block = {BC1_Blue_Half_Alpha, nullptr};
static Block BC1_White_Half_Alpha_Block = {BC1_White_Half_Alpha, nullptr};
static Block BC1_Black_Half_Alpha_Block = {BC1_Black_Half_Alpha, nullptr};
static Block BC1_Red_Blue_Half_Alpha_Block = {BC1_Red_Blue_Half_Alpha, nullptr};
static Block BC1_Red_Green_Half_Alpha_Block = {BC1_Red_Green_Half_Alpha, nullptr};
static Block BC1_Green_Blue_Half_Alpha_Block = {BC1_Green_Blue_Half_Alpha, nullptr};
static Block BC1_Red_Full_Alpha_Block = {BC1_Red_Full_Alpha, nullptr};
static Block BC1_Green_Full_Alpha_Block = {BC1_Green_Full_Alpha, nullptr};
static Block BC1_Blue_Full_Alpha_Block = {BC1_Blue_Full_Alpha, nullptr};
static Block BC1_White_Full_Alpha_Block = {BC1_White_Full_Alpha, nullptr};
static Block BC1_Green_Ignore_Alpha_Block = {BC1_Green_Ignore_Alpha, nullptr};
static Block BC1_Black_Full_Alpha_Block = {BC1_Black_Full_Alpha, nullptr};
static Block BC1_Red_Blue_Full_Alpha_Block = {BC1_Red_Blue_Full_Alpha, nullptr};
static Block BC1_Red_Green_Full_Alpha_Block = {BC1_Red_Green_Full_Alpha, nullptr};
static Block BC1_Green_Blue_Full_Alpha_Block = {BC1_Green_Blue_Full_Alpha, nullptr};
static Block BC1_Blue_Ignore_Alpha_Block = {BC1_Blue_Ignore_Alpha, nullptr};
static Block BC1_White_Ignore_Alpha_Block = {BC1_White_Ignore_Alpha, nullptr};
static Block BC1_Black_Ignore_Alpha_Block = {BC1_Black_Ignore_Alpha, nullptr};
static Block BC1_Red_Blue_Ignore_Alpha_Block = {BC1_Red_Blue_Ignore_Alpha, nullptr};
static Block BC1_Red_Green_Ignore_Alpha_Block = {BC1_Red_Green_Ignore_Alpha, nullptr};
static Block BC1_Green_Blue_Ignore_Alpha_Block = {BC1_Green_Blue_Ignore_Alpha, nullptr};
static Block BC1_Red_Half_Alpha_Block = {BC1_Red_Half_Alpha, nullptr};
static Block BC1_Green_Half_Alpha_Block = {BC1_Green_Half_Alpha, nullptr};
static Block BC2_Red_Ignore_Alpha_Block = {BC2_Red_Ignore_Alpha, nullptr};
static Block BC2_Blue_Half_Alpha_Block = {BC2_Blue_Half_Alpha, nullptr};
static Block BC2_White_Half_Alpha_Block = {BC2_White_Half_Alpha, nullptr};
static Block BC2_Black_Half_Alpha_Block = {BC2_Black_Half_Alpha, nullptr};
static Block BC2_Red_Blue_Half_Alpha_Block = {BC2_Red_Blue_Half_Alpha, nullptr};
static Block BC2_Red_Green_Half_Alpha_Block = {BC2_Red_Green_Half_Alpha, nullptr};
static Block BC2_Green_Blue_Half_Alpha_Block = {BC2_Green_Blue_Half_Alpha, nullptr};
static Block BC2_Red_Full_Alpha_Block = {BC2_Red_Full_Alpha, nullptr};
static Block BC2_Green_Full_Alpha_Block = {BC2_Green_Full_Alpha, nullptr};
static Block BC2_Blue_Full_Alpha_Block = {BC2_Blue_Full_Alpha, nullptr};
static Block BC2_White_Full_Alpha_Block = {BC2_White_Full_Alpha, nullptr};
static Block BC2_Green_Ignore_Alpha_Block = {BC2_Green_Ignore_Alpha, nullptr};
static Block BC2_Black_Full_Alpha_Block = {BC2_Black_Full_Alpha, nullptr};
static Block BC2_Red_Blue_Full_Alpha_Block = {BC2_Red_Blue_Full_Alpha, nullptr};
static Block BC2_Red_Green_Full_Alpha_Block = {BC2_Red_Green_Full_Alpha, nullptr};
static Block BC2_Green_Blue_Full_Alpha_Block = {BC2_Green_Blue_Full_Alpha, nullptr};
static Block BC2_Blue_Ignore_Alpha_Block = {BC2_Blue_Ignore_Alpha, nullptr};
static Block BC2_White_Ignore_Alpha_Block = {BC2_White_Ignore_Alpha, nullptr};
static Block BC2_Black_Ignore_Alpha_Block = {BC2_Black_Ignore_Alpha, nullptr};
static Block BC2_Red_Blue_Ignore_Alpha_Block = {BC2_Red_Blue_Ignore_Alpha, nullptr};
static Block BC2_Red_Green_Ignore_Alpha_Block = {BC2_Red_Green_Ignore_Alpha, nullptr};
static Block BC2_Green_Blue_Ignore_Alpha_Block = {BC2_Green_Blue_Ignore_Alpha, nullptr};
static Block BC2_Red_Half_Alpha_Block = {BC2_Red_Half_Alpha, nullptr};
static Block BC2_Green_Half_Alpha_Block = {BC2_Green_Half_Alpha, nullptr};
static Block BC3_Red_Ignore_Alpha_Block = {BC3_Red_Ignore_Alpha, nullptr};
static Block BC3_Blue_Half_Alpha_Block = {BC3_Blue_Half_Alpha, nullptr};
static Block BC3_White_Half_Alpha_Block = {BC3_White_Half_Alpha, nullptr};
static Block BC3_Black_Half_Alpha_Block = {BC3_Black_Half_Alpha, nullptr};
static Block BC3_Red_Blue_Half_Alpha_Block = {BC3_Red_Blue_Half_Alpha, nullptr};
static Block BC3_Red_Green_Half_Alpha_Block = {BC3_Red_Green_Half_Alpha, nullptr};
static Block BC3_Green_Blue_Half_Alpha_Block = {BC3_Green_Blue_Half_Alpha, nullptr};
static Block BC3_Red_Full_Alpha_Block = {BC3_Red_Full_Alpha, nullptr};
static Block BC3_Green_Full_Alpha_Block = {BC3_Green_Full_Alpha, nullptr};
static Block BC3_Blue_Full_Alpha_Block = {BC3_Blue_Full_Alpha, nullptr};
static Block BC3_White_Full_Alpha_Block = {BC3_White_Full_Alpha, nullptr};
static Block BC3_Green_Ignore_Alpha_Block = {BC3_Green_Ignore_Alpha, nullptr};
static Block BC3_Black_Full_Alpha_Block = {BC3_Black_Full_Alpha, nullptr};
static Block BC3_Red_Blue_Full_Alpha_Block = {BC3_Red_Blue_Full_Alpha, nullptr};
static Block BC3_Red_Green_Full_Alpha_Block = {BC3_Red_Green_Full_Alpha, nullptr};
static Block BC3_Green_Blue_Full_Alpha_Block = {BC3_Green_Blue_Full_Alpha, nullptr};
static Block BC3_Blue_Ignore_Alpha_Block = {BC3_Blue_Ignore_Alpha, nullptr};
static Block BC3_White_Ignore_Alpha_Block = {BC3_White_Ignore_Alpha, nullptr};
static Block BC3_Black_Ignore_Alpha_Block = {BC3_Black_Ignore_Alpha, nullptr};
static Block BC3_Red_Blue_Ignore_Alpha_Block = {BC3_Red_Blue_Ignore_Alpha, nullptr};
static Block BC3_Red_Green_Ignore_Alpha_Block = {BC3_Red_Green_Ignore_Alpha, nullptr};
static Block BC3_Green_Blue_Ignore_Alpha_Block = {BC3_Green_Blue_Ignore_Alpha, nullptr};
static Block BC3_Red_Half_Alpha_Block = {BC3_Red_Half_Alpha, nullptr};
static Block BC3_Green_Half_Alpha_Block = {BC3_Green_Half_Alpha, nullptr};
// Name -> Block lookup table. Each entry stores a copy of the corresponding *_Block object
// under the object's own name without the _Block suffix.
// CMP_BLOCK_ENTRY(X) expands to { "X", X_Block } so a key and the object it maps to are
// always spelled from the same token; the macro is local to this table.
#define CMP_BLOCK_ENTRY(name) { #name, name##_Block }
static std::unordered_map<std::string, Block> blocks {
    // BC1
    CMP_BLOCK_ENTRY(BC1_Red_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC1_Blue_Half_Alpha),
    CMP_BLOCK_ENTRY(BC1_White_Half_Alpha),
    CMP_BLOCK_ENTRY(BC1_Black_Half_Alpha),
    CMP_BLOCK_ENTRY(BC1_Red_Blue_Half_Alpha),
    CMP_BLOCK_ENTRY(BC1_Red_Green_Half_Alpha),
    CMP_BLOCK_ENTRY(BC1_Green_Blue_Half_Alpha),
    CMP_BLOCK_ENTRY(BC1_Red_Full_Alpha),
    CMP_BLOCK_ENTRY(BC1_Green_Full_Alpha),
    CMP_BLOCK_ENTRY(BC1_Blue_Full_Alpha),
    CMP_BLOCK_ENTRY(BC1_White_Full_Alpha),
    CMP_BLOCK_ENTRY(BC1_Green_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC1_Black_Full_Alpha),
    CMP_BLOCK_ENTRY(BC1_Red_Blue_Full_Alpha),
    CMP_BLOCK_ENTRY(BC1_Red_Green_Full_Alpha),
    CMP_BLOCK_ENTRY(BC1_Green_Blue_Full_Alpha),
    CMP_BLOCK_ENTRY(BC1_Blue_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC1_White_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC1_Black_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC1_Red_Blue_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC1_Red_Green_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC1_Green_Blue_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC1_Red_Half_Alpha),
    CMP_BLOCK_ENTRY(BC1_Green_Half_Alpha),
    // BC2
    CMP_BLOCK_ENTRY(BC2_Red_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC2_Blue_Half_Alpha),
    CMP_BLOCK_ENTRY(BC2_White_Half_Alpha),
    CMP_BLOCK_ENTRY(BC2_Black_Half_Alpha),
    CMP_BLOCK_ENTRY(BC2_Red_Blue_Half_Alpha),
    CMP_BLOCK_ENTRY(BC2_Red_Green_Half_Alpha),
    CMP_BLOCK_ENTRY(BC2_Green_Blue_Half_Alpha),
    CMP_BLOCK_ENTRY(BC2_Red_Full_Alpha),
    CMP_BLOCK_ENTRY(BC2_Green_Full_Alpha),
    CMP_BLOCK_ENTRY(BC2_Blue_Full_Alpha),
    CMP_BLOCK_ENTRY(BC2_White_Full_Alpha),
    CMP_BLOCK_ENTRY(BC2_Green_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC2_Black_Full_Alpha),
    CMP_BLOCK_ENTRY(BC2_Red_Blue_Full_Alpha),
    CMP_BLOCK_ENTRY(BC2_Red_Green_Full_Alpha),
    CMP_BLOCK_ENTRY(BC2_Green_Blue_Full_Alpha),
    CMP_BLOCK_ENTRY(BC2_Blue_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC2_White_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC2_Black_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC2_Red_Blue_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC2_Red_Green_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC2_Green_Blue_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC2_Red_Half_Alpha),
    CMP_BLOCK_ENTRY(BC2_Green_Half_Alpha),
    // BC3
    CMP_BLOCK_ENTRY(BC3_Red_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC3_Blue_Half_Alpha),
    CMP_BLOCK_ENTRY(BC3_White_Half_Alpha),
    CMP_BLOCK_ENTRY(BC3_Black_Half_Alpha),
    CMP_BLOCK_ENTRY(BC3_Red_Blue_Half_Alpha),
    CMP_BLOCK_ENTRY(BC3_Red_Green_Half_Alpha),
    CMP_BLOCK_ENTRY(BC3_Green_Blue_Half_Alpha),
    CMP_BLOCK_ENTRY(BC3_Red_Full_Alpha),
    CMP_BLOCK_ENTRY(BC3_Green_Full_Alpha),
    CMP_BLOCK_ENTRY(BC3_Blue_Full_Alpha),
    CMP_BLOCK_ENTRY(BC3_White_Full_Alpha),
    CMP_BLOCK_ENTRY(BC3_Green_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC3_Black_Full_Alpha),
    CMP_BLOCK_ENTRY(BC3_Red_Blue_Full_Alpha),
    CMP_BLOCK_ENTRY(BC3_Red_Green_Full_Alpha),
    CMP_BLOCK_ENTRY(BC3_Green_Blue_Full_Alpha),
    CMP_BLOCK_ENTRY(BC3_Blue_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC3_White_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC3_Black_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC3_Red_Blue_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC3_Red_Green_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC3_Green_Blue_Ignore_Alpha),
    CMP_BLOCK_ENTRY(BC3_Red_Half_Alpha),
    CMP_BLOCK_ENTRY(BC3_Green_Half_Alpha)
};
#undef CMP_BLOCK_ENTRY
#endif

@ -0,0 +1,13 @@
# Builds the "Tests" executable and links it against Catch2 and CMP_Core.
cmake_minimum_required(VERSION 3.5)
project(CMP_Core_Tests)

# The Catch2 sources are referenced through a path outside this directory, so the
# second argument names the binary directory to use for them.
add_subdirectory(
    ../../../Common/Lib/Ext/Catch2
    Common/Lib/Ext/Catch2/bin
)

# Headers are listed alongside the sources, as in the original target_sources() call.
add_executable(Tests
    TestsMain.cpp
    CompressonatorTests.cpp
    CompressonatorTests.h
    BlockConstants.h
)

target_link_libraries(Tests Catch2::Catch2 CMP_Core)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,6 @@
#ifndef COMPRESSONATOR_TESTS_H
#define COMPRESSONATOR_TESTS_H
// Declared here and defined elsewhere (definition not shown in this header).
// The test runner's main() calls it once, before the Catch2 session is started.
// NOTE(review): the name suggests it fills in the expected colours of the test blocks -
// confirm against the definition.
void AssignExpectedColorsToBlocks();
#endif

@ -0,0 +1,10 @@
#define CATCH_CONFIG_RUNNER
#include "../../../Common/Lib/Ext/Catch2/catch.hpp"
#include "CompressonatorTests.h"
// Test-runner entry point: calls AssignExpectedColorsToBlocks() before any test executes,
// then passes the command line to a Catch2 session and returns that session's result as
// the process exit code.
int main(int argc, char* argv[])
{
    AssignExpectedColorsToBlocks();
    return Catch::Session().run(argc, argv);
}

@ -9,5 +9,9 @@ ADD_SUBDIRECTORY(EtcLib)
ADD_SUBDIRECTORY(rg_etc1_v104)
#ADD_SUBDIRECTORY(etcpack)
ADD_SUBDIRECTORY(butteraugli)
#ADD_SUBDIRECTORY(butteraugli)
ADD_SUBDIRECTORY(libsquish-1.15)
ADD_SUBDIRECTORY(CMP_Core)

@ -0,0 +1,117 @@
# cmake build file for squish
# by Stefan Roettger (snroettg@gmail.com)
# updated by Simon Brown (si@sjbrown.co.uk)
# features:
# uses -fopenmp when available
# use BUILD_SQUISH_WITH_OPENMP to override
# Xcode: builds universal binaries, uses SSE2 on i386 and Altivec on ppc
# Unix and VS: SSE2 support is enabled by default
# use BUILD_SQUISH_WITH_SSE2 and BUILD_SQUISH_WITH_ALTIVEC to override
PROJECT(squish)
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.3)

# ---- User-facing build options ----
OPTION(BUILD_SQUISH_WITH_OPENMP "Build with OpenMP." ON)
OPTION(BUILD_SQUISH_WITH_SSE2 "Build with SSE2." ON)
OPTION(BUILD_SQUISH_WITH_ALTIVEC "Build with Altivec." OFF)
OPTION(BUILD_SHARED_LIBS "Build shared libraries." OFF)
OPTION(BUILD_SQUISH_EXTRA "Build extra source code." OFF)

# ---- OpenMP ----
# When requested and found, the OpenMP flags are appended to the global C and C++ flags
# and SQUISH_USE_OPENMP is defined. A missing OpenMP is not an error.
IF (BUILD_SQUISH_WITH_OPENMP)
FIND_PACKAGE(OpenMP)
IF (OPENMP_FOUND)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
ADD_DEFINITIONS(-DSQUISH_USE_OPENMP)
ENDIF()
ENDIF()

# ---- SIMD selection ----
# Xcode generator: only the target architectures are set here; the per-architecture defines
# and flags come from the XCODE_ATTRIBUTE_* target properties further down.
# Other generators: on non-WIN32 platforms ADD_DEFINITIONS passes both the SQUISH_USE_*
# define and the matching compiler flag (-msse2 / -maltivec).
IF (CMAKE_GENERATOR STREQUAL "Xcode")
SET(CMAKE_OSX_ARCHITECTURES "i386;ppc")
ELSE (CMAKE_GENERATOR STREQUAL "Xcode")
IF (BUILD_SQUISH_WITH_SSE2 AND NOT WIN32)
ADD_DEFINITIONS(-DSQUISH_USE_SSE=2 -msse2)
ENDIF (BUILD_SQUISH_WITH_SSE2 AND NOT WIN32)
IF (BUILD_SQUISH_WITH_ALTIVEC AND NOT WIN32)
ADD_DEFINITIONS(-DSQUISH_USE_ALTIVEC=1 -maltivec)
ENDIF (BUILD_SQUISH_WITH_ALTIVEC AND NOT WIN32)
ENDIF (CMAKE_GENERATOR STREQUAL "Xcode")

# ---- Source lists ----
# SQUISH_HDRS is the public header set (also used for PUBLIC_HEADER below);
# SQUISH_SRCS holds the implementation files and private headers.
SET(SQUISH_HDRS
squish.h
)
SET(SQUISH_SRCS
alpha.cpp
alpha.h
clusterfit.cpp
clusterfit.h
colourblock.cpp
colourblock.h
colourfit.cpp
colourfit.h
colourset.cpp
colourset.h
maths.cpp
maths.h
rangefit.cpp
rangefit.h
simd.h
simd_float.h
simd_sse.h
simd_ve.h
singlecolourfit.cpp
singlecolourfit.h
singlecolourlookup.inl
squish.cpp
)

# ---- Library target ----
# No STATIC/SHARED keyword is given, so the library type follows BUILD_SHARED_LIBS.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_LIBRARY(squish ${SQUISH_SRCS} ${SQUISH_HDRS})
SET_TARGET_PROPERTIES(
squish PROPERTIES
PUBLIC_HEADER "${SQUISH_HDRS}"
VERSION 0.0
SOVERSION 0.0
DEBUG_POSTFIX "d"
XCODE_ATTRIBUTE_GCC_PREPROCESSOR_DEFINITIONS "$(SQUISH_CPP_$(CURRENT_ARCH))"
XCODE_ATTRIBUTE_OTHER_CFLAGS "$(SQUISH_CFLAGS_$(CURRENT_ARCH))"
XCODE_ATTRIBUTE_SQUISH_CPP_i386 "SQUISH_USE_SSE=2"
XCODE_ATTRIBUTE_SQUISH_CFLAGS_i386 ""
XCODE_ATTRIBUTE_SQUISH_CPP_ppc "SQUISH_USE_ALTIVEC=1"
XCODE_ATTRIBUTE_SQUISH_CFLAGS_ppc "-maltivec"
)

# ---- Optional extras ----
# squishtest is always built when extras are enabled; squishpng is built only when
# libpng is found.
IF (BUILD_SQUISH_EXTRA)
SET(SQUISHTEST_SRCS extra/squishtest.cpp)
ADD_EXECUTABLE(squishtest ${SQUISHTEST_SRCS})
SET_TARGET_PROPERTIES(squishtest PROPERTIES DEBUG_POSTFIX "d")
TARGET_LINK_LIBRARIES(squishtest squish)
SET(SQUISHPNG_SRCS extra/squishpng.cpp)
FIND_PACKAGE(PNG)
IF (PNG_FOUND)
# SET with no value clears CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES.
# NOTE(review): presumably so the PNG include directory is always passed explicitly - confirm.
SET(CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES)
INCLUDE_DIRECTORIES(${PNG_INCLUDE_DIR})
ADD_EXECUTABLE(squishpng ${SQUISHPNG_SRCS})
SET_TARGET_PROPERTIES(squishpng PROPERTIES DEBUG_POSTFIX "d")
TARGET_LINK_LIBRARIES(squishpng squish ${PNG_LIBRARIES})
ENDIF (PNG_FOUND)
ENDIF (BUILD_SQUISH_EXTRA)

# ---- Install rules ----
# Installs the library into lib/ and the public header into include/.
INSTALL(
TARGETS squish
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib
PUBLIC_HEADER DESTINATION include
)

@ -0,0 +1,14 @@
# Defines
# LIBSQUISH_FOUND
# LIBSQUISH_INCLUDE_DIR
# LIBSQUISH_LIBRARIES
# Locate the directory that contains squish.h.
find_path(LIBSQUISH_INCLUDE_DIR squish.h
    PATHS . squish .. ../squish
    DOC "Directory containing libSquish headers"
)

# Locate the library itself, also looking in common build-output sub-directories.
find_library(LIBSQUISH_LIBRARY
    NAMES squish libsquish
    PATHS . squish .. ../squish
    PATH_SUFFIXES lib lib64 release minsizerel relwithdebinfo
    DOC "Path to libSquish library"
)

set(LIBSQUISH_LIBRARIES ${LIBSQUISH_LIBRARY})

# LIBSQUISH_FOUND is set only when both the header and the library were located;
# it is left untouched otherwise.
if (LIBSQUISH_LIBRARY AND LIBSQUISH_INCLUDE_DIR)
    set(LIBSQUISH_FOUND TRUE)
    message(STATUS "Found libSquish: ${LIBSQUISH_LIBRARY}")
endif ()

@ -0,0 +1,66 @@
1.15
* parallel compression using openmp with cmake (Marian Krivos / Stefan Roettger)
* parallel decompression using openmp with cmake (Stefan Roettger)
1.14
* backport BGRA support
* backport BC4 and BC5 support
* backport BlockMSE support
1.11-1.13
* added support for CMake and QMake (Stefan Roettger)
* misc. minor changes on the build system (Stefan Roettger)
* added svg icon (Stefan Roettger)
1.10
* Iterative cluster fit is now considered to be a new compression mode
* The core cluster fit is now 4x faster using contributions by Ignacio
Castano from NVIDIA
* The single colour lookup table has been halved by exploiting symmetry
1.9
* Added contributed SSE1 truncate implementation
* Changed use of SQUISH_USE_SSE to be 1 for SSE and 2 for SSE2 instructions
* Cluster fit is now iterative to further reduce image error
1.8
* Switched from using floor to trunc for much better SSE performance (again)
* Xcode build now expects libpng in /usr/local for extra/squishpng
1.7
* Fixed floating-point equality issue in clusterfit sort (x86 affected only)
* Implemented proper SSE(2) floor function for 50% speedup on SSE builds
* The range fit implementation now uses the correct colour metric
1.6
* Fixed bug in CompressImage where masked pixels were not skipped over
* DXT3 and DXT5 alpha compression now properly use the mask to ignore pixels
* Fixed major DXT1 bug that can generate unexpected transparent pixels
1.5
* Added CompressMasked function to handle incomplete DXT blocks more cleanly
* Added kWeightColourByAlpha flag for better quality images when alpha blending
1.4
* Fixed stack overflow in rangefit
1.3
* Worked around SSE floor implementation bug, proper fix needed!
* This release has visual studio and makefile builds that work
1.2
* Added provably optimal single colour compressor
* Added extra/squishgen.cpp that generates single colour lookup tables
1.1
* Fixed a DXT1 colour output bug
* Changed argument order for Decompress function to match Compress
* Added GetStorageRequirements function
* Added CompressImage function
* Added DecompressImage function
* Moved squishtool.cpp to extra/squishpng.cpp
* Added extra/squishtest.cpp
1.0
* Initial release

@ -0,0 +1,214 @@
# Doxyfile 1.4.6
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
PROJECT_NAME = squish
PROJECT_NUMBER = 1.15
OUTPUT_DIRECTORY = docs
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF =
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = YES
STRIP_FROM_PATH =
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
INHERIT_DOCS = YES
SEPARATE_MEMBER_PAGES = NO
TAB_SIZE = 4
ALIASES =
OPTIMIZE_OUTPUT_FOR_C = NO
OPTIMIZE_OUTPUT_JAVA = NO
BUILTIN_STL_SUPPORT = NO
DISTRIBUTE_GROUP_DOC = NO
SUBGROUPING = YES
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = YES
EXTRACT_PRIVATE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = NO
HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = NO
HIDE_FRIEND_COMPOUNDS = NO
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
CASE_SENSE_NAMES = NO
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = NO
SORT_BY_SCOPE_NAME = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
FILE_VERSION_FILTER =
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = YES
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = NO
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = squish.h
FILE_PATTERNS =
RECURSIVE = NO
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXAMPLE_PATH =
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH =
INPUT_FILTER =
FILTER_PATTERNS =
FILTER_SOURCE_FILES = NO
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = NO
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
USE_HTAGS = NO
VERBATIM_HEADERS = YES
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = NO
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = html
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_STYLESHEET =
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
BINARY_TOC = NO
TOC_EXPAND = NO
DISABLE_INDEX = NO
ENUM_VALUES_PER_LINE = 4
GENERATE_TREEVIEW = NO
TREEVIEW_WIDTH = 250
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = NO
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = NO
PAPER_TYPE = a4wide
EXTRA_PACKAGES =
LATEX_HEADER =
PDF_HYPERLINKS = NO
USE_PDFLATEX = NO
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = NO
XML_OUTPUT = xml
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = NO
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = NO
EXPAND_ONLY_PREDEF = NO
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED =
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
TAGFILES =
GENERATE_TAGFILE =
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = YES
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = NO
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
GROUP_GRAPHS = YES
UML_LOOK = NO
TEMPLATE_RELATIONS = NO
INCLUDE_GRAPH = YES
INCLUDED_BY_GRAPH = YES
CALL_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DIRECTORY_GRAPH = YES
DOT_IMAGE_FORMAT = png
DOTFILE_DIRS =
MAX_DOT_GRAPH_DEPTH = 0
DOT_TRANSPARENT = NO
DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
DOT_CLEANUP = YES
#---------------------------------------------------------------------------
# Configuration::additions related to the search engine
#---------------------------------------------------------------------------
SEARCHENGINE = NO

@ -0,0 +1,20 @@
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@ -0,0 +1,65 @@
# Makefile for libsquish: builds a shared and a static library, the Doxygen
# docs and the pkg-config file.
#
# Variables used below but not assigned here (INSTALL_DIR, LIB_PATH, PREFIX,
# USE_SHARED, INSTALL_FILE, INSTALL_DIRECTORY) are presumably provided by the
# included "config" file -- confirm.
include config
# VER names the release tarball; SOVER is the shared-library soname version.
VER = 1.15
SOVER = 0
SRC = alpha.cpp clusterfit.cpp colourblock.cpp colourfit.cpp colourset.cpp maths.cpp rangefit.cpp singlecolourfit.cpp squish.cpp
HDR = alpha.h clusterfit.h colourblock.h colourfit.h colourset.h maths.h rangefit.h singlecolourfit.h squish.h
HDR += config.h simd.h simd_float.h simd_sse.h simd_ve.h singlecolourlookup.inl
OBJ = $(SRC:%.cpp=%.o)
# SOLIB is the soname (libsquish.so.<SOVER>); LIB is the real file name
# (libsquish.so.<SOVER>.0).
SOLIB = libsquish.so.$(SOVER)
LIB = $(SOLIB).0
# -fPIC is applied to every object, so the same objects feed both libraries.
CPPFLAGS += -fPIC
LIBA = libsquish.a
.PHONY: all install uninstall docs tgz clean
all: $(LIB) $(LIBA) docs libsquish.pc
# Installs the header and static library always; the shared library, its two
# symlinks and the pkg-config file are installed only when USE_SHARED is not 0.
install: $(LIB) $(LIBA) libsquish.pc
$(INSTALL_DIRECTORY) $(INSTALL_DIR)/include $(INSTALL_DIR)/$(LIB_PATH)
$(INSTALL_FILE) squish.h $(INSTALL_DIR)/include
$(INSTALL_FILE) $(LIBA) $(INSTALL_DIR)/$(LIB_PATH)
ifneq ($(USE_SHARED),0)
$(INSTALL_FILE) $(LIB) $(INSTALL_DIR)/$(LIB_PATH)
ln -s $(LIB) $(INSTALL_DIR)/$(LIB_PATH)/$(SOLIB)
ln -s $(LIB) $(INSTALL_DIR)/$(LIB_PATH)/libsquish.so
$(INSTALL_DIRECTORY) $(INSTALL_DIR)/$(LIB_PATH)/pkgconfig
$(INSTALL_FILE) libsquish.pc $(INSTALL_DIR)/$(LIB_PATH)/pkgconfig
endif
# Removes everything install may have placed; the leading '-' lets the
# shared-library removals fail without aborting.
uninstall:
$(RM) $(INSTALL_DIR)/include/squish.h
$(RM) $(INSTALL_DIR)/$(LIB_PATH)/$(LIBA)
-$(RM) $(INSTALL_DIR)/$(LIB_PATH)/$(LIB)
-$(RM) $(INSTALL_DIR)/$(LIB_PATH)/$(SOLIB)
-$(RM) $(INSTALL_DIR)/$(LIB_PATH)/libsquish.so
-$(RM) $(INSTALL_DIR)/$(LIB_PATH)/pkgconfig/libsquish.pc
# Shared library; the recipe is empty when USE_SHARED is 0, so nothing is
# linked in that configuration.
$(LIB): $(OBJ)
ifneq ($(USE_SHARED),0)
$(CXX) $(LDFLAGS) -shared -Wl,-soname,$(SOLIB) -o $@ $(OBJ)
endif
# Static library; $? adds only the objects newer than the archive.
$(LIBA): $(OBJ)
$(AR) cr $@ $?
@ranlib $@
# Runs doxygen only when an executable of that name is found.
docs: $(SRC) $(HDR)
@if [ -x "`command -v doxygen`" ]; then doxygen; fi
# Fills the @PREFIX@ and @LIB_PATH@ placeholders of the pkg-config template.
libsquish.pc: libsquish.pc.in
@sed 's|@PREFIX@|$(PREFIX)|;s|@LIB_PATH@|$(LIB_PATH)|' $@.in > $@
# Source tarball named after VER.
tgz: clean
tar zcf libsquish-$(VER).tgz $(SRC) $(HDR) Makefile config CMakeLists.txt CMakeModules libSquish.* README.txt LICENSE.txt ChangeLog.txt Doxyfile libsquish.pc.in extra --exclude \*.svn\*
%.o: %.cpp
$(CXX) $(CPPFLAGS) -I. $(CXXFLAGS) -o $@ -c $<
clean:
$(RM) $(OBJ) $(LIB) $(LIBA) libsquish.pc
@-$(RM) -rf docs

@ -0,0 +1,18 @@
LICENSE
-------
The squish library is distributed under the terms and conditions of the MIT
license. This license is specified at the top of each source file and must be
preserved in its entirety.
BUILDING AND INSTALLING THE LIBRARY
-----------------------------------
The preferred way to install the library on Unix/Mac (and Windows) is via cmake:
cmake . && make && sudo make install
REPORTING BUGS OR FEATURE REQUESTS
----------------------------------
Feedback can be sent to Simon Brown (the developer) at si@sjbrown.co.uk
Feedback can also be sent to Stefan Roettger (the maintainer) at snroettg@gmail.com

@ -0,0 +1,350 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "alpha.h"
#include <climits>
#include <algorithm>
namespace squish {
// Converts a float to the nearest integer and clamps the result to
// the range [0, limit].
//
// a      value to convert
// limit  largest value that may be returned
static int FloatToInt( float a, int limit )
{
    // the conversion truncates towards zero, so adding a half first
    // gives round-to-nearest
    int const rounded = static_cast< int >( a + 0.5f );

    // negative results clamp to zero, large ones to the limit
    if( rounded < 0 )
        return 0;
    if( rounded > limit )
        return limit;
    return rounded;
}
// Stores the alpha channel of a 4x4 pixel block as sixteen explicit 4-bit
// values, two per output byte.
//
// rgba   16 pixels of 4 bytes each; only byte 3 of every pixel is read
// mask   bit n is set when pixel n is valid; other pixels are stored as 0
// block  receives the 8 packed bytes
void CompressAlphaDxt3( u8 const* rgba, int mask, void* block )
{
    u8* out = reinterpret_cast< u8* >( block );
    float const scale = 15.0f/255.0f;

    // each output byte holds an even pixel (low nibble) and the
    // following odd pixel (high nibble)
    for( int pair = 0; pair < 8; ++pair )
    {
        int const even = 2*pair;
        int const odd = even + 1;

        // rescale from 8 bits to 4 bits
        int lowNibble = FloatToInt( ( float )rgba[4*even + 3] * scale, 15 );
        int highNibble = FloatToInt( ( float )rgba[4*odd + 3] * scale, 15 );

        // pixels disabled in the mask get zero alpha
        if( ( mask & ( 1 << even ) ) == 0 )
            lowNibble = 0;
        if( ( mask & ( 1 << odd ) ) == 0 )
            highNibble = 0;

        out[pair] = ( u8 )( lowNibble | ( highNibble << 4 ) );
    }
}
// Expands sixteen explicit 4-bit alpha values (two per input byte) back to
// 8 bits and writes them to the alpha byte of each pixel.
//
// rgba   16 pixels of 4 bytes each; only byte 3 of every pixel is written
// block  the 8 packed input bytes
void DecompressAlphaDxt3( u8* rgba, void const* block )
{
    u8 const* in = reinterpret_cast< u8 const* >( block );

    for( int pair = 0; pair < 8; ++pair )
    {
        // split the byte into its two 4-bit values
        int const packed = in[pair];
        int const lowNibble = packed & 0x0f;
        int const highNibble = packed >> 4;

        // replicate each nibble into both halves of the output byte so
        // that 0 maps to 0 and 15 maps to 255
        rgba[8*pair + 3] = ( u8 )( ( lowNibble << 4 ) | lowNibble );
        rgba[8*pair + 7] = ( u8 )( ( highNibble << 4 ) | highNibble );
    }
}
// Widens the interval [min, max] until it spans at least `steps`, keeping
// both ends inside [0, 255].
//
// min, max  interval ends, updated in place
// steps     minimum required difference between max and min
static void FixRange( int& min, int& max, int steps )
{
    // first try to grow upwards, saturating at 255
    if( max - min < steps )
    {
        int const raised = min + steps;
        max = ( raised < 255 ) ? raised : 255;
    }

    // if saturation left the interval too narrow, grow downwards instead,
    // saturating at 0
    if( max - min < steps )
    {
        int const lowered = max - steps;
        min = ( lowered > 0 ) ? lowered : 0;
    }
}
// Maps the alpha of every pixel to the closest entry of an 8-entry codebook.
//
// rgba     16 pixels of 4 bytes each; only byte 3 of every pixel is read
// mask     bit n is set when pixel n is valid
// codes    the 8 codebook values
// indices  receives one codebook index per pixel (0 for masked-out pixels)
//
// Returns the sum of squared differences over the valid pixels.
static int FitCodes( u8 const* rgba, int mask, u8 const* codes, u8* indices )
{
    int total = 0;
    for( int pixel = 0; pixel < 16; ++pixel )
    {
        // masked-out pixels take the first code and add no error
        if( ( mask & ( 1 << pixel ) ) == 0 )
        {
            indices[pixel] = 0;
        }
        else
        {
            int const alpha = rgba[4*pixel + 3];

            // linear search; on ties the earliest code wins
            int bestDist = INT_MAX;
            int bestCode = 0;
            for( int code = 0; code < 8; ++code )
            {
                int const delta = alpha - ( int )codes[code];
                int const dist = delta*delta;
                if( dist < bestDist )
                {
                    bestDist = dist;
                    bestCode = code;
                }
            }

            indices[pixel] = ( u8 )bestCode;
            total += bestDist;
        }
    }
    return total;
}
static void WriteAlphaBlock( int alpha0, int alpha1, u8 const* indices, void* block )
{
u8* bytes = reinterpret_cast< u8* >( block );
// write the first two bytes
bytes[0] = ( u8 )alpha0;
bytes[1] = ( u8 )alpha1;
// pack the indices with 3 bits each
u8* dest = bytes + 2;
u8 const* src = indices;
for( int i = 0; i < 2; ++i )
{
// pack 8 3-bit values
int value = 0;
for( int j = 0; j < 8; ++j )
{
int index = *src++;
value |= ( index << 3*j );
}
// store in 3 bytes
for( int j = 0; j < 3; ++j )
{
int byte = ( value >> 8*j ) & 0xff;
*dest++ = ( u8 )byte;
}
}
}
static void WriteAlphaBlock5( int alpha0, int alpha1, u8 const* indices, void* block )
{
// check the relative values of the endpoints
if( alpha0 > alpha1 )
{
// swap the indices
u8 swapped[16];
for( int i = 0; i < 16; ++i )
{
u8 index = indices[i];
if( index == 0 )
swapped[i] = 1;
else if( index == 1 )
swapped[i] = 0;
else if( index <= 5 )
swapped[i] = 7 - index;
else
swapped[i] = index;
}
// write the block
WriteAlphaBlock( alpha1, alpha0, swapped, block );
}
else
{
// write the block
WriteAlphaBlock( alpha0, alpha1, indices, block );
}
}
static void WriteAlphaBlock7( int alpha0, int alpha1, u8 const* indices, void* block )
{
// check the relative values of the endpoints
if( alpha0 < alpha1 )
{
// swap the indices
u8 swapped[16];
for( int i = 0; i < 16; ++i )
{
u8 index = indices[i];
if( index == 0 )
swapped[i] = 1;
else if( index == 1 )
swapped[i] = 0;
else
swapped[i] = 9 - index;
}
// write the block
WriteAlphaBlock( alpha1, alpha0, swapped, block );
}
else
{
// write the block
WriteAlphaBlock( alpha0, alpha1, indices, block );
}
}
// Encodes the alpha channel of a 4x4 pixel block as an interpolated alpha
// block.  Both the 5-value and the 7-value codebooks are fitted and the one
// with the smaller squared error is written (the 5-value one on a tie).
//
// rgba   16 pixels of 4 bytes each; only byte 3 of every pixel is read
// mask   bit n is set when pixel n is valid
// block  receives the 8 output bytes
void CompressAlphaDxt5( u8 const* rgba, int mask, void* block )
{
    // collect the alpha extremes of the valid pixels; the 5-value range
    // ignores 0 and 255 because that codebook stores them explicitly
    int lo5 = 255;
    int hi5 = 0;
    int lo7 = 255;
    int hi7 = 0;
    for( int pixel = 0; pixel < 16; ++pixel )
    {
        if( ( mask & ( 1 << pixel ) ) != 0 )
        {
            int const alpha = rgba[4*pixel + 3];
            lo7 = ( alpha < lo7 ) ? alpha : lo7;
            hi7 = ( alpha > hi7 ) ? alpha : hi7;
            if( alpha != 0 && alpha < lo5 )
                lo5 = alpha;
            if( alpha != 255 && alpha > hi5 )
                hi5 = alpha;
        }
    }

    // nothing contributed to a range: collapse it onto its upper end
    if( lo5 > hi5 )
        lo5 = hi5;
    if( lo7 > hi7 )
        lo7 = hi7;

    // make each range at least as wide as its number of steps
    FixRange( lo5, hi5, 5 );
    FixRange( lo7, hi7, 7 );

    // 5-value codebook: endpoints, 4 interpolated values, then 0 and 255
    u8 book5[8];
    book5[0] = ( u8 )lo5;
    book5[1] = ( u8 )hi5;
    for( int step = 1; step < 5; ++step )
        book5[1 + step] = ( u8 )( ( ( 5 - step )*lo5 + step*hi5 )/5 );
    book5[6] = 0;
    book5[7] = 255;

    // 7-value codebook: endpoints and 6 interpolated values
    u8 book7[8];
    book7[0] = ( u8 )lo7;
    book7[1] = ( u8 )hi7;
    for( int step = 1; step < 7; ++step )
        book7[1 + step] = ( u8 )( ( ( 7 - step )*lo7 + step*hi7 )/7 );

    // fit against both codebooks
    u8 fit5[16];
    u8 fit7[16];
    int const error5 = FitCodes( rgba, mask, book5, fit5 );
    int const error7 = FitCodes( rgba, mask, book7, fit7 );

    // keep whichever is closer, preferring the 5-value codebook on a tie
    if( error7 < error5 )
        WriteAlphaBlock7( lo7, hi7, fit7, block );
    else
        WriteAlphaBlock5( lo5, hi5, fit5, block );
}
// Decodes an 8-byte interpolated alpha block and writes the result to the
// alpha byte of each of the 16 pixels.
//
// rgba   16 pixels of 4 bytes each; only byte 3 of every pixel is written
// block  the 8 input bytes: two endpoints followed by sixteen 3-bit indices
void DecompressAlphaDxt5( u8* rgba, void const* block )
{
    u8 const* in = reinterpret_cast< u8 const* >( block );
    int const first = in[0];
    int const second = in[1];

    // the order of the endpoints selects the codebook: first <= second
    // means 4 interpolated values plus 0 and 255, otherwise 6 interpolated
    // values
    bool const fiveValue = ( first <= second );
    int const steps = fiveValue ? 5 : 7;

    u8 palette[8];
    palette[0] = ( u8 )first;
    palette[1] = ( u8 )second;
    for( int i = 1; i < steps; ++i )
        palette[1 + i] = ( u8 )( ( ( steps - i )*first + i*second )/steps );
    if( fiveValue )
    {
        palette[6] = 0;
        palette[7] = 255;
    }

    // unpack all sixteen 3-bit indices before any output is written; each
    // group of 8 is stored as a 24-bit word, low byte first
    u8 selected[16];
    for( int half = 0; half < 2; ++half )
    {
        u8 const* packed = in + 2 + 3*half;
        int const bits = packed[0] | ( packed[1] << 8 ) | ( packed[2] << 16 );
        for( int n = 0; n < 8; ++n )
            selected[8*half + n] = ( u8 )( ( bits >> ( 3*n ) ) & 0x7 );
    }

    // look every pixel up in the codebook
    for( int pixel = 0; pixel < 16; ++pixel )
        rgba[4*pixel + 3] = palette[selected[pixel]];
}
} // namespace squish

@ -0,0 +1,41 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_ALPHA_H
#define SQUISH_ALPHA_H
#include "squish.h"
namespace squish {
// All four functions operate on the alpha channel of one 4x4 block.  rgba
// points at 16 pixels of 4 bytes each, of which only byte 3 is touched, and
// block points at 8 bytes of compressed alpha data.  For the compressors,
// bit n of mask is set when pixel n is valid.

// Stores alpha as sixteen explicit 4-bit values; masked-out pixels become 0.
void CompressAlphaDxt3( u8 const* rgba, int mask, void* block );
// Stores alpha as two endpoints plus sixteen 3-bit codebook indices.
void CompressAlphaDxt5( u8 const* rgba, int mask, void* block );
// Expands the sixteen explicit 4-bit values back to 8 bits.
void DecompressAlphaDxt3( u8* rgba, void const* block );
// Rebuilds the codebook from the two endpoints and looks up each index.
void DecompressAlphaDxt5( u8* rgba, void const* block );
} // namespace squish
#endif // ndef SQUISH_ALPHA_H

@ -0,0 +1,392 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2007 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "clusterfit.h"
#include "colourset.h"
#include "colourblock.h"
#include <cfloat>
namespace squish {
// Prepares a cluster fit for the given colour set.
//
// colours  the colour set to fit
// flags    compression flags; kColourIterativeClusterFit enables up to
//          kMaxIterations search passes instead of one
// metric   optional per-channel error weights (3 floats); when null every
//          channel is weighted 1
ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric )
  : ColourFit( colours, flags )
{
    // a single pass unless the iterative mode was requested
    if( ( m_flags & kColourIterativeClusterFit ) != 0 )
        m_iterationCount = kMaxIterations;
    else
        m_iterationCount = 1;

    // error weights (old perceptual = 0.2126f, 0.7152f, 0.0722f)
    if( !metric )
        m_metric = VEC4_CONST( 1.0f );
    else
        m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );

    // any real candidate beats this starting error
    m_besterror = VEC4_CONST( FLT_MAX );

    // the principal component of the weighted covariance is the axis the
    // points are first ordered along
    int const pointCount = m_colours->GetCount();
    Vec3 const* points = m_colours->GetPoints();
    Sym3x3 covariance = ComputeWeightedCovariance( pointCount, points, m_colours->GetWeights() );
    m_principle = ComputePrincipleComponent( covariance );
}
// Orders the points by their projection onto an axis and stores the ordering
// in slot `iteration` of m_order.
//
// axis       direction the points are projected onto
// iteration  slot to fill; slots below it hold the earlier orderings
//
// Returns false when the new ordering matches one from an earlier iteration
// (the weighted points are then left untouched).  Otherwise fills
// m_points_weights with the weighted points in the new order, m_xsum_wsum
// with their sum, and returns true.
bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
{
    int const n = m_colours->GetCount();
    Vec3 const* points = m_colours->GetPoints();

    // project every point onto the axis, starting from the identity order
    float proj[16];
    u8* current = ( u8* )m_order + 16*iteration;
    for( int i = 0; i < n; ++i )
    {
        proj[i] = Dot( points[i], axis );
        current[i] = ( u8 )i;
    }

    // insertion sort on the projections; the strict comparison keeps
    // equal projections in their original order
    for( int i = 1; i < n; ++i )
    {
        int j = i;
        while( j > 0 && proj[j] < proj[j - 1] )
        {
            std::swap( proj[j], proj[j - 1] );
            std::swap( current[j], current[j - 1] );
            --j;
        }
    }

    // give up if an earlier iteration already produced this ordering
    for( int it = 0; it < iteration; ++it )
    {
        u8 const* earlier = ( u8* )m_order + 16*it;
        int matched = 0;
        while( matched < n && current[matched] == earlier[matched] )
            ++matched;
        if( matched == n )
            return false;
    }

    // store weight*(x, y, z, 1) for each point in sorted order, so the w
    // component carries the weight itself, and accumulate the total
    Vec3 const* unweighted = m_colours->GetPoints();
    float const* weights = m_colours->GetWeights();
    m_xsum_wsum = VEC4_CONST( 0.0f );
    for( int i = 0; i < n; ++i )
    {
        int const src = current[i];
        Vec3 const& colour = unweighted[src];
        Vec4 const weighted = Vec4( colour.X(), colour.Y(), colour.Z(), 1.0f )*Vec4( weights[src] );
        m_points_weights[i] = weighted;
        m_xsum_wsum += weighted;
    }
    return true;
}
// Searches for the best 3-colour encoding (two endpoints plus their midpoint)
// of the colour set and writes it to block if it beats m_besterror.
//
// The points are ordered along an axis (the principal axis first) and every
// split of that ordering into three consecutive clusters [0,i), [i,j) and
// [j,count) is tried.  For each split the least-squares optimal endpoints are
// solved for, snapped to the 5:6:5 grid and scored.  While iterations remain
// (m_iterationCount) and the last pass improved the result, the search is
// repeated with the points re-ordered along the axis between the best
// endpoints found so far.
//
// block  receives the encoded colour block; it is not written when no
//        candidate improves on m_besterror
void ClusterFit::Compress3( void* block )
{
// declare variables
int const count = m_colours->GetCount();
// NOTE(review): double literal here, whereas Compress4 uses 2.0f -- confirm
// the difference is unintentional
Vec4 const two = VEC4_CONST( 2.0 );
Vec4 const one = VEC4_CONST( 1.0f );
// interpolation weight of the middle cluster: 1/2 for the xyz sums and
// (1/2)^2 for the w component, which carries the sum of weights
Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
Vec4 const zero = VEC4_CONST( 0.0f );
Vec4 const half = VEC4_CONST( 0.5f );
// largest representable value per channel (5, 6 and 5 bits) and its reciprocal
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// prepare an ordering using the principal axis
ConstructOrdering( m_principle, 0 );
// check all possible clusters and iterate on the total order
Vec4 beststart = VEC4_CONST( 0.0f );
Vec4 bestend = VEC4_CONST( 0.0f );
Vec4 besterror = m_besterror;
u8 bestindices[16];
int bestiteration = 0;
int besti = 0, bestj = 0;
// loop over iterations (we avoid the case that all points in first or last cluster)
for( int iterationIndex = 0;; )
{
// first cluster [0,i) is at the start
Vec4 part0 = VEC4_CONST( 0.0f );
for( int i = 0; i < count; ++i )
{
// second cluster [i,j) is half along
// (for i == 0 the scan starts at j == 1 with point 0 already in this
// cluster, which skips the split that puts every point in the last cluster)
Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
int jmin = ( i == 0 ) ? 1 : i;
for( int j = jmin;; )
{
// last cluster [j,count) is at the end
Vec4 part2 = m_xsum_wsum - part1 - part0;
// compute least squares terms directly
// (the w components hold the matching sums of squared interpolation weights)
Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
Vec4 alpha2_sum = alphax_sum.SplatW();
Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
Vec4 beta2_sum = betax_sum.SplatW();
Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
// compute the least-squares optimal points
Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
// clamp to the grid
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// apply the metric to the error term
Vec4 e5 = e4*m_metric;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
{
beststart = a;
bestend = b;
besti = i;
bestj = j;
besterror = error;
bestiteration = iterationIndex;
}
// advance
if( j == count )
break;
part1 += m_points_weights[j];
++j;
}
// advance
part0 += m_points_weights[i];
}
// stop if we didn't improve in this iteration
if( bestiteration != iterationIndex )
break;
// advance if possible
++iterationIndex;
if( iterationIndex == m_iterationCount )
break;
// stop if a new iteration is an ordering that has already been tried
Vec3 axis = ( bestend - beststart ).GetVec3();
if( !ConstructOrdering( axis, iterationIndex ) )
break;
}
// save the block if necessary
if( CompareAnyLessThan( besterror, m_besterror ) )
{
// remap the indices
// (first cluster -> index 0, middle cluster -> index 2, last cluster -> index 1,
// translated back through the ordering of the winning iteration)
u8 const* order = ( u8* )m_order + 16*bestiteration;
u8 unordered[16];
for( int m = 0; m < besti; ++m )
unordered[order[m]] = 0;
for( int m = besti; m < bestj; ++m )
unordered[order[m]] = 2;
for( int m = bestj; m < count; ++m )
unordered[order[m]] = 1;
m_colours->RemapIndices( unordered, bestindices );
// save the block
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
// save the error
m_besterror = besterror;
}
}
// Searches for the best 4-colour encoding (two endpoints plus the points one
// third and two thirds of the way between them) of the colour set and writes
// it to block if it beats m_besterror.
//
// The points are ordered along an axis (the principal axis first) and every
// split of that ordering into four consecutive clusters [0,i), [i,j), [j,k)
// and [k,count) is tried.  For each split the least-squares optimal endpoints
// are solved for, snapped to the 5:6:5 grid and scored.  While iterations
// remain (m_iterationCount) and the last pass improved the result, the search
// is repeated with the points re-ordered along the axis between the best
// endpoints found so far.
//
// block  receives the encoded colour block; it is not written when no
//        candidate improves on m_besterror
void ClusterFit::Compress4( void* block )
{
// declare variables
int const count = m_colours->GetCount();
Vec4 const two = VEC4_CONST( 2.0f );
Vec4 const one = VEC4_CONST( 1.0f );
// interpolation weights of the two inner clusters: the weight itself for the
// xyz sums and its square for the w component, which carries the sum of weights
Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
// product of the two interpolation weights (1/3 * 2/3)
Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
Vec4 const zero = VEC4_CONST( 0.0f );
Vec4 const half = VEC4_CONST( 0.5f );
// largest representable value per channel (5, 6 and 5 bits) and its reciprocal
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// prepare an ordering using the principal axis
ConstructOrdering( m_principle, 0 );
// check all possible clusters and iterate on the total order
Vec4 beststart = VEC4_CONST( 0.0f );
Vec4 bestend = VEC4_CONST( 0.0f );
Vec4 besterror = m_besterror;
u8 bestindices[16];
int bestiteration = 0;
int besti = 0, bestj = 0, bestk = 0;
// loop over iterations (we avoid the case that all points in first or last cluster)
for( int iterationIndex = 0;; )
{
// first cluster [0,i) is at the start
Vec4 part0 = VEC4_CONST( 0.0f );
for( int i = 0; i < count; ++i )
{
// second cluster [i,j) is one third along
Vec4 part1 = VEC4_CONST( 0.0f );
for( int j = i;; )
{
// third cluster [j,k) is two thirds along
// (for j == 0 the scan starts at k == 1 with point 0 already in this
// cluster, which skips the split that puts every point in the last cluster)
Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
int kmin = ( j == 0 ) ? 1 : j;
for( int k = kmin;; )
{
// last cluster [k,count) is at the end
Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
// compute least squares terms directly
// (the w components hold the matching sums of squared interpolation weights)
Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
Vec4 const alpha2_sum = alphax_sum.SplatW();
Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
Vec4 const beta2_sum = betax_sum.SplatW();
Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
// compute the least-squares optimal points
Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
// clamp to the grid
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// apply the metric to the error term
Vec4 e5 = e4*m_metric;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
{
beststart = a;
bestend = b;
besterror = error;
besti = i;
bestj = j;
bestk = k;
bestiteration = iterationIndex;
}
// advance
if( k == count )
break;
part2 += m_points_weights[k];
++k;
}
// advance
if( j == count )
break;
part1 += m_points_weights[j];
++j;
}
// advance
part0 += m_points_weights[i];
}
// stop if we didn't improve in this iteration
if( bestiteration != iterationIndex )
break;
// advance if possible
++iterationIndex;
if( iterationIndex == m_iterationCount )
break;
// stop if a new iteration is an ordering that has already been tried
Vec3 axis = ( bestend - beststart ).GetVec3();
if( !ConstructOrdering( axis, iterationIndex ) )
break;
}
// save the block if necessary
if( CompareAnyLessThan( besterror, m_besterror ) )
{
// remap the indices
// (clusters in order map to indices 0, 2, 3 and 1, translated back through
// the ordering of the winning iteration)
u8 const* order = ( u8* )m_order + 16*bestiteration;
u8 unordered[16];
for( int m = 0; m < besti; ++m )
unordered[order[m]] = 0;
for( int m = besti; m < bestj; ++m )
unordered[order[m]] = 2;
for( int m = bestj; m < bestk; ++m )
unordered[order[m]] = 3;
for( int m = bestk; m < count; ++m )
unordered[order[m]] = 1;
m_colours->RemapIndices( unordered, bestindices );
// save the block
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
// save the error
m_besterror = besterror;
}
}
} // namespace squish

@ -0,0 +1,61 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2007 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_CLUSTERFIT_H
#define SQUISH_CLUSTERFIT_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
#include "colourfit.h"
namespace squish {
// Colour fitter that searches orderings of the block's points for the
// endpoint pair with the lowest error. Compress3/Compress4 are the overrides
// invoked through ColourFit::Compress.
class ClusterFit : public ColourFit
{
public:
ClusterFit( ColourSet const* colours, int flags, float* metric );
private:
// Builds the point ordering for the given iteration along `axis`; the
// compressors stop iterating when this returns false (the ordering has
// already been tried).
bool ConstructOrdering( Vec3 const& axis, int iteration );
virtual void Compress3( void* block );
virtual void Compress4( void* block );
// Upper bound on the number of orderings stored in m_order.
enum { kMaxIterations = 8 };
// Number of iterations the compressors will attempt before giving up.
int m_iterationCount;
Vec3 m_principle;
// One 16-entry ordering per iteration; iteration n starts at m_order + 16*n.
u8 m_order[16*kMaxIterations];
// Per-point values that the compressors accumulate into cluster sums.
Vec4 m_points_weights[16];
// Total over all points; the last cluster is derived by subtracting the others.
Vec4 m_xsum_wsum;
// Per-channel weighting multiplied into the error term.
Vec4 m_metric;
// Lowest error written to the output block so far.
Vec4 m_besterror;
};
} // namespace squish
#endif // ndef SQUISH_CLUSTERFIT_H

@ -0,0 +1,214 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "colourblock.h"
namespace squish {
// Rounds `a` to the nearest integer (by adding one half and truncating
// towards zero) and clamps the result to the range [0, limit].
static int FloatToInt( float a, int limit )
{
	int const rounded = static_cast< int >( a + 0.5f );

	// anything below zero saturates at zero
	if( rounded < 0 )
		return 0;

	// anything above the limit saturates at the limit
	return ( rounded > limit ) ? limit : rounded;
}
// Quantises a colour with components in [0,1] to a packed 5:6:5 value:
// red in bits 11-15, green in bits 5-10, blue in bits 0-4.
static int FloatTo565( Vec3::Arg colour )
{
	// scale each channel to its bit depth, rounding and clamping
	int const red = FloatToInt( 31.0f*colour.X(), 31 );
	int const green = FloatToInt( 63.0f*colour.Y(), 63 );
	int const blue = FloatToInt( 31.0f*colour.Z(), 31 );

	// assemble the fields from the most significant downwards
	int packed = red;
	packed = ( packed << 6 ) | green;
	packed = ( packed << 5 ) | blue;
	return packed;
}
static void WriteColourBlock( int a, int b, u8* indices, void* block )
{
// get the block as bytes
u8* bytes = ( u8* )block;
// write the endpoints
bytes[0] = ( u8 )( a & 0xff );
bytes[1] = ( u8 )( a >> 8 );
bytes[2] = ( u8 )( b & 0xff );
bytes[3] = ( u8 )( b >> 8 );
// write the indices
for( int i = 0; i < 4; ++i )
{
u8 const* ind = indices + 4*i;
bytes[4 + i] = ind[0] | ( ind[1] << 2 ) | ( ind[2] << 4 ) | ( ind[3] << 6 );
}
}
// Writes a block whose first packed endpoint is not greater than the second.
// If the quantised endpoints arrive the other way round they are exchanged
// and indices 0 and 1 (the endpoint indices) are exchanged to match; indices
// 2 and 3 are left as they are.
void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
{
	int first = FloatTo565( start );
	int second = FloatTo565( end );

	// decide whether the endpoints have to trade places
	bool const exchange = ( first > second );
	if( exchange )
		std::swap( first, second );

	// copy the indices, flipping the endpoint indices when exchanged
	u8 remapped[16];
	for( int i = 0; i < 16; ++i )
	{
		u8 const index = indices[i];
		bool const isEndpoint = ( index == 0 || index == 1 );
		remapped[i] = ( exchange && isEndpoint ) ? ( u8 )( index ^ 0x1 ) : index;
	}

	WriteColourBlock( first, second, remapped, block );
}
// Writes a block whose first packed endpoint is greater than the second.
// Endpoints that quantise in the opposite order are exchanged and the low bit
// of every index is flipped; endpoints that quantise to the same value make
// every index 0.
void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
{
	int first = FloatTo565( start );
	int second = FloatTo565( end );

	u8 remapped[16];
	if( first == second )
	{
		// degenerate block: every pixel refers to the first endpoint
		for( int i = 0; i < 16; ++i )
			remapped[i] = 0;
	}
	else if( first > second )
	{
		// already in the required order
		for( int i = 0; i < 16; ++i )
			remapped[i] = indices[i];
	}
	else
	{
		// wrong order: exchange the endpoints and mirror the indices
		std::swap( first, second );
		for( int i = 0; i < 16; ++i )
			remapped[i] = ( indices[i] ^ 0x1 ) & 0x3;
	}

	WriteColourBlock( first, second, remapped, block );
}
// Reads a little-endian 5:6:5 colour from `packed`, writes it to `colour` as
// four 8-bit values (red, green, blue, 255) and returns the raw 16-bit value.
// Each field is widened by replicating its top bits into the vacated low bits.
static int Unpack565( u8 const* packed, u8* colour )
{
	int const value = ( int )packed[0] | ( ( int )packed[1] << 8 );

	// isolate the three bit fields
	int const red5 = ( value >> 11 ) & 0x1f;
	int const green6 = ( value >> 5 ) & 0x3f;
	int const blue5 = value & 0x1f;

	// expand to 8 bits per channel with opaque alpha
	colour[0] = ( u8 )( ( red5 << 3 ) | ( red5 >> 2 ) );
	colour[1] = ( u8 )( ( green6 << 2 ) | ( green6 >> 4 ) );
	colour[2] = ( u8 )( ( blue5 << 3 ) | ( blue5 >> 2 ) );
	colour[3] = 255;

	return value;
}
// Expands the colour part of a block into 16 RGBA pixels (64 bytes) at `rgba`.
// When `isDxt1` is set and the first endpoint is not greater than the second,
// the palette is endpoint, endpoint, midpoint, transparent black; otherwise it
// is the two endpoints plus the two one-third interpolants.
void DecompressColour( u8* rgba, void const* block, bool isDxt1 )
{
	u8 const* src = reinterpret_cast< u8 const* >( block );

	// palette of four RGBA entries; the first two are the endpoints
	u8 palette[16];
	int const first = Unpack565( src, palette );
	int const second = Unpack565( src + 2, palette + 4 );
	bool const threeColour = isDxt1 && first <= second;

	// derive the remaining two palette entries channel by channel
	for( int channel = 0; channel < 3; ++channel )
	{
		int const lo = palette[channel];
		int const hi = palette[4 + channel];
		if( threeColour )
		{
			palette[8 + channel] = ( u8 )( ( lo + hi )/2 );
			palette[12 + channel] = 0;
		}
		else
		{
			palette[8 + channel] = ( u8 )( ( 2*lo + hi )/3 );
			palette[12 + channel] = ( u8 )( ( lo + 2*hi )/3 );
		}
	}
	palette[11] = 255;
	palette[15] = threeColour ? 0 : 255;

	// pull all 2-bit indices out of the block before writing any pixel
	u8 selector[16];
	for( int pixel = 0; pixel < 16; ++pixel )
	{
		u8 const packed = src[4 + pixel/4];
		selector[pixel] = ( packed >> ( 2*( pixel%4 ) ) ) & 0x3;
	}

	// look each pixel up in the palette
	for( int pixel = 0; pixel < 16; ++pixel )
	{
		u8 const* entry = palette + 4*selector[pixel];
		u8* out = rgba + 4*pixel;
		for( int channel = 0; channel < 4; ++channel )
			out[channel] = entry[channel];
	}
}
} // namespace squish

@ -0,0 +1,41 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_COLOURBLOCK_H
#define SQUISH_COLOURBLOCK_H
#include "squish.h"
#include "maths.h"
namespace squish {
// Quantises the endpoints to 5:6:5 and writes an 8-byte colour block in which
// the first packed endpoint is less than or equal to the second.
// `indices` holds 16 values; `block` receives 8 bytes.
void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
// Quantises the endpoints to 5:6:5 and writes an 8-byte colour block in which
// the first packed endpoint is greater than or equal to the second (equal
// endpoints force every index to 0).
void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
// Expands the 8-byte colour block at `block` into 16 RGBA pixels (64 bytes)
// at `rgba`; `isDxt1` enables the three-colour-plus-transparent palette.
void DecompressColour( u8* rgba, void const* block, bool isDxt1 );
} // namespace squish
#endif // ndef SQUISH_COLOURBLOCK_H

@ -0,0 +1,54 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "colourfit.h"
#include "colourset.h"
namespace squish {
// Records the colour set and the compression flags; no fitting work is done
// here. The pointer is stored as given (the destructor does not delete it).
ColourFit::ColourFit( ColourSet const* colours, int flags )
: m_colours( colours ),
m_flags( flags )
{
}
// Nothing to release; defined out of line for the virtual destructor
// declared in the class.
ColourFit::~ColourFit()
{
}
void ColourFit::Compress( void* block )
{
bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
if( isDxt1 )
{
Compress3( block );
if( !m_colours->IsTransparent() )
Compress4( block );
}
else
Compress4( block );
}
} // namespace squish

@ -0,0 +1,56 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_COLOURFIT_H
#define SQUISH_COLOURFIT_H
#include "squish.h"
#include "maths.h"
#include <climits>
namespace squish {
class ColourSet;
// Abstract base for colour fitters. Compress() picks which of the two
// compressors to run from the flags and the colour set; derived classes
// supply Compress3 and Compress4.
class ColourFit
{
public:
ColourFit( ColourSet const* colours, int flags );
virtual ~ColourFit();
// Compresses the colour set into `block` via Compress3 and/or Compress4.
void Compress( void* block );
protected:
virtual void Compress3( void* block ) = 0;
virtual void Compress4( void* block ) = 0;
// Colour set being fitted; stored as passed to the constructor.
ColourSet const* m_colours;
// Compression flags as passed to the constructor (tested against kDxt1).
int m_flags;
};
} // namespace squish
#endif // ndef SQUISH_COLOURFIT_H

@ -0,0 +1,121 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "colourset.h"
namespace squish {
// Builds the minimal set of distinct colours from a block of 16 RGBA pixels.
//
// rgba  - 16 pixels, 4 bytes each (red, green, blue, alpha).
// mask  - bit i set means pixel i takes part; cleared pixels are skipped.
// flags - kDxt1 makes pixels with alpha below 128 count as transparent (they
//         are skipped and m_transparent is set); kWeightColourByAlpha weights
//         each pixel by (alpha + 1)/256 instead of 1.
//
// Pixels with identical RGB share one point whose weight is the sum of the
// contributing weights; m_remap[i] is that point's index, or -1 for a skipped
// pixel. After accumulation every weight is replaced by its square root.
ColourSet::ColourSet( u8 const* rgba, int mask, int flags )
: m_count( 0 ),
m_transparent( false )
{
// check the compression mode for dxt1
bool isDxt1 = ( ( flags & kDxt1 ) != 0 );
bool weightByAlpha = ( ( flags & kWeightColourByAlpha ) != 0 );
// create the minimal set
for( int i = 0; i < 16; ++i )
{
// check this pixel is enabled
int bit = 1 << i;
if( ( mask & bit ) == 0 )
{
m_remap[i] = -1;
continue;
}
// check for transparent pixels when using dxt1
if( isDxt1 && rgba[4*i + 3] < 128 )
{
m_remap[i] = -1;
m_transparent = true;
continue;
}
// loop over previous points for a match
// (no loop condition: the scan always ends at j == i or at a match)
for( int j = 0;; ++j )
{
// allocate a new point
if( j == i )
{
// normalise coordinates to [0,1]
float x = ( float )rgba[4*i] / 255.0f;
float y = ( float )rgba[4*i + 1] / 255.0f;
float z = ( float )rgba[4*i + 2] / 255.0f;
// ensure there is always non-zero weight even for zero alpha
float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
// add the point
m_points[m_count] = Vec3( x, y, z );
m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
m_remap[i] = m_count;
// advance
++m_count;
break;
}
// check for a match
// (pixel j must be enabled, have the same RGB and, for dxt1, be opaque,
// which guarantees m_remap[j] refers to a real point)
int oldbit = 1 << j;
bool match = ( ( mask & oldbit ) != 0 )
&& ( rgba[4*i] == rgba[4*j] )
&& ( rgba[4*i + 1] == rgba[4*j + 1] )
&& ( rgba[4*i + 2] == rgba[4*j + 2] )
&& ( rgba[4*j + 3] >= 128 || !isDxt1 );
if( match )
{
// get the index of the match
int index = m_remap[j];
// ensure there is always non-zero weight even for zero alpha
float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
// map to this point and increase the weight
m_weights[index] += ( weightByAlpha ? w : 1.0f );
m_remap[i] = index;
break;
}
}
}
// square root the weights
for( int i = 0; i < m_count; ++i )
m_weights[i] = std::sqrt( m_weights[i] );
}
// Expands per-point indices back to per-pixel indices. `source` is indexed by
// point; `target` receives 16 entries, one per pixel. Pixels that were left
// out of the set (remap entry -1) are given index 3.
void ColourSet::RemapIndices( u8 const* source, u8* target ) const
{
	for( int pixel = 0; pixel < 16; ++pixel )
	{
		int const point = m_remap[pixel];
		if( point != -1 )
		{
			target[pixel] = source[point];
			continue;
		}
		target[pixel] = 3;
	}
}
} // namespace squish

@ -0,0 +1,58 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_COLOURSET_H
#define SQUISH_COLOURSET_H
#include "squish.h"
#include "maths.h"
namespace squish {
/*! @brief Represents a set of block colours

	Holds the distinct colours found in a block of 16 RGBA pixels, a weight
	for each one, and the mapping from each pixel back to its colour.
*/
class ColourSet
{
public:
ColourSet( u8 const* rgba, int mask, int flags );
// Number of distinct points stored in the arrays below.
int GetCount() const { return m_count; }
Vec3 const* GetPoints() const { return m_points; }
float const* GetWeights() const { return m_weights; }
// True when at least one pixel was skipped as transparent.
bool IsTransparent() const { return m_transparent; }
// Converts 'one index per point' in source to 'one index per pixel' in target.
void RemapIndices( u8 const* source, u8* target ) const;
private:
int m_count;
Vec3 m_points[16];
float m_weights[16];
// For each pixel, the index of its point, or -1 if the pixel was skipped.
int m_remap[16];
bool m_transparent;
};
} // namespace squish
#endif // ndef SQUISH_COLOURSET_H

@ -0,0 +1,38 @@
# config file for GNUmake
# Every option below uses ?= so it can be overridden from the environment or
# the make command line.

# define to 1 to use OpenMP parallelization
USE_OPENMP ?= 0
# define to 1 to install shared library
USE_SHARED ?= 0
# define to 1 to use Altivec instructions
USE_ALTIVEC ?= 0
# define to 1 to use SSE2 instructions
USE_SSE ?= 0

# default flags
CXXFLAGS ?= -O2 -Wall

ifeq ($(USE_OPENMP),1)
CPPFLAGS += -DSQUISH_USE_OPENMP
CXXFLAGS += -fopenmp
endif

ifeq ($(USE_ALTIVEC),1)
CPPFLAGS += -DSQUISH_USE_ALTIVEC=1
CXXFLAGS += -maltivec
endif

# SQUISH_USE_SSE=2 selects the SSE2 code path, so the compiler must be told to
# enable SSE2 (-msse alone only enables the original SSE instruction set).
ifeq ($(USE_SSE),1)
CPPFLAGS += -DSQUISH_USE_SSE=2
CXXFLAGS += -msse2
endif

# install options
INSTALL = install
INSTALL_FILE = $(INSTALL) -p -m 644
INSTALL_PROGRAM = $(INSTALL) -p -m 755
INSTALL_DIRECTORY = $(INSTALL) -d -m 755

# where should we install to
INSTALL_DIR ?= /usr/local
LIB_PATH ?= lib

@ -0,0 +1,49 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#if !defined( SQUISH_CONFIG_H )
#define SQUISH_CONFIG_H

// Altivec support: define SQUISH_USE_ALTIVEC to 1 on the command line to
// enable it; it is off unless requested.
#if !defined( SQUISH_USE_ALTIVEC )
#define SQUISH_USE_ALTIVEC 0
#endif

// SSE support: 1 selects SSE, 2 selects SSE2. SSE2 is used when the build
// does not say otherwise.
#if !defined( SQUISH_USE_SSE )
#define SQUISH_USE_SSE 2
#endif

// The two instruction sets are mutually exclusive.
#if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE
#error "Cannot enable both Altivec and SSE!"
#endif

// SQUISH_USE_SIMD is derived: 1 when either instruction set is enabled.
#if !SQUISH_USE_ALTIVEC && !SQUISH_USE_SSE
#define SQUISH_USE_SIMD 0
#else
#define SQUISH_USE_SIMD 1
#endif

#endif // !defined( SQUISH_CONFIG_H )

@ -0,0 +1,151 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include <iostream>
//! One way of producing a target value: a pair of quantised endpoints and the
//! absolute difference between the value they produce and the target.
struct SourceBlock
{
	int start;
	int end;
	int error;
};

//! The best known sources for one 8-bit target value: entry 0 is the target
//! reproduced by the first endpoint, entry 1 by the first interpolated colour.
struct TargetValue
{
	SourceBlock sources[2];
};

/*! @brief Prints a 256-entry single-colour lookup table as C++ source.

	@param name    Identifier of the generated array.
	@param bits    Bit depth of the quantised endpoints (5 or 6 in practice).
	@param colours Palette size of the block mode: 3 interpolates with
	               (a + b)/2, anything else with (2a + b)/3.

	For every 8-bit target value the table stores, for each of the two
	tracked palette entries, an endpoint pair that reproduces the target as
	closely as possible together with the remaining error. The table is
	written to std::cout.
*/
static void GenerateData( std::string const& name, int bits, int colours )
{
	// number of palette entries tracked per target (size of TargetValue::sources)
	int const sourceCount = 2;

	TargetValue values[256];

	// initialise the data
	// NOTE: this must be bounded by the size of the sources array, not by the
	// palette size (3 or 4), otherwise it writes past the end of the array.
	for( int target = 0; target < 256; ++target )
	{
		for( int index = 0; index < sourceCount; ++index )
		{
			SourceBlock& block = values[target].sources[index];
			block.start = 0;
			block.end = 0;
			block.error = 255;
		}
	}

	// loop over all possible source points
	int count = ( 1 << bits );
	for( int value1 = 0; value1 < count; ++value1 )
	{
		for( int value2 = 0; value2 < count; ++value2 )
		{
			// compute the 8-bit endpoints
			int a = ( value1 << ( 8 - bits ) ) | ( value1 >> ( 2*bits - 8 ) );
			int b = ( value2 << ( 8 - bits ) ) | ( value2 >> ( 2*bits - 8 ) );

			// fill in the codebook with these and the first intermediate
			int codes[2];
			codes[0] = a;
			if( colours == 3 )
				codes[1] = ( a + b )/2;
			else
				codes[1] = ( 2*a + b )/3;

			// mark each target point with the endpoints and index needed for it
			// (the first exact hit wins; later ones are ignored)
			for( int index = 0; index < sourceCount; ++index )
			{
				int target = codes[index];
				SourceBlock& block = values[target].sources[index];
				if( block.error != 0 )
				{
					block.start = value1;
					block.end = value2;
					block.error = 0;
				}
			}
		}
	}

	// iteratively fill in the missing values: a target with no exact source
	// borrows its neighbour's source at one more unit of error
	for( ;; )
	{
		bool stable = true;
		for( int index = 0; index < sourceCount; ++index )
		{
			for( int target = 0; target < 256; ++target )
			{
				if( target != 255 )
				{
					SourceBlock& current = values[target].sources[index];
					SourceBlock& next = values[target + 1].sources[index];
					if( current.error > next.error + 1 )
					{
						current.start = next.start;
						current.end = next.end;
						current.error = next.error + 1;
						stable = false;
					}
				}
				if( target != 0 )
				{
					SourceBlock& current = values[target].sources[index];
					SourceBlock& previous = values[target - 1].sources[index];
					if( current.error > previous.error + 1 )
					{
						current.start = previous.start;
						current.end = previous.end;
						current.error = previous.error + 1;
						stable = false;
					}
				}
			}
		}
		if( stable )
			break;
	}

	// write the table out as a C++ array definition
	std::cout << "\nstatic SingleColourLookup const " << name << "[] = \n{\n";
	for( int i = 0;; )
	{
		std::cout << "\t{ { ";
		for( int j = 0;; )
		{
			SourceBlock const& block = values[i].sources[j];
			if( j < colours )
				std::cout << "{ " << block.start << ", " << block.end << ", " << block.error << " }";
			else
				std::cout << "{ 0, 0, 0 }";
			if( ++j == sourceCount )
				break;
			std::cout << ", ";
		}
		std::cout << " } }";
		if( ++i == 256 )
			break;
		std::cout << ",\n";
	}
	std::cout << "\n};\n";
}
// Emits the four lookup tables in a fixed order: the 5-bit and 6-bit tables
// for the three-colour mode, then the same pair for the four-colour mode.
int main()
{
	struct Table
	{
		char const* name;
		int bits;
		int colours;
	};
	static Table const tables[] =
	{
		{ "lookup_5_3", 5, 3 },
		{ "lookup_6_3", 6, 3 },
		{ "lookup_5_4", 5, 4 },
		{ "lookup_6_4", 6, 4 }
	};

	int const tableCount = ( int )( sizeof( tables )/sizeof( tables[0] ) );
	for( int i = 0; i < tableCount; ++i )
		GenerateData( tables[i].name, tables[i].bits, tables[i].colours );
	return 0;
}

@ -0,0 +1,546 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
/*! @file
@brief Test program that compresses images loaded using the PNG format.
This program requires libpng for PNG input and output, and is designed to
test the RMS error for DXT compression for a set of test images.
This program uses the high-level image compression and decompression
functions that process an entire image at a time.
*/
#include <iostream>
#include <string>
#include <sstream>
#include <ctime>
#include <cmath>
#include <squish.h>
#include <png.h>
#ifdef _MSC_VER
#pragma warning( disable: 4511 4512 )
#endif // def _MSC_VER
using namespace squish;
//! Exception carrying a human-readable explanation, returned by what().
class Error : public std::exception
{
private:
	std::string m_excuse;

public:
	Error( std::string const& excuse )
		: m_excuse( excuse )
	{
	}

	~Error() throw()
	{
	}

	//! The message supplied at construction; valid while this object lives.
	virtual char const* what() const throw()
	{
		return m_excuse.c_str();
	}
};
//! Mix-in base: a class deriving from this cannot be copied or assigned,
//! because the copy operations are private and never defined.
class NonCopyable
{
	// declared only, to suppress the compiler-generated versions
	NonCopyable( NonCopyable const& );
	NonCopyable& operator=( NonCopyable const& );

public:
	NonCopyable()
	{
	}
};
//! Owns a heap-allocated byte array and releases it on destruction.
class Mem : NonCopyable
{
public:
	//! Creates an empty object; Get() returns null until Reset() is called.
	Mem() : m_p( 0 ) {}

	//! Allocates `size` uninitialised bytes.
	explicit Mem( int size ) : m_p( new u8[size] ) {}

	~Mem() { delete[] m_p; }

	//! Replaces the buffer with a fresh one of `size` uninitialised bytes.
	//! The new buffer is allocated first, so the old one is kept if the
	//! allocation throws.
	void Reset( int size )
	{
		u8 *p = new u8[size];
		// the buffer came from new[], so it must be released with delete[]
		delete[] m_p;
		m_p = p;
	}

	//! Pointer to the current buffer (null if none has been allocated).
	u8* Get() const { return m_p; }

private:
	u8* m_p;
};
//! Takes ownership of a C stream and closes it on destruction. A null stream
//! is accepted; IsValid() reports whether there is one.
class File : NonCopyable
{
public:
	explicit File( FILE* fp )
		: m_fp( fp )
	{
	}

	~File()
	{
		if( m_fp != 0 )
			fclose( m_fp );
	}

	bool IsValid() const
	{
		return m_fp != 0;
	}

	FILE* Get() const
	{
		return m_fp;
	}

private:
	FILE* m_fp;
};
//! Owns a libpng read struct together with its info and end-info structs.
//! The constructor throws Error if any of them cannot be created; the
//! destructor releases all three.
class PngReadStruct : NonCopyable
{
public:
	PngReadStruct()
		: m_png( 0 ),
		m_info( 0 ),
		m_end( 0 )
	{
		m_png = png_create_read_struct( PNG_LIBPNG_VER_STRING, 0, 0, 0 );
		if( m_png == 0 )
			throw Error( "failed to create png read struct" );

		m_info = png_create_info_struct( m_png );
		m_end = png_create_info_struct( m_png );
		if( m_info != 0 && m_end != 0 )
			return;

		// at least one info struct is missing: release whatever exists before
		// throwing, because the destructor will not run for this object
		png_destroy_read_struct(
			&m_png,
			m_info ? &m_info : 0,
			m_end ? &m_end : 0
		);
		throw Error( "failed to create png info structs" );
	}

	~PngReadStruct()
	{
		png_destroy_read_struct( &m_png, &m_info, &m_end );
	}

	png_structp GetPng() const { return m_png; }
	png_infop GetInfo() const { return m_info; }

private:
	png_structp m_png;
	png_infop m_info, m_end;
};
//! PNG write object.
class PngWriteStruct : NonCopyable
{
public:
PngWriteStruct()
: m_png( 0 ),
m_info( 0 )
{
m_png = png_create_write_struct( PNG_LIBPNG_VER_STRING, 0, 0, 0 );
if( !m_png )
throw Error( "failed to create png read struct" );
m_info = png_create_info_struct( m_png );
if( !m_info )
{
png_infopp info = m_info ? &m_info : 0;
png_destroy_write_struct( &m_png, info );
throw Error( "failed to create png info structs" );
}
}
~PngWriteStruct()
{
png_destroy_write_struct( &m_png, &m_info );
}
png_structp GetPng() const { return m_png; }
png_infop GetInfo() const { return m_info; }
private:
png_structp m_png;
png_infop m_info;
};
//! PNG rows object.
class PngRows : NonCopyable
{
public:
PngRows( int pitch, int height ) : m_height( height )
{
m_rows = new png_bytep[m_height];
for( int i = 0; i < m_height; ++i )
m_rows[i] = new png_byte[pitch];
}
~PngRows()
{
for( int i = 0; i < m_height; ++i )
delete[] m_rows[i];
delete[] m_rows;
}
png_bytep* Get() const { return m_rows; }
png_bytep operator[](int y) const { return m_rows[y]; }
private:
png_bytep* m_rows;
int m_height;
};
//! Represents a DXT compressed image in memory.
//! Filled in by Image::Compress and consumed by Image::Decompress.
struct DxtData
{
int width; //!< Image width, copied from the source image.
int height; //!< Image height, copied from the source image.
int format; //!< Either kDxt1, kDxt3 or kDxt5.
Mem data; //!< The compressed blocks.
bool isColour; //!< Carried over so a decompressed image keeps the source's colour/luminance kind.
bool isAlpha; //!< Carried over so a decompressed image keeps the source's alpha kind.
};
//! Represents an uncompressed RGBA image in memory.
//! Pixels are always stored as 4 bytes each, whatever the source format;
//! m_isColour and m_isAlpha record what the source actually contained.
class Image
{
public:
Image();
//! Loads an 8-bit PNG file; throws Error on failure.
void LoadPng( std::string const& fileName );
//! Writes the image as a PNG file; throws Error if the file cannot be opened.
void SavePng( std::string const& fileName ) const;
//! Replaces this image with the decompressed contents of dxt.
void Decompress( DxtData const& dxt );
//! Compresses this image into dxt using the given squish flags.
void Compress( DxtData& dxt, int flags ) const;
double GetRmsError( Image const& image ) const;
private:
int m_width;
int m_height;
bool m_isColour; //!< Either colour or luminance.
bool m_isAlpha; //!< Either alpha or not.
Mem m_pixels;
};
// Creates an empty 0x0 luminance image without alpha; m_pixels is
// default-constructed.
Image::Image()
: m_width( 0 ),
m_height( 0 ),
m_isColour( false ),
m_isAlpha( false )
{
}
// Loads a PNG file into this image as 4-byte RGBA pixels.
//
// Luminance sources have their single channel copied to red, green and blue;
// sources without alpha get an alpha of 255. m_isColour and m_isAlpha record
// which channels the file actually had.
//
// Throws Error if the file cannot be opened or read, is not a PNG, is not
// 8 bits per channel after loading, or if libpng returns no row data.
void Image::LoadPng( std::string const& fileName )
{
// open the source file
File file( fopen( fileName.c_str(), "rb" ) );
if( !file.IsValid() )
{
std::ostringstream oss;
oss << "failed to open \"" << fileName << "\" for reading";
throw Error( oss.str() );
}
// check the signature bytes
png_byte header[8];
size_t check = fread( header, 1, 8, file.Get() );
if( check != 8 )
throw Error( "file read error" );
if( png_sig_cmp( header, 0, 8 ) )
{
std::ostringstream oss;
oss << "\"" << fileName << "\" does not look like a png file";
throw Error( oss.str() );
}
// read the image into memory
// (libpng is told that the 8 signature bytes have already been consumed)
PngReadStruct png;
png_init_io( png.GetPng(), file.Get() );
png_set_sig_bytes( png.GetPng(), 8 );
png_read_png( png.GetPng(), png.GetInfo(), PNG_TRANSFORM_EXPAND, 0 );
// get the image info
png_uint_32 width;
png_uint_32 height;
int bitDepth;
int colourType;
png_get_IHDR( png.GetPng(), png.GetInfo(), &width, &height, &bitDepth, &colourType, 0, 0, 0 );
// check the image is 8 bit
if( bitDepth != 8 )
{
std::ostringstream oss;
oss << "cannot process " << bitDepth << "-bit image (bit depth must be 8)";
throw Error( oss.str() );
}
// copy the data into a contiguous array
m_width = width;
m_height = height;
m_isColour = ( ( colourType & PNG_COLOR_MASK_COLOR ) != 0 );
m_isAlpha = ( ( colourType & PNG_COLOR_MASK_ALPHA ) != 0 );
m_pixels.Reset(4*width*height);
// get the image rows
png_bytep const *rows = png_get_rows( png.GetPng(), png.GetInfo() );
if( !rows )
throw Error( "failed to get image rows" );
// copy the pixels into the storage
// (src advances by the source pixel size, dest always by 4 bytes)
u8 *dest = m_pixels.Get();
for( int y = 0; y < m_height; ++y )
{
u8 const *src = rows[y];
for( int x = 0; x < m_width; ++x )
{
if( m_isColour )
{
dest[0] = src[0];
dest[1] = src[1];
dest[2] = src[2];
src += 3;
}
else
{
// replicate the luminance into all three colour channels
u8 lum = *src++;
dest[0] = lum;
dest[1] = lum;
dest[2] = lum;
}
if( m_isAlpha )
dest[3] = *src++;
else
dest[3] = 255;
dest += 4;
}
}
}
void Image::SavePng( std::string const& fileName ) const
{
// create the target rows
int const pixelSize = ( m_isColour ? 3 : 1 ) + ( m_isAlpha ? 1 : 0 );
PngRows rows( m_width*pixelSize, m_height );
// fill the rows with pixel data
u8 const *src = m_pixels.Get();
for( int y = 0; y < m_height; ++y )
{
u8 *dest = rows[y];
for( int x = 0; x < m_width; ++x )
{
if( m_isColour )
{
dest[0] = src[0];
dest[1] = src[1];
dest[2] = src[2];
dest += 3;
}
else
*dest++ = src[1];
if( m_isAlpha )
*dest++ = src[3];
src += 4;
}
}
// set up the image
PngWriteStruct png;
png_set_IHDR(
png.GetPng(), png.GetInfo(), m_width, m_height,
8, ( m_isColour ? PNG_COLOR_MASK_COLOR : 0) | ( m_isAlpha ? PNG_COLOR_MASK_ALPHA : 0 ),
PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT
);
// open the target file
File file( fopen( fileName.c_str(), "wb" ) );
if( !file.IsValid() )
{
std::ostringstream oss;
oss << "failed to open \"" << fileName << "\" for writing";
throw Error( oss.str() );
}
// write the image
png_set_rows( png.GetPng(), png.GetInfo(), rows.Get() );
png_init_io( png.GetPng(), file.Get() );
png_write_png( png.GetPng(), png.GetInfo(), PNG_TRANSFORM_IDENTITY, 0 );
}
/*! @brief Replaces this image with the decompressed contents of a DXT buffer.

    Dimensions and the colour/alpha flags are taken from @a dxt, storage for
    4 bytes per pixel is allocated, and DecompressImage fills it.

    @param dxt Compressed data together with its size, flags and format.
*/
void Image::Decompress( DxtData const& dxt )
{
    // take over the description of the compressed image
    m_isColour = dxt.isColour;
    m_isAlpha = dxt.isAlpha;
    m_width = dxt.width;
    m_height = dxt.height;

    // 4 bytes for every pixel
    int const byteCount = 4*m_width*m_height;
    m_pixels.Reset( byteCount );

    // the whole-image routine does the actual decoding
    DecompressImage( m_pixels.Get(), m_width, m_height, dxt.data.Get(), dxt.format );
}
/*! @brief Compresses this image into a DXT buffer.

    @param dxt   Receives the dimensions, the colour/alpha flags, the DXT
                 format bits extracted from @a flags, and the compressed data.
    @param flags Compression flags; passed unchanged to GetStorageRequirements
                 and CompressImage.
*/
void Image::Compress( DxtData& dxt, int flags ) const
{
    // ask how large the compressed output will be
    int const compressedBytes = GetStorageRequirements( m_width, m_height, flags );

    // only the DXT1/3/5 bits of the flags describe the stored format
    int const formatMask = kDxt1 | kDxt3 | kDxt5;

    // describe the result and allocate its storage
    dxt.width = m_width;
    dxt.height = m_height;
    dxt.format = flags & formatMask;
    dxt.isColour = m_isColour;
    dxt.isAlpha = m_isAlpha;
    dxt.data.Reset( compressedBytes );

    // the whole-image routine does the actual encoding
    CompressImage( m_pixels.Get(), m_width, m_height, dxt.data.Get(), flags );
}
/*! @brief Computes the RMS colour difference between this image and another.

    For every pixel the squared differences of the first three bytes are
    summed; the fourth byte is ignored. The total is divided by the pixel
    count before the square root is taken.

    @param image Image to compare against; must have the same dimensions.
    @return Square root of the mean per-pixel squared colour difference.

    Throws Error if the dimensions differ.
*/
double Image::GetRmsError( Image const& image ) const
{
    bool const sameSize = ( m_width == image.m_width ) && ( m_height == image.m_height );
    if( !sameSize )
        throw Error( "image dimensions mismatch when computing RMS error" );

    // walk both pixel buffers in step
    double total = 0;
    u8 const *lhs = m_pixels.Get();
    u8 const *rhs = image.m_pixels.Get();
    for( int row = 0; row < m_height; ++row )
    {
        for( int col = 0; col < m_width; ++col, lhs += 4, rhs += 4 )
        {
            // squared distance over the three colour bytes
            int squared = 0;
            for( int channel = 0; channel < 3; ++channel )
            {
                int const delta = static_cast< int >( lhs[channel] ) - static_cast< int >( rhs[channel] );
                squared += delta*delta;
            }
            total += static_cast< double >( squared );
        }
    }

    double const pixelCount = static_cast< double >( m_width*m_height );
    return std::sqrt( total/pixelCount );
}
/*! @brief Entry point of squishpng.

    Loads a png, compresses it to DXT, decompresses it again, prints the
    source file name followed by the RMS error of the round trip, and
    optionally saves the decompressed image.

    Command line: option words start with '-' and may bundle several option
    letters; a '-' letter inside an option word stops option parsing for all
    following words. The first non-option word is the source file, the second
    the optional target file.

    @return 0 on success or when only help was requested, -1 on a usage
            error or when an exception is caught.
*/
int main( int argc, char* argv[] )
{
    try
    {
        // parse the command-line
        std::string sourceFileName;
        std::string targetFileName;
        int format = kDxt1;
        int fit = kColourClusterFit;
        int extra = 0;
        bool help = false;
        bool arguments = true;
        bool error = false;
        for( int i = 1; i < argc; ++i )
        {
            char const* word = argv[i];
            if( arguments && word[0] == '-' )
            {
                // every character after the dash is an independent option letter
                for( int j = 1; word[j] != '\0'; ++j )
                {
                    switch( word[j] )
                    {
                    case 'h': help = true; break;
                    case '1': format = kDxt1; break;
                    case '3': format = kDxt3; break;
                    case '5': format = kDxt5; break;
                    case 'r': fit = kColourRangeFit; break;
                    case 'i': fit = kColourIterativeClusterFit; break;
                    case 'w': extra = kWeightColourByAlpha; break;
                    case '-': arguments = false; break;
                    default:
                        std::cerr << "squishpng error: unknown option '" << word[j] << "'" << std::endl;
                        error = true;
                        break;
                    }
                }
            }
            else
            {
                // positional words fill the source name first, then the target name
                if( sourceFileName.empty() )
                    sourceFileName.assign( word );
                else if( targetFileName.empty() )
                    targetFileName.assign( word );
                else
                {
                    std::cerr << "squishpng error: unexpected argument \"" << word << "\"" << std::endl;
                    error = true;
                }
            }
        }

        // a source file is mandatory, except when the user only asked for help:
        // "squishpng -h" must print the help and succeed rather than report an error
        if( !help && sourceFileName.empty() )
        {
            std::cerr << "squishpng error: no source file given" << std::endl;
            error = true;
        }
        if( help || error )
        {
            std::cout
                << "SYNTAX" << std::endl
                << "\tsquishpng [-135riw] <source> [<target>]" << std::endl
                << "OPTIONS" << std::endl
                << "\t-h\tPrint this help message" << std::endl
                << "\t-135\tSpecifies whether to use DXT1 (default), DXT3 or DXT5 compression" << std::endl
                << "\t-r\tUse the fast but inferior range-based colour compressor" << std::endl
                << "\t-i\tUse the very slow but slightly better iterative colour compressor" << std::endl
                << "\t-w\tWeight colour values by alpha in the cluster colour compressor" << std::endl
                ;
            return error ? -1 : 0;
        }

        // load the source image
        Image sourceImage;
        sourceImage.LoadPng( sourceFileName );

        // compress to DXT
        DxtData dxt;
        sourceImage.Compress( dxt, format | fit | extra );

        // decompress back
        Image targetImage;
        targetImage.Decompress( dxt );

        // compare the images
        double rmsError = sourceImage.GetRmsError( targetImage );
        std::cout << sourceFileName << " " << rmsError << std::endl;

        // save the target image if necessary
        if( !targetFileName.empty() )
            targetImage.SavePng( targetFileName );
    }
    catch( std::exception const& excuse )
    {
        // complain
        std::cerr << "squishpng error: " << excuse.what() << std::endl;
        return -1;
    }

    // done
    return 0;
}

@ -0,0 +1,206 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
/*! @file
@brief This program tests the error for 1 and 2-colour DXT compression.
This tests the effectiveness of the DXT compression algorithm for all
possible 1 and 2-colour blocks of pixels.
*/
#include <squish.h>
#include <iostream>
#include <cmath>
#include <cfloat>
#include <cstdlib>
using namespace squish;
/*! @brief Mean squared colour error between two 4x4 blocks of 4-byte pixels.

    Only the first three bytes of each pixel are compared; the fourth is
    ignored.

    @param a First block, 16 pixels of 4 bytes each.
    @param b Second block, same layout.
    @return Sum of the squared byte differences divided by 16.
*/
double GetColourError( u8 const* a, u8 const* b )
{
    double total = 0.0;
    for( int pixel = 0; pixel < 16; ++pixel )
    {
        u8 const* lhs = a + 4*pixel;
        u8 const* rhs = b + 4*pixel;
        for( int channel = 0; channel < 3; ++channel )
        {
            int const delta = static_cast< int >( lhs[channel] ) - static_cast< int >( rhs[channel] );
            total += static_cast< double >( delta*delta );
        }
    }
    return total / 16.0;
}
/*! @brief Round-trips every solid single-channel colour and prints error stats.

    For each of the three colour channels, every value 0..255 is written to
    that channel of all 16 pixels (the other colour channels stay 0, alpha is
    255), the block is compressed and decompressed, and the error is
    accumulated. Prints the minimum and maximum per-block RMS error and the
    RMS over all blocks.

    @param flags Flags passed to both Compress and Decompress.
*/
void TestOneColour( int flags )
{
    u8 input[4*16];
    u8 output[4*16];
    u8 block[16];
    double avg = 0.0, lowest = DBL_MAX, highest = -DBL_MAX;
    int counter = 0;

    // start from an opaque black block
    for( int i = 0; i < 16*4; ++i )
        input[i] = ( ( i % 4 ) == 3 ) ? 255 : 0;

    // test all single-channel colours
    for( int channel = 0; channel < 3; ++channel )
    {
        // sweep the full 8-bit range; the upper bound is inclusive so 255 is tested too
        for( int value = 0; value <= 255; ++value )
        {
            // set the channel value
            for( int i = 0; i < 16; ++i )
                input[4*i + channel] = ( u8 )value;

            // compress and decompress
            Compress( input, block, flags );
            Decompress( output, block, flags );

            // test the results
            double rm = GetColourError( input, output );
            double rms = std::sqrt( rm );

            // accumulate stats
            if( rms < lowest )
                lowest = rms;
            if( highest < rms )
                highest = rms;
            avg += rm;
            ++counter;
        }

        // reset the channel value
        for( int i = 0; i < 16; ++i )
            input[4*i + channel] = 0;
    }

    // finish stats
    avg = std::sqrt( avg/counter );

    // show stats
    std::cout << "one colour error (min, max, avg): "
        << lowest << ", " << highest << ", " << avg << std::endl;
}
void TestOneColourRandom( int flags )
{
u8 input[4*16];
u8 output[4*16];
u8 block[16];
double avg = 0.0, min = DBL_MAX, max = -DBL_MAX;
int counter = 0;
// test all single-channel colours
for( int test = 0; test < 1000; ++test )
{
// set a constant random colour
for( int channel = 0; channel < 3; ++channel )
{
u8 value = ( u8 )( rand() & 0xff );
for( int i = 0; i < 16; ++i )
input[4*i + channel] = value;
}
for( int i = 0; i < 16; ++i )
input[4*i + 3] = 255;
// compress and decompress
Compress( input, block, flags );
Decompress( output, block, flags );
// test the results
double rm = GetColourError( input, output );
double rms = std::sqrt( rm );
// accumulate stats
min = std::min( min, rms );
max = std::max( max, rms );
avg += rm;
++counter;
}
// finish stats
avg = std::sqrt( avg/counter );
// show stats
std::cout << "random one colour error (min, max, avg): "
<< min << ", " << max << ", " << avg << std::endl;
}
/*! @brief Round-trips every two-value single-channel block and prints error stats.

    For each of the three colour channels and every pair value1 < value2 in
    0..255, the first 8 pixels get value1 and the last 8 get value2 in that
    channel (the other colour channels stay 0, alpha is 255). The block is
    compressed and decompressed and the error accumulated. Prints the minimum
    and maximum per-block RMS error and the RMS over all blocks.

    @param flags Flags passed to both Compress and Decompress.
*/
void TestTwoColour( int flags )
{
    u8 input[4*16];
    u8 output[4*16];
    u8 block[16];
    double avg = 0.0, lowest = DBL_MAX, highest = -DBL_MAX;
    int counter = 0;

    // start from an opaque black block
    for( int i = 0; i < 16*4; ++i )
        input[i] = ( ( i % 4 ) == 3 ) ? 255 : 0;

    // test all pairs of single-channel colours
    for( int channel = 0; channel < 3; ++channel )
    {
        // both bounds are inclusive so that pairs involving 255 are tested too
        for( int value1 = 0; value1 <= 255; ++value1 )
        {
            for( int value2 = value1 + 1; value2 <= 255; ++value2 )
            {
                // set the channel value: first half value1, second half value2
                for( int i = 0; i < 16; ++i )
                    input[4*i + channel] = ( u8 )( ( i < 8 ) ? value1 : value2 );

                // compress and decompress
                Compress( input, block, flags );
                Decompress( output, block, flags );

                // test the results
                double rm = GetColourError( input, output );
                double rms = std::sqrt( rm );

                // accumulate stats
                if( rms < lowest )
                    lowest = rms;
                if( highest < rms )
                    highest = rms;
                avg += rm;
                ++counter;
            }
        }

        // reset the channel value
        for( int i = 0; i < 16; ++i )
            input[4*i + channel] = 0;
    }

    // finish stats
    avg = std::sqrt( avg/counter );

    // show stats
    std::cout << "two colour error (min, max, avg): "
        << lowest << ", " << highest << ", " << avg << std::endl;
}
//! Runs the three error tests in sequence: random solid colours with the
//! range fit flag, then the one- and two-colour sweeps with kDxt1 alone.
int main()
{
    int const format = kDxt1;
    TestOneColourRandom( format | kColourRangeFit );
    TestOneColour( format );
    TestTwoColour( format );
    return 0;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

@ -0,0 +1,26 @@
# File lists for libSquish, meant to be pulled into a .pro file via include().
# The only entry in HEADERS is squish.h.
HEADERS += \
squish.h
# NOTE(review): the .h and .inl files below are listed under SOURCES rather than
# HEADERS -- confirm this is intentional.
SOURCES += \
alpha.cpp \
alpha.h \
clusterfit.cpp \
clusterfit.h \
colourblock.cpp \
colourblock.h \
colourfit.cpp \
colourfit.h \
colourset.cpp \
colourset.h \
maths.cpp \
maths.h \
rangefit.cpp \
rangefit.h \
simd.h \
simd_float.h \
simd_sse.h \
simd_ve.h \
singlecolourfit.cpp \
singlecolourfit.h \
singlecolourlookup.inl \
squish.cpp

@ -0,0 +1,32 @@
# qmake project: builds the "squish" target as a static library.
TARGET = squish
TEMPLATE = lib
# Pull in the HEADERS / SOURCES lists.
include(libSquish.pri)
# Remove the Qt gui module from the build.
QT -= gui
CONFIG += staticlib thread
# Enable both the debug and the release configuration.
CONFIG += debug_and_release
# On unix, debug builds append "_debug" to the target name.
CONFIG(debug, debug|release) {
unix:TARGET = $$join(TARGET,,,_debug)
}
# Directories for generated intermediate files.
MOC_DIR = mocs
OBJECTS_DIR = objs
RCC_DIR = rccs
UI_DIR = uics
# Debug builds get their own intermediate directories:
# "_debug" suffix on unix, "d" suffix on win32.
CONFIG(debug, debug|release) {
unix:MOC_DIR = $$join(MOC_DIR,,,_debug)
unix:OBJECTS_DIR = $$join(OBJECTS_DIR,,,_debug)
unix:RCC_DIR = $$join(RCC_DIR,,,_debug)
unix:UI_DIR = $$join(UI_DIR,,,_debug)
win32:MOC_DIR = $$join(MOC_DIR,,,d)
win32:OBJECTS_DIR = $$join(OBJECTS_DIR,,,d)
win32:RCC_DIR = $$join(RCC_DIR,,,d)
win32:UI_DIR = $$join(UI_DIR,,,d)
}
# On unix, define SQUISH_USE_OPENMP and compile with -fopenmp.
unix:QMAKE_CXXFLAGS += -DSQUISH_USE_OPENMP -fopenmp

@ -0,0 +1,238 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="630"
height="230"
viewBox="0 0 630 230"
id="svg2"
version="1.1"
inkscape:version="0.48.0 r9654"
sodipodi:docname="libSquish.svg"
inkscape:export-filename="/Users/roettger/Projects/libsquish/libSquish.png"
inkscape:export-xdpi="119.99844"
inkscape:export-ydpi="119.99844">
<metadata
id="metadata26">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<defs
id="defs24">
<marker
inkscape:stockid="DotL"
orient="auto"
refY="0"
refX="0"
id="DotL"
style="overflow:visible">
<path
id="path3691"
d="m -2.5,-1 c 0,2.76 -2.24,5 -5,5 -2.76,0 -5,-2.24 -5,-5 0,-2.76 2.24,-5 5,-5 2.76,0 5,2.24 5,5 z"
style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none;marker-end:none"
transform="matrix(0.8,0,0,0.8,5.92,0.8)"
inkscape:connector-curvature="0" />
</marker>
<marker
inkscape:stockid="TriangleInM"
orient="auto"
refY="0"
refX="0"
id="TriangleInM"
style="overflow:visible">
<path
id="path3766"
d="m 5.77,0 -8.65,5 0,-10 8.65,5 z"
style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
transform="scale(-0.4,-0.4)"
inkscape:connector-curvature="0" />
</marker>
<marker
inkscape:stockid="TriangleOutM"
orient="auto"
refY="0"
refX="0"
id="TriangleOutM"
style="overflow:visible">
<path
id="path3775"
d="m 5.77,0 -8.65,5 0,-10 8.65,5 z"
style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
transform="scale(0.4,0.4)"
inkscape:connector-curvature="0" />
</marker>
<marker
inkscape:stockid="DotM"
orient="auto"
refY="0"
refX="0"
id="DotM"
style="overflow:visible">
<path
id="path3694"
d="m -2.5,-1 c 0,2.76 -2.24,5 -5,5 -2.76,0 -5,-2.24 -5,-5 0,-2.76 2.24,-5 5,-5 2.76,0 5,2.24 5,5 z"
style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none;marker-end:none"
transform="matrix(0.4,0,0,0.4,2.96,0.4)"
inkscape:connector-curvature="0" />
</marker>
<marker
inkscape:stockid="Arrow1Mend"
orient="auto"
refY="0"
refX="0"
id="Arrow1Mend"
style="overflow:visible">
<path
id="path3638"
d="M 0,0 5,-5 -12.5,0 5,5 0,0 z"
style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
transform="matrix(-0.4,0,0,-0.4,-4,0)"
inkscape:connector-curvature="0" />
</marker>
<inkscape:perspective
sodipodi:type="inkscape:persp3d"
inkscape:vp_x="0 : 200 : 1"
inkscape:vp_y="0 : 1000 : 0"
inkscape:vp_z="420 : 200 : 1"
inkscape:persp3d-origin="210 : 133.33333 : 1"
id="perspective28" />
</defs>
<sodipodi:namedview
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1"
objecttolerance="10"
gridtolerance="10"
guidetolerance="10"
inkscape:pageopacity="0"
inkscape:pageshadow="2"
inkscape:window-width="1436"
inkscape:window-height="856"
id="namedview22"
showgrid="false"
inkscape:zoom="0.79420663"
inkscape:cx="437.50383"
inkscape:cy="-3.1396505"
inkscape:window-x="4"
inkscape:window-y="22"
inkscape:window-maximized="0"
inkscape:current-layer="g3960" />
<text
xml:space="preserve"
style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Verdana;-inkscape-font-specification:Verdana"
x="102.93208"
y="-7.535553"
id="text3010"
sodipodi:linespacing="125%"><tspan
sodipodi:role="line"
id="tspan3012"
x="102.93208"
y="-7.535553" /><tspan
sodipodi:role="line"
x="102.93208"
y="4.964447"
id="tspan3014" /></text>
<g
id="g3805"
transform="matrix(1,0,0,0.38948748,-4,-80.62777)" />
<text
xml:space="preserve"
style="font-size:11.97706985px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000080;fill-opacity:1;stroke:none;font-family:Sans;-inkscape-font-specification:Terminal"
x="205.95784"
y="34.59861"
id="text3041-8-9-9"
sodipodi:linespacing="125%"
transform="scale(1.1185212,0.8940376)"><tspan
sodipodi:role="line"
id="tspan3043-42-2-8"
x="205.95784"
y="34.59861"
style="font-size:86.2348938px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#000080;font-family:Sans;-inkscape-font-specification:Terminal" /></text>
<g
id="g3960"
transform="matrix(1.2774265,0,0,1.2774265,-32.35617,-208.47432)">
<text
xml:space="preserve"
style="font-size:10.98347282px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000080;fill-opacity:1;stroke:none;font-family:Sans;-inkscape-font-specification:Terminal"
x="192.10129"
y="275.97144"
id="text3041-8-9"
sodipodi:linespacing="125%"
transform="scale(1.0257307,0.9749148)"><tspan
sodipodi:role="line"
id="tspan3043-42-2"
x="192.10129"
y="275.97144"
style="font-size:79.08100128px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#000080;font-family:Sans;-inkscape-font-specification:Terminal" /></text>
<text
transform="scale(1.0257307,0.97491477)"
sodipodi:linespacing="125%"
id="text3041-8-6"
y="248.2854"
x="87.743195"
style="font-size:85.09155273px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000080;fill-opacity:1;stroke:none;font-family:Bank Gothic;-inkscape-font-specification:Bank Gothic"
xml:space="preserve"><tspan
style="font-size:85.09155273px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000080;font-family:Bank Gothic;-inkscape-font-specification:Bank Gothic"
y="248.2854"
x="87.743195"
id="tspan3043-42-9"
sodipodi:role="line">lib</tspan></text>
<flowRoot
xml:space="preserve"
id="flowRoot3097"
style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial"
transform="matrix(0.7828239,0,0,0.7828239,25.329183,69.259813)"><flowRegion
id="flowRegion3099"><rect
id="rect3101"
width="33.49387"
height="62.420395"
x="534.37952"
y="241.90614" /></flowRegion><flowPara
id="flowPara3103"></flowPara></flowRoot> <g
id="g3907"
transform="matrix(0.72690637,0,0,0.71996495,139.8595,-23.170483)">
<path
sodipodi:nodetypes="sscccssccsssssscss"
transform="matrix(0.7828239,0,0,0.7828239,25.329183,69.259813)"
inkscape:connector-curvature="0"
id="path3105"
d="m 339.50606,300.90773 c -1.95856,3.69785 -0.75722,15.26794 4.45166,22.34714 14.00779,19.03753 45.78914,51.10431 45.78914,51.10431 l 51.57631,56.40186 5.19946,85.81798 c 0,0 7.52237,4.52608 11.73442,5.45768 4.95508,1.09594 10.28542,1.16596 15.22449,0 4.41764,-1.04286 12.17959,-6.08979 12.17959,-6.08979 l 4.56735,-85.25713 c 0,0 64.30741,-68.15797 96.54638,-102.89439 3.40006,-3.66345 5.37777,-8.75489 6.0898,-13.70204 0.77539,-5.38744 1.0823,-11.8102 -2.15457,-16.18608 -5.64272,-7.62832 -16.74691,-9.28331 -25.88163,-11.85011 -20.66448,-5.80658 -42.50429,-6.55141 -63.94284,-7.61225 -25.85497,-1.27937 -51.84264,-0.56565 -77.64489,1.52245 -17.88376,1.44728 -53.2857,7.61224 -53.2857,7.61224 0,0 -13.56919,3.10259 -19.79183,6.0898 -3.87132,1.85844 -8.6472,3.44346 -10.65714,7.23833 z"
style="fill:#000000;fill-opacity:1;stroke:#8b8b8b;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
<path
transform="matrix(0.7828239,0,0,0.7828239,73.071791,66.123437)"
d="m 516.39198,310.38028 c 0,10.32604 -49.82684,18.69695 -111.29136,18.69695 -61.46453,0 -111.29137,-8.37091 -111.29137,-18.69695 0,-10.32604 49.82684,-18.69695 111.29137,-18.69695 61.46452,0 111.29136,8.37091 111.29136,18.69695 z"
sodipodi:ry="18.696951"
sodipodi:rx="111.29137"
sodipodi:cy="310.38028"
sodipodi:cx="405.10062"
id="path3905"
style="fill:#cccccc;fill-opacity:1;fill-rule:evenodd;stroke:#8b8b8b;stroke-opacity:1"
sodipodi:type="arc" />
</g>
<text
xml:space="preserve"
style="font-size:85.09155273px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000080;fill-opacity:1;stroke:none;font-family:Bank Gothic;-inkscape-font-specification:Bank Gothic"
x="40.24892"
y="316.10275"
id="text3041-8"
sodipodi:linespacing="125%"
transform="scale(1.0257307,0.97491477)"><tspan
sodipodi:role="line"
id="tspan3043-42"
x="40.24892"
y="316.10275"
style="font-size:85.09155273px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000080;font-family:Bank Gothic;-inkscape-font-specification:Bank Gothic">Squish</tspan></text>
</g>
</svg>

After

Width:  |  Height:  |  Size: 11 KiB

@ -0,0 +1,13 @@
# pkg-config template for libsquish; @PREFIX@ and @LIB_PATH@ are substituted
# when the .pc file is generated.
prefix=@PREFIX@
exec_prefix=${prefix}
libdir=${prefix}/@LIB_PATH@
sharedlibdir=${libdir}
includedir=${prefix}/include
Name: libsquish
Description: squish DXT library
Version: 1.14
Requires:
# The linker adds the "lib" prefix itself, so the library file libsquish.*
# is requested as -lsquish (-llibsquish would look for liblibsquish.*).
Libs: -L${libdir} -L${sharedlibdir} -lsquish
Cflags: -I${includedir}

@ -0,0 +1,259 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
/*! @file
The symmetric eigensystem solver algorithm is from
http://www.geometrictools.com/Documentation/EigenSymmetric3x3.pdf
*/
#include "maths.h"
#include "simd.h"
#include <cfloat>
namespace squish {
/*! @brief Builds the weighted covariance matrix of a set of points.

    The weighted centroid is computed first (the division by the weight sum
    is skipped when that sum is not greater than FLT_EPSILON), then the six
    unique entries of the symmetric matrix are accumulated in the order
    xx, xy, xz, yy, yz, zz. The result is not normalised by the total weight.

    @param n       Number of points.
    @param points  Array of n points.
    @param weights Array of n weights, one per point.
    @return The accumulated covariance entries.
*/
Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights )
{
    // weighted centroid of the set
    float weightSum = 0.0f;
    Vec3 mean( 0.0f );
    for( int k = 0; k < n; ++k )
    {
        float const w = weights[k];
        weightSum += w;
        mean += w*points[k];
    }
    if( weightSum > FLT_EPSILON )
        mean /= weightSum;

    // accumulate the weighted outer products of the centred points
    Sym3x3 result( 0.0f );
    for( int k = 0; k < n; ++k )
    {
        Vec3 const offset = points[k] - mean;
        Vec3 const scaled = weights[k]*offset;
        result[0] += offset.X()*scaled.X();
        result[1] += offset.X()*scaled.Y();
        result[2] += offset.X()*scaled.Z();
        result[3] += offset.Y()*scaled.Y();
        result[4] += offset.Y()*scaled.Z();
        result[5] += offset.Z()*scaled.Z();
    }
    return result;
}
#if 0
// NOTE: this function sits inside the enclosing "#if 0" and is not compiled.
//
// Returns a vector for the given eigenvalue of a symmetric 3x3 matrix, for
// the case where that eigenvalue is not repeated (used for the distinct and
// the non-repeated root by the closed-form solver below).
//   matrix - the six unique entries, indexed 0..5 as xx, xy, xz, yy, yz, zz
//   evalue - the eigenvalue to solve for
// The result is not normalised.
static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
{
// compute M
// (the matrix with evalue subtracted from the diagonal entries 0, 3 and 5)
Sym3x3 m;
m[0] = matrix[0] - evalue;
m[1] = matrix[1];
m[2] = matrix[2];
m[3] = matrix[3] - evalue;
m[4] = matrix[4];
m[5] = matrix[5] - evalue;
// compute U
// (each entry is a 2x2 cofactor of M, stored in the same symmetric layout)
Sym3x3 u;
u[0] = m[3]*m[5] - m[4]*m[4];
u[1] = m[2]*m[4] - m[1]*m[5];
u[2] = m[1]*m[4] - m[2]*m[3];
u[3] = m[0]*m[5] - m[2]*m[2];
u[4] = m[1]*m[2] - m[4]*m[0];
u[5] = m[0]*m[3] - m[1]*m[1];
// find the largest component
// (by absolute value; mi records its index in the 6-entry layout)
float mc = std::fabs( u[0] );
int mi = 0;
for( int i = 1; i < 6; ++i )
{
float c = std::fabs( u[i] );
if( c > mc )
{
mc = c;
mi = i;
}
}
// pick the column with this component
switch( mi )
{
case 0:
return Vec3( u[0], u[1], u[2] );
case 1:
case 3:
return Vec3( u[1], u[3], u[4] );
default:
return Vec3( u[2], u[4], u[5] );
}
}
// NOTE: this function sits inside the enclosing "#if 0" and is not compiled.
//
// Returns a vector for the given eigenvalue of a symmetric 3x3 matrix, for
// the case where that eigenvalue is repeated (called with the repeated root
// by the closed-form solver below).
//   matrix - the six unique entries, indexed 0..5 as xx, xy, xz, yy, yz, zz
//   evalue - the repeated eigenvalue
// The result is not normalised.
static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue )
{
// compute M
// (the matrix with evalue subtracted from the diagonal entries 0, 3 and 5)
Sym3x3 m;
m[0] = matrix[0] - evalue;
m[1] = matrix[1];
m[2] = matrix[2];
m[3] = matrix[3] - evalue;
m[4] = matrix[4];
m[5] = matrix[5] - evalue;
// find the largest component
// (by absolute value; mi records its index in the 6-entry layout)
float mc = std::fabs( m[0] );
int mi = 0;
for( int i = 1; i < 6; ++i )
{
float c = std::fabs( m[i] );
if( c > mc )
{
mc = c;
mi = i;
}
}
// pick the first eigenvector based on this index
// (each returned vector has a zero dot product with one row of M)
switch( mi )
{
case 0:
case 1:
return Vec3( -m[1], m[0], 0.0f );
case 2:
return Vec3( m[2], 0.0f, -m[0] );
case 3:
case 4:
return Vec3( 0.0f, -m[4], m[3] );
default:
return Vec3( 0.0f, -m[5], m[4] );
}
}
// NOTE: this closed-form variant sits inside the enclosing "#if 0" and is not
// compiled; the variant in the "#else" branch is the one that is built.
//
// Computes a principal-axis vector of a symmetric 3x3 matrix by solving the
// cubic characteristic equation for its roots and returning a vector for the
// root with the largest absolute value.
//   matrix - the six unique entries, indexed 0..5 as xx, xy, xz, yy, yz, zz
// Returns Vec3( 1.0f ) in the single-root case; otherwise the result of one
// of the GetMultiplicity*Evector helpers. The result is not normalised.
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
// compute the cubic coefficients
// (c0 is the determinant, c1 the sum of the principal 2x2 minors, c2 the trace)
float c0 = matrix[0]*matrix[3]*matrix[5]
+ 2.0f*matrix[1]*matrix[2]*matrix[4]
- matrix[0]*matrix[4]*matrix[4]
- matrix[3]*matrix[2]*matrix[2]
- matrix[5]*matrix[1]*matrix[1];
float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5]
- matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4];
float c2 = matrix[0] + matrix[3] + matrix[5];
// compute the quadratic coefficients
float a = c1 - ( 1.0f/3.0f )*c2*c2;
float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0;
// compute the root count check
float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a;
// test the multiplicity
// (Q is compared against +/-FLT_EPSILON to choose between the three cases)
if( FLT_EPSILON < Q )
{
// only one root, which implies we have a multiple of the identity
return Vec3( 1.0f );
}
else if( Q < -FLT_EPSILON )
{
// three distinct roots
float theta = std::atan2( std::sqrt( -Q ), -0.5f*b );
float rho = std::sqrt( 0.25f*b*b - Q );
float rt = std::pow( rho, 1.0f/3.0f );
float ct = std::cos( theta/3.0f );
float st = std::sin( theta/3.0f );
float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct;
float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )sqrt( 3.0f )*st );
float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )sqrt( 3.0f )*st );
// pick the larger
// (largest absolute value of the three roots ends up in l1)
if( std::fabs( l2 ) > std::fabs( l1 ) )
l1 = l2;
if( std::fabs( l3 ) > std::fabs( l1 ) )
l1 = l3;
// get the eigenvector
return GetMultiplicity1Evector( matrix, l1 );
}
else // if( -FLT_EPSILON <= Q && Q <= FLT_EPSILON )
{
// two roots
// (rt is a real cube root; the sign of b is handled explicitly)
float rt;
if( b < 0.0f )
rt = -std::pow( -0.5f*b, 1.0f/3.0f );
else
rt = std::pow( 0.5f*b, 1.0f/3.0f );
float l1 = ( 1.0f/3.0f )*c2 + rt; // repeated
float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt;
// get the eigenvector
// (for whichever of the two roots has the larger absolute value)
if( std::fabs( l1 ) > std::fabs( l2 ) )
return GetMultiplicity2Evector( matrix, l1 );
else
return GetMultiplicity1Evector( matrix, l2 );
}
}
#else
// number of multiply-and-rescale passes performed by ComputePrincipleComponent
#define POWER_ITERATION_COUNT 8

/*! @brief Estimates a principal axis of a symmetric 3x3 matrix iteratively.

    Starting from the all-ones vector, the estimate is multiplied by the
    matrix and then divided by the largest of the product's x, y and z
    components; this is repeated POWER_ITERATION_COUNT times.

    @param matrix The six unique entries, indexed 0..5 as xx, xy, xz, yy, yz, zz.
    @return The xyz part of the final estimate (not normalised to unit length).
*/
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
    // expand the symmetric storage into three full rows
    Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
    Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
    Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );

    Vec4 estimate = VEC4_CONST( 1.0f );
    int remaining = POWER_ITERATION_COUNT;
    while( remaining-- > 0 )
    {
        // product = matrix * estimate, built up one row at a time
        Vec4 product = row0*estimate.SplatX();
        product = MultiplyAdd(row1, estimate.SplatY(), product);
        product = MultiplyAdd(row2, estimate.SplatZ(), product);

        // largest of the x, y, z components, replicated into every channel
        Vec4 largest = Max(product.SplatX(), Max(product.SplatY(), product.SplatZ()));

        // rescale by it and use the result as the next estimate
        estimate = product*Reciprocal(largest);
    }
    return estimate.GetVec3();
}
#endif
} // namespace squish

@ -0,0 +1,233 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_MATHS_H
#define SQUISH_MATHS_H
#include <cmath>
#include <algorithm>
#include "config.h"
namespace squish {
/*! @brief Three-component float vector with component-wise arithmetic.

    All binary operators between two vectors work component by component.
    Division by a scalar multiplies by the reciprocal of that scalar.
*/
class Vec3
{
public:
    //! Preferred way of passing a Vec3 to a function.
    typedef Vec3 const& Arg;

    //! Leaves all three components uninitialised.
    Vec3()
    {
    }

    //! Sets all three components to @a s.
    explicit Vec3( float s )
      : m_x( s ),
        m_y( s ),
        m_z( s )
    {
    }

    //! Sets the components individually.
    Vec3( float x, float y, float z )
      : m_x( x ),
        m_y( y ),
        m_z( z )
    {
    }

    float X() const { return m_x; }
    float Y() const { return m_y; }
    float Z() const { return m_z; }

    //! Component-wise negation.
    Vec3 operator-() const
    {
        return Vec3( -m_x, -m_y, -m_z );
    }

    Vec3& operator+=( Arg v )
    {
        m_x += v.m_x;
        m_y += v.m_y;
        m_z += v.m_z;
        return *this;
    }

    Vec3& operator-=( Arg v )
    {
        m_x -= v.m_x;
        m_y -= v.m_y;
        m_z -= v.m_z;
        return *this;
    }

    Vec3& operator*=( Arg v )
    {
        m_x *= v.m_x;
        m_y *= v.m_y;
        m_z *= v.m_z;
        return *this;
    }

    Vec3& operator*=( float s )
    {
        m_x *= s;
        m_y *= s;
        m_z *= s;
        return *this;
    }

    Vec3& operator/=( Arg v )
    {
        m_x /= v.m_x;
        m_y /= v.m_y;
        m_z /= v.m_z;
        return *this;
    }

    //! Multiplies every component by 1/s.
    Vec3& operator/=( float s )
    {
        float const reciprocal = 1.0f/s;
        return *this *= reciprocal;
    }

    friend Vec3 operator+( Arg left, Arg right )
    {
        return Vec3( left.m_x + right.m_x, left.m_y + right.m_y, left.m_z + right.m_z );
    }

    friend Vec3 operator-( Arg left, Arg right )
    {
        return Vec3( left.m_x - right.m_x, left.m_y - right.m_y, left.m_z - right.m_z );
    }

    friend Vec3 operator*( Arg left, Arg right )
    {
        return Vec3( left.m_x*right.m_x, left.m_y*right.m_y, left.m_z*right.m_z );
    }

    friend Vec3 operator*( Arg left, float right )
    {
        return Vec3( left.m_x*right, left.m_y*right, left.m_z*right );
    }

    friend Vec3 operator*( float left, Arg right )
    {
        return Vec3( right.m_x*left, right.m_y*left, right.m_z*left );
    }

    friend Vec3 operator/( Arg left, Arg right )
    {
        return Vec3( left.m_x/right.m_x, left.m_y/right.m_y, left.m_z/right.m_z );
    }

    //! Multiplies every component of @a left by 1/right.
    friend Vec3 operator/( Arg left, float right )
    {
        float const reciprocal = 1.0f/right;
        return Vec3( left.m_x*reciprocal, left.m_y*reciprocal, left.m_z*reciprocal );
    }

    //! Sum of the component-wise products.
    friend float Dot( Arg left, Arg right )
    {
        float const xx = left.m_x*right.m_x;
        float const yy = left.m_y*right.m_y;
        float const zz = left.m_z*right.m_z;
        return xx + yy + zz;
    }

    //! Component-wise minimum.
    friend Vec3 Min( Arg left, Arg right )
    {
        float const x = std::min( left.m_x, right.m_x );
        float const y = std::min( left.m_y, right.m_y );
        float const z = std::min( left.m_z, right.m_z );
        return Vec3( x, y, z );
    }

    //! Component-wise maximum.
    friend Vec3 Max( Arg left, Arg right )
    {
        float const x = std::max( left.m_x, right.m_x );
        float const y = std::max( left.m_y, right.m_y );
        float const z = std::max( left.m_z, right.m_z );
        return Vec3( x, y, z );
    }

    //! Rounds every component towards zero: floor for positive values, ceil otherwise.
    friend Vec3 Truncate( Arg v )
    {
        float const x = v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x );
        float const y = v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y );
        float const z = v.m_z > 0.0f ? std::floor( v.m_z ) : std::ceil( v.m_z );
        return Vec3( x, y, z );
    }

private:
    float m_x;
    float m_y;
    float m_z;
};
//! Returns the dot product of @a v with itself, i.e. its squared length.
inline float LengthSquared( Vec3::Arg v )
{
    float const squared = Dot( v, v );
    return squared;
}
/*! @brief Storage for the six unique entries of a symmetric 3x3 matrix.

    Entries are addressed by a flat index 0..5; no bounds checking is done.
*/
class Sym3x3
{
public:
    //! Leaves all six entries uninitialised.
    Sym3x3()
    {
    }

    //! Sets all six entries to @a s.
    Sym3x3( float s )
    {
        std::fill_n( m_x, 6, s );
    }

    //! Read access to entry @a index.
    float operator[]( int index ) const
    {
        return *( m_x + index );
    }

    //! Read/write access to entry @a index.
    float& operator[]( int index )
    {
        return *( m_x + index );
    }

private:
    float m_x[6];
};
Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights );
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix );
} // namespace squish
#endif // ndef SQUISH_MATHS_H

@ -0,0 +1,201 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "rangefit.h"
#include "colourset.h"
#include "colourblock.h"
#include <cfloat>
namespace squish {
//! Chooses the two codebook endpoints for a range fit of the colour set.
/*! The endpoints are the points with the smallest and largest projection on
    the principal axis of the set, clamped to [0, 1] and snapped to the
    31/63/31 grid. A null metric selects uniform channel weights. */
RangeFit::RangeFit( ColourSet const* colours, int flags, float* metric )
  : ColourFit( colours, flags )
{
    // channel weights for the error measure (old perceptual = 0.2126f, 0.7152f, 0.0722f)
    m_metric = metric ? Vec3( metric[0], metric[1], metric[2] ) : Vec3( 1.0f );

    // no block has been written yet, so any finite error wins
    m_besterror = FLT_MAX;

    // pull the point data out of the colour set
    int const pointCount = m_colours->GetCount();
    Vec3 const* const points = m_colours->GetPoints();
    float const* const pointWeights = m_colours->GetWeights();

    // the principal axis of the weighted covariance is the fitting direction
    Vec3 const axis = ComputePrincipleComponent(
        ComputeWeightedCovariance( pointCount, points, pointWeights ) );

    // endpoints default to black when the set is empty
    Vec3 start( 0.0f );
    Vec3 end( 0.0f );
    if( pointCount > 0 )
    {
        // seed both extremes with the first point
        start = points[0];
        end = points[0];
        float lowest = Dot( points[0], axis );
        float highest = lowest;

        // widen the range with every remaining point
        for( int i = 1; i < pointCount; ++i )
        {
            float const projection = Dot( points[i], axis );
            if( projection < lowest )
            {
                lowest = projection;
                start = points[i];
            }
            else if( projection > highest )
            {
                highest = projection;
                end = points[i];
            }
        }
    }

    // clamp the endpoints to [0, 1]
    Vec3 const zero( 0.0f );
    Vec3 const one( 1.0f );
    start = Min( one, Max( zero, start ) );
    end = Min( one, Max( zero, end ) );

    // round to the nearest grid position and store
    Vec3 const grid( 31.0f, 63.0f, 31.0f );
    Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
    Vec3 const half( 0.5f );
    m_start = Truncate( grid*start + half )*gridrcp;
    m_end = Truncate( grid*end + half )*gridrcp;
}
void RangeFit::Compress3( void* block )
{
// cache some values
int const count = m_colours->GetCount();
Vec3 const* values = m_colours->GetPoints();
// create a codebook
Vec3 codes[3];
codes[0] = m_start;
codes[1] = m_end;
codes[2] = 0.5f*m_start + 0.5f*m_end;
// match each point to the closest code
u8 closest[16];
float error = 0.0f;
for( int i = 0; i < count; ++i )
{
// find the closest code
float dist = FLT_MAX;
int idx = 0;
for( int j = 0; j < 3; ++j )
{
float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
if( d < dist )
{
dist = d;
idx = j;
}
}
// save the index
closest[i] = ( u8 )idx;
// accumulate the error
error += dist;
}
// save this scheme if it wins
if( error < m_besterror )
{
// remap the indices
u8 indices[16];
m_colours->RemapIndices( closest, indices );
// save the block
WriteColourBlock3( m_start, m_end, indices, block );
// save the error
m_besterror = error;
}
}
void RangeFit::Compress4( void* block )
{
// cache some values
int const count = m_colours->GetCount();
Vec3 const* values = m_colours->GetPoints();
// create a codebook
Vec3 codes[4];
codes[0] = m_start;
codes[1] = m_end;
codes[2] = ( 2.0f/3.0f )*m_start + ( 1.0f/3.0f )*m_end;
codes[3] = ( 1.0f/3.0f )*m_start + ( 2.0f/3.0f )*m_end;
// match each point to the closest code
u8 closest[16];
float error = 0.0f;
for( int i = 0; i < count; ++i )
{
// find the closest code
float dist = FLT_MAX;
int idx = 0;
for( int j = 0; j < 4; ++j )
{
float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
if( d < dist )
{
dist = d;
idx = j;
}
}
// save the index
closest[i] = ( u8 )idx;
// accumulate the error
error += dist;
}
// save this scheme if it wins
if( error < m_besterror )
{
// remap the indices
u8 indices[16];
m_colours->RemapIndices( closest, indices );
// save the block
WriteColourBlock4( m_start, m_end, indices, block );
// save the error
m_besterror = error;
}
}
} // namespace squish

@ -0,0 +1,54 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_RANGEFIT_H
#define SQUISH_RANGEFIT_H
#include "squish.h"
#include "colourfit.h"
#include "maths.h"
namespace squish {
class ColourSet;
//! Colour fit that picks two endpoint colours for the whole colour set and
//! maps every point to the nearest entry of the codebook built from them.
class RangeFit : public ColourFit
{
public:
//! Computes the endpoints. The constructor treats a null metric as uniform
//! channel weights; otherwise it reads metric[0..2].
RangeFit( ColourSet const* colours, int flags, float* metric );
private:
//! Attempts the three-entry codebook; writes block only when the error improves.
virtual void Compress3( void* block );
//! Attempts the four-entry codebook; writes block only when the error improves.
virtual void Compress4( void* block );
// per-channel weights applied to colour differences before measuring error
Vec3 m_metric;
// first codebook endpoint
Vec3 m_start;
// second codebook endpoint
Vec3 m_end;
// lowest total error written so far (starts at FLT_MAX)
float m_besterror;
};
} // squish
#endif // ndef SQUISH_RANGEFIT_H

@ -0,0 +1,40 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_SIMD_H
#define SQUISH_SIMD_H
#include "maths.h"
#if SQUISH_USE_ALTIVEC
#include "simd_ve.h"
#elif SQUISH_USE_SSE
#include "simd_sse.h"
#else
#include "simd_float.h"
#endif
#endif // ndef SQUISH_SIMD_H

@ -0,0 +1,183 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_SIMD_FLOAT_H
#define SQUISH_SIMD_FLOAT_H
#include <algorithm>
namespace squish {
#define VEC4_CONST( X ) Vec4( X )
//! Four-component float vector implemented with plain scalar arithmetic.
/*! Fallback used when neither the AltiVec nor the SSE implementation is
    selected; it offers the same set of operations. */
class Vec4
{
public:
    typedef Vec4 const& Arg;

    //! Leaves all components uninitialised.
    Vec4() {}

    //! Broadcasts s to all four components.
    explicit Vec4( float s ) : m_x( s ), m_y( s ), m_z( s ), m_w( s ) {}

    //! Builds the vector from four separate components.
    Vec4( float x, float y, float z, float w ) : m_x( x ), m_y( y ), m_z( z ), m_w( w ) {}

    //! Returns the first three components, dropping w.
    Vec3 GetVec3() const
    {
        return Vec3( m_x, m_y, m_z );
    }

    // broadcast a single component to all four lanes
    Vec4 SplatX() const { return Vec4( m_x ); }
    Vec4 SplatY() const { return Vec4( m_y ); }
    Vec4 SplatZ() const { return Vec4( m_z ); }
    Vec4 SplatW() const { return Vec4( m_w ); }

    //! Component-wise in-place addition.
    Vec4& operator+=( Arg v )
    {
        m_x += v.m_x;
        m_y += v.m_y;
        m_z += v.m_z;
        m_w += v.m_w;
        return *this;
    }

    //! Component-wise in-place subtraction.
    Vec4& operator-=( Arg v )
    {
        m_x -= v.m_x;
        m_y -= v.m_y;
        m_z -= v.m_z;
        m_w -= v.m_w;
        return *this;
    }

    //! Component-wise in-place multiplication.
    Vec4& operator*=( Arg v )
    {
        m_x *= v.m_x;
        m_y *= v.m_y;
        m_z *= v.m_z;
        m_w *= v.m_w;
        return *this;
    }

    //! Component-wise sum.
    friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right )
    {
        return Vec4(
            left.m_x + right.m_x,
            left.m_y + right.m_y,
            left.m_z + right.m_z,
            left.m_w + right.m_w
        );
    }

    //! Component-wise difference.
    friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right )
    {
        return Vec4(
            left.m_x - right.m_x,
            left.m_y - right.m_y,
            left.m_z - right.m_z,
            left.m_w - right.m_w
        );
    }

    //! Component-wise product.
    friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right )
    {
        return Vec4(
            left.m_x*right.m_x,
            left.m_y*right.m_y,
            left.m_z*right.m_z,
            left.m_w*right.m_w
        );
    }

    //! Returns a*b + c
    friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
    {
        Vec4 result( a*b );
        result += c;
        return result;
    }

    //! Returns -( a*b - c )
    friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
    {
        Vec4 result( c );
        result -= a*b;
        return result;
    }

    //! Component-wise 1/v.
    friend Vec4 Reciprocal( Vec4::Arg v )
    {
        float const x = 1.0f/v.m_x;
        float const y = 1.0f/v.m_y;
        float const z = 1.0f/v.m_z;
        float const w = 1.0f/v.m_w;
        return Vec4( x, y, z, w );
    }

    //! Component-wise minimum.
    friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
    {
        float const x = std::min( left.m_x, right.m_x );
        float const y = std::min( left.m_y, right.m_y );
        float const z = std::min( left.m_z, right.m_z );
        float const w = std::min( left.m_w, right.m_w );
        return Vec4( x, y, z, w );
    }

    //! Component-wise maximum.
    friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
    {
        float const x = std::max( left.m_x, right.m_x );
        float const y = std::max( left.m_y, right.m_y );
        float const z = std::max( left.m_z, right.m_z );
        float const w = std::max( left.m_w, right.m_w );
        return Vec4( x, y, z, w );
    }

    //! Rounds every component toward zero.
    friend Vec4 Truncate( Vec4::Arg v )
    {
        // strictly positive components round down, all others round up
        float const x = ( v.m_x > 0.0f ) ? std::floor( v.m_x ) : std::ceil( v.m_x );
        float const y = ( v.m_y > 0.0f ) ? std::floor( v.m_y ) : std::ceil( v.m_y );
        float const z = ( v.m_z > 0.0f ) ? std::floor( v.m_z ) : std::ceil( v.m_z );
        float const w = ( v.m_w > 0.0f ) ? std::floor( v.m_w ) : std::ceil( v.m_w );
        return Vec4( x, y, z, w );
    }

    //! True when at least one component of left is less than the matching component of right.
    friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
    {
        if( left.m_x < right.m_x )
            return true;
        if( left.m_y < right.m_y )
            return true;
        if( left.m_z < right.m_z )
            return true;
        return left.m_w < right.m_w;
    }

private:
    float m_x;
    float m_y;
    float m_z;
    float m_w;
};
} // namespace squish
#endif // ndef SQUISH_SIMD_FLOAT_H

@ -0,0 +1,180 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_SIMD_SSE_H
#define SQUISH_SIMD_SSE_H
#include <xmmintrin.h>
#if ( SQUISH_USE_SSE > 1 )
#include <emmintrin.h>
#endif
// Packs the 2-bit lane index a into all four 2-bit fields of an 8-bit value;
// used below as the immediate of _mm_shuffle_ps to broadcast one lane.
#define SQUISH_SSE_SPLAT( a ) \
( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
// Packs four 2-bit lane indices into one 8-bit value, x in the lowest field
// and w in the highest.
#define SQUISH_SSE_SHUF( x, y, z, w ) \
( ( x ) | ( ( y ) << 2 ) | ( ( z ) << 4 ) | ( ( w ) << 6 ) )
namespace squish {
#define VEC4_CONST( X ) Vec4( X )
//! Four-component float vector stored in a single __m128 register.
/*! SSE implementation; exposes the same operations as the scalar fallback. */
class Vec4
{
public:
typedef Vec4 const& Arg;
//! Leaves the register uninitialised.
Vec4() {}
//! Wraps an existing SSE register value.
explicit Vec4( __m128 v ) : m_v( v ) {}
Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
Vec4& operator=( Vec4 const& arg )
{
m_v = arg.m_v;
return *this;
}
//! Broadcasts s to all four lanes.
explicit Vec4( float s ) : m_v( _mm_set1_ps( s ) ) {}
//! Builds the vector with x in lane 0 through w in lane 3.
Vec4( float x, float y, float z, float w ) : m_v( _mm_setr_ps( x, y, z, w ) ) {}
//! Returns lanes 0..2 as a Vec3, dropping lane 3.
Vec3 GetVec3() const
{
// _mm_store_ps needs a 16-byte aligned destination, hence the
// compiler-specific alignment attribute on the scratch array
#ifdef __GNUC__
__attribute__ ((__aligned__ (16))) float c[4];
#else
__declspec(align(16)) float c[4];
#endif
_mm_store_ps( c, m_v );
return Vec3( c[0], c[1], c[2] );
}
// broadcast a single lane to all four lanes
Vec4 SplatX() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); }
Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); }
Vec4 SplatZ() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 2 ) ) ); }
Vec4 SplatW() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 3 ) ) ); }
//! Lane-wise in-place addition.
Vec4& operator+=( Arg v )
{
m_v = _mm_add_ps( m_v, v.m_v );
return *this;
}
//! Lane-wise in-place subtraction.
Vec4& operator-=( Arg v )
{
m_v = _mm_sub_ps( m_v, v.m_v );
return *this;
}
//! Lane-wise in-place multiplication.
Vec4& operator*=( Arg v )
{
m_v = _mm_mul_ps( m_v, v.m_v );
return *this;
}
//! Lane-wise sum.
friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( _mm_add_ps( left.m_v, right.m_v ) );
}
//! Lane-wise difference.
friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( _mm_sub_ps( left.m_v, right.m_v ) );
}
//! Lane-wise product.
friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( _mm_mul_ps( left.m_v, right.m_v ) );
}
//! Returns a*b + c
friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
{
return Vec4( _mm_add_ps( _mm_mul_ps( a.m_v, b.m_v ), c.m_v ) );
}
//! Returns -( a*b - c )
friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
{
return Vec4( _mm_sub_ps( c.m_v, _mm_mul_ps( a.m_v, b.m_v ) ) );
}
//! Lane-wise approximate reciprocal, refined once.
friend Vec4 Reciprocal( Vec4::Arg v )
{
// get the reciprocal estimate
__m128 estimate = _mm_rcp_ps( v.m_v );
// one round of Newton-Raphson refinement: estimate + estimate*( 1 - estimate*v )
__m128 diff = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v.m_v ) );
return Vec4( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) );
}
//! Lane-wise minimum.
friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( _mm_min_ps( left.m_v, right.m_v ) );
}
//! Lane-wise maximum.
friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( _mm_max_ps( left.m_v, right.m_v ) );
}
//! Rounds every lane toward zero by converting to int and back.
friend Vec4 Truncate( Vec4::Arg v )
{
#if ( SQUISH_USE_SSE == 1 )
// SSE1 only: convert two lanes at a time through 64-bit MMX values
// convert to ints
__m128 input = v.m_v;
__m64 lo = _mm_cvttps_pi32( input );
__m64 hi = _mm_cvttps_pi32( _mm_movehl_ps( input, input ) );
// convert to floats
__m128 part = _mm_movelh_ps( input, _mm_cvtpi32_ps( input, hi ) );
__m128 truncated = _mm_cvtpi32_ps( part, lo );
// clear out the MMX multimedia state to allow FP calls later
_mm_empty();
return Vec4( truncated );
#else
// use SSE2 instructions
return Vec4( _mm_cvtepi32_ps( _mm_cvttps_epi32( v.m_v ) ) );
#endif
}
//! True when at least one lane of left is less than the matching lane of right.
friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
{
// the compare sets a lane to all ones where left < right; movemask
// gathers the sign bit of each lane into the low bits of an int
__m128 bits = _mm_cmplt_ps( left.m_v, right.m_v );
int value = _mm_movemask_ps( bits );
return value != 0;
}
private:
__m128 m_v;
};
} // namespace squish
#endif // ndef SQUISH_SIMD_SSE_H

@ -0,0 +1,166 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_SIMD_VE_H
#define SQUISH_SIMD_VE_H
#include <altivec.h>
#undef bool
namespace squish {
#define VEC4_CONST( X ) Vec4( ( vector float ){ X } )
//! Four-component float vector stored in a single AltiVec vector float.
/*! AltiVec implementation; exposes the same operations as the scalar fallback. */
class Vec4
{
public:
// NOTE: arguments are passed by value here, whereas the scalar and SSE
// implementations in this code base use Vec4 const&
typedef Vec4 Arg;
//! Leaves the vector uninitialised.
Vec4() {}
//! Wraps an existing AltiVec vector value.
explicit Vec4( vector float v ) : m_v( v ) {}
Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
Vec4& operator=( Vec4 const& arg )
{
m_v = arg.m_v;
return *this;
}
//! Broadcasts s to all four lanes.
explicit Vec4( float s )
{
// write the scalars through a union to assemble the vector value
union { vector float v; float c[4]; } u;
u.c[0] = s;
u.c[1] = s;
u.c[2] = s;
u.c[3] = s;
m_v = u.v;
}
//! Builds the vector with x in lane 0 through w in lane 3.
Vec4( float x, float y, float z, float w )
{
union { vector float v; float c[4]; } u;
u.c[0] = x;
u.c[1] = y;
u.c[2] = z;
u.c[3] = w;
m_v = u.v;
}
//! Returns lanes 0..2 as a Vec3, dropping lane 3.
Vec3 GetVec3() const
{
union { vector float v; float c[4]; } u;
u.v = m_v;
return Vec3( u.c[0], u.c[1], u.c[2] );
}
// broadcast a single lane to all four lanes
Vec4 SplatX() const { return Vec4( vec_splat( m_v, 0 ) ); }
Vec4 SplatY() const { return Vec4( vec_splat( m_v, 1 ) ); }
Vec4 SplatZ() const { return Vec4( vec_splat( m_v, 2 ) ); }
Vec4 SplatW() const { return Vec4( vec_splat( m_v, 3 ) ); }
//! Lane-wise in-place addition.
Vec4& operator+=( Arg v )
{
m_v = vec_add( m_v, v.m_v );
return *this;
}
//! Lane-wise in-place subtraction.
Vec4& operator-=( Arg v )
{
m_v = vec_sub( m_v, v.m_v );
return *this;
}
//! Lane-wise in-place multiplication.
Vec4& operator*=( Arg v )
{
// the product is formed with a multiply-add whose addend is a -0.0f literal
m_v = vec_madd( m_v, v.m_v, ( vector float ){ -0.0f } );
return *this;
}
//! Lane-wise sum.
friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( vec_add( left.m_v, right.m_v ) );
}
//! Lane-wise difference.
friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( vec_sub( left.m_v, right.m_v ) );
}
//! Lane-wise product.
friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right )
{
// same multiply-add form as operator*=
return Vec4( vec_madd( left.m_v, right.m_v, ( vector float ){ -0.0f } ) );
}
//! Returns a*b + c
friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
{
return Vec4( vec_madd( a.m_v, b.m_v, c.m_v ) );
}
//! Returns -( a*b - c )
friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
{
return Vec4( vec_nmsub( a.m_v, b.m_v, c.m_v ) );
}
//! Lane-wise approximate reciprocal, refined once.
friend Vec4 Reciprocal( Vec4::Arg v )
{
// get the reciprocal estimate
vector float estimate = vec_re( v.m_v );
// one round of Newton-Raphson refinement: estimate + estimate*( 1 - estimate*v )
vector float diff = vec_nmsub( estimate, v.m_v, ( vector float ){ 1.0f } );
return Vec4( vec_madd( diff, estimate, estimate ) );
}
//! Lane-wise minimum.
friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( vec_min( left.m_v, right.m_v ) );
}
//! Lane-wise maximum.
friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
{
return Vec4( vec_max( left.m_v, right.m_v ) );
}
//! Rounds every lane toward zero.
friend Vec4 Truncate( Vec4::Arg v )
{
return Vec4( vec_trunc( v.m_v ) );
}
//! True when at least one lane of left is less than the matching lane of right.
friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
{
return vec_any_lt( left.m_v, right.m_v ) != 0;
}
private:
vector float m_v;
};
} // namespace squish
#endif // ndef SQUISH_SIMD_VE_H

@ -0,0 +1,172 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "singlecolourfit.h"
#include "colourset.h"
#include "colourblock.h"
#include <climits>
namespace squish {
// One precomputed way of reproducing a single 8-bit channel value.
// Field order must match the initialisers in singlecolourlookup.inl.
struct SourceBlock
{
// quantised first endpoint for the channel (scaled by 1/31 or 1/63 when used)
u8 start;
// quantised second endpoint for the channel (same scaling)
u8 end;
// per-channel error of this choice; squared and summed across channels by the fit
u8 error;
};
// Lookup-table entry for one 8-bit channel value: the two candidate
// SourceBlocks tried by SingleColourFit::ComputeEndPoints, which maps
// sources[i] to codebook index 2*i.
struct SingleColourLookup
{
SourceBlock sources[2];
};
#include "singlecolourlookup.inl"
//! Rounds a to the nearest integer and clamps the result to [0, limit].
/*! The range checks are done in floating point before the integer
    conversion. Converting a NaN or a value outside the range of int is
    undefined behaviour; where that conversion yields INT_MIN, a huge positive
    input would clamp to 0 instead of limit. NaN is mapped to 0. */
static int FloatToInt( float a, int limit )
{
    // adding a half turns truncation toward zero into round-to-nearest
    float const biased = a + 0.5f;

    // NaN, or a value that truncates to a negative integer
    if( !( biased > -1.0f ) )
        return 0;

    // anything that truncates to more than limit, including +infinity
    // (compared in double so that limit + 1 is exact)
    if( ( double )biased >= ( double )limit + 1.0 )
        return limit;

    // the conversion is now known to be in range
    int const i = ( int )biased;
    return ( i > limit ) ? limit : i;
}
//! Quantises the first point of the colour set to 8 bits per channel.
SingleColourFit::SingleColourFit( ColourSet const* colours, int flags )
  : ColourFit( colours, flags )
{
    // only the first point of the set is read
    Vec3 const& colour = *m_colours->GetPoints();
    m_colour[0] = ( u8 )FloatToInt( 255.0f*colour.X(), 255 );
    m_colour[1] = ( u8 )FloatToInt( 255.0f*colour.Y(), 255 );
    m_colour[2] = ( u8 )FloatToInt( 255.0f*colour.Z(), 255 );

    // no block has been written yet, so any candidate wins
    m_besterror = INT_MAX;
}
void SingleColourFit::Compress3( void* block )
{
// build the table of lookups
SingleColourLookup const* const lookups[] =
{
lookup_5_3,
lookup_6_3,
lookup_5_3
};
// find the best end-points and index
ComputeEndPoints( lookups );
// build the block if we win
if( m_error < m_besterror )
{
// remap the indices
u8 indices[16];
m_colours->RemapIndices( &m_index, indices );
// save the block
WriteColourBlock3( m_start, m_end, indices, block );
// save the error
m_besterror = m_error;
}
}
void SingleColourFit::Compress4( void* block )
{
// build the table of lookups
SingleColourLookup const* const lookups[] =
{
lookup_5_4,
lookup_6_4,
lookup_5_4
};
// find the best end-points and index
ComputeEndPoints( lookups );
// build the block if we win
if( m_error < m_besterror )
{
// remap the indices
u8 indices[16];
m_colours->RemapIndices( &m_index, indices );
// save the block
WriteColourBlock4( m_start, m_end, indices, block );
// save the error
m_besterror = m_error;
}
}
//! Picks the better of the two precomputed encodings of the single colour.
/*! lookups holds one table per channel, indexed by the 8-bit channel value.
    Sets m_start, m_end, m_index and m_error for the winning candidate. */
void SingleColourFit::ComputeEndPoints( SingleColourLookup const* const* lookups )
{
    // candidate 0 maps to codebook index 0, candidate 1 to codebook index 2
    m_error = INT_MAX;
    for( int index = 0; index < 2; ++index )
    {
        // gather the table entry of every channel and total the squared error
        SourceBlock const* picked[3];
        int total = 0;
        for( int channel = 0; channel < 3; ++channel )
        {
            SourceBlock const& entry = lookups[channel][ m_colour[channel] ].sources[index];
            picked[channel] = &entry;

            int const channelError = entry.error;
            total += channelError*channelError;
        }

        // only a strictly lower error replaces the current choice
        if( total >= m_error )
            continue;

        m_start = Vec3(
            ( float )picked[0]->start/31.0f,
            ( float )picked[1]->start/63.0f,
            ( float )picked[2]->start/31.0f
        );
        m_end = Vec3(
            ( float )picked[0]->end/31.0f,
            ( float )picked[1]->end/63.0f,
            ( float )picked[2]->end/31.0f
        );
        m_index = ( u8 )( 2*index );
        m_error = total;
    }
}
} // namespace squish

@ -0,0 +1,58 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_SINGLECOLOURFIT_H
#define SQUISH_SINGLECOLOURFIT_H
#include "squish.h"
#include "colourfit.h"
namespace squish {
class ColourSet;
struct SingleColourLookup;
//! Colour fit that encodes a single colour using precomputed lookup tables.
class SingleColourFit : public ColourFit
{
public:
//! Reads the first point of colours and quantises it to 8 bits per channel.
SingleColourFit( ColourSet const* colours, int flags );
private:
//! Attempts the three-colour tables; writes block only when the error improves.
virtual void Compress3( void* block );
//! Attempts the four-colour tables; writes block only when the error improves.
virtual void Compress4( void* block );
//! Fills m_start, m_end, m_index and m_error from one lookup table per channel.
void ComputeEndPoints( SingleColourLookup const* const* lookups );
// the colour being encoded, 8 bits per channel
u8 m_colour[3];
// endpoints chosen by the last ComputeEndPoints call
Vec3 m_start;
Vec3 m_end;
// codebook index chosen by the last ComputeEndPoints call (0 or 2)
u8 m_index;
// error of the last ComputeEndPoints result
int m_error;
// lowest error written to a block so far (starts at INT_MAX)
int m_besterror;
};
} // namespace squish
#endif // ndef SQUISH_SINGLECOLOURFIT_H

File diff suppressed because it is too large Load Diff

@ -0,0 +1,403 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include <string.h>
#include "squish.h"
#include "colourset.h"
#include "maths.h"
#include "rangefit.h"
#include "clusterfit.h"
#include "colourblock.h"
#include "alpha.h"
#include "singlecolourfit.h"
namespace squish {
static int FixFlags( int flags )
{
// grab the flag bits
int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
int extra = flags & kWeightColourByAlpha;
// set defaults
if ( method != kDxt3
&& method != kDxt5
&& method != kBc4
&& method != kBc5 )
{
method = kDxt1;
}
if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
fit = kColourClusterFit;
// done
return method | fit | extra;
}
//! Compresses one 4x4 block of pixels; mask selects which of the 16 pixels are valid.
/*! rgba holds 16 pixels of four bytes each. The output written to block is
    8 bytes for DXT1/BC4 and 16 bytes for DXT3/DXT5/BC5. */
void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
{
    // fix any bad flags
    flags = FixFlags( flags );

    // BC4/BC5: single channels are fed through the DXT5 alpha encoder
    if( ( flags & ( kBc4 | kBc5 ) ) != 0 )
    {
        // only the fourth byte of each staged pixel is ever written here
        u8 staged[16*4];
        u8* const out = reinterpret_cast< u8* >( block );

        // first channel goes into the first 8 bytes
        for( int i = 0; i < 16; ++i )
            staged[4*i + 3] = rgba[4*i + 0];
        CompressAlphaDxt5( staged, mask, out );

        // BC5 adds the second channel in the following 8 bytes
        if( ( flags & ( kBc5 ) ) != 0 )
        {
            for( int i = 0; i < 16; ++i )
                staged[4*i + 3] = rgba[4*i + 1];
            CompressAlphaDxt5( staged, mask, out + 8 );
        }
        return;
    }

    // DXT3/DXT5 store alpha in the first 8 bytes and colour in the next 8
    bool const hasAlphaBlock = ( flags & ( kDxt3 | kDxt5 ) ) != 0;
    void* const alphaBlock = block;
    void* const colourBlock = hasAlphaBlock
        ? static_cast< void* >( reinterpret_cast< u8* >( block ) + 8 )
        : block;

    // create the minimal point set
    ColourSet colours( rgba, mask, flags );
    int const colourCount = colours.GetCount();

    // pick the colour fit from the number of distinct points and the flags
    if( colourCount == 1 )
    {
        // a lone colour always uses the lookup-table fit
        SingleColourFit fit( &colours, flags );
        fit.Compress( colourBlock );
    }
    else if( ( flags & kColourRangeFit ) != 0 || colourCount == 0 )
    {
        // range fit on request, or when there is nothing to cluster
        RangeFit fit( &colours, flags, metric );
        fit.Compress( colourBlock );
    }
    else
    {
        // cluster fit otherwise (iterative or not, depending on flags)
        ClusterFit fit( &colours, flags, metric );
        fit.Compress( colourBlock );
    }

    // alpha is encoded separately for DXT3 and DXT5
    if( ( flags & kDxt3 ) != 0 )
    {
        CompressAlphaDxt3( rgba, mask, alphaBlock );
    }
    else if( ( flags & kDxt5 ) != 0 )
    {
        CompressAlphaDxt5( rgba, mask, alphaBlock );
    }
}
//! Decompresses one block into 16 pixels of four bytes each.
void Decompress( u8* rgba, void const* block, int flags )
{
    // fix any bad flags
    flags = FixFlags( flags );

    bool const isDxt1 = ( flags & kDxt1 ) != 0;
    bool const isDxt3 = ( flags & kDxt3 ) != 0;
    bool const isDxt5 = ( flags & kDxt5 ) != 0;

    // DXT3/DXT5 store alpha in the first 8 bytes and colour in the next 8
    u8 const* const bytes = reinterpret_cast< u8 const* >( block );
    void const* const alphaBlock = bytes;
    void const* const colourBlock = ( isDxt3 || isDxt5 ) ? bytes + 8 : bytes;

    // colour first, then the separate alpha block if there is one
    DecompressColour( rgba, colourBlock, isDxt1 );
    if( isDxt3 )
    {
        DecompressAlphaDxt3( rgba, alphaBlock );
    }
    else if( isDxt5 )
    {
        DecompressAlphaDxt5( rgba, alphaBlock );
    }
}
//! Returns the number of bytes needed to store a width x height image in compressed form.
int GetStorageRequirements( int width, int height, int flags )
{
    // fix any bad flags
    flags = FixFlags( flags );

    // partial blocks at the right and bottom edges still take a whole block
    int const blocksWide = ( width + 3 )/4;
    int const blocksHigh = ( height + 3 )/4;

    // DXT1 and BC4 blocks are 8 bytes, every other format is 16
    int const bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
    return blocksWide*blocksHigh*bytesPerBlock;
}
//! Copies one four-byte pixel, swapping bytes 0 and 2 when kSourceBGRA is set.
void CopyRGBA( u8 const* source, u8* dest, int flags )
{
    // pick which source bytes feed the first and third destination bytes
    bool const swapRedBlue = ( flags & kSourceBGRA ) != 0;
    int const first = swapRedBlue ? 2 : 0;
    int const third = swapRedBlue ? 0 : 2;

    dest[0] = source[first];
    dest[1] = source[1];
    dest[2] = source[third];
    dest[3] = source[3];
}
//! Compresses an image whose rows are pitch bytes apart into consecutive blocks.
/*! Pixels are four bytes each. Blocks are written row by row; blocks that
    hang over the right or bottom edge are compressed with the missing pixels
    masked out. */
void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric )
{
    // fix any bad flags
    flags = FixFlags( flags );

    // loop over blocks
#ifdef SQUISH_USE_OPENMP
#   pragma omp parallel for
#endif
    for( int y = 0; y < height; y += 4 )
    {
        // locate the first output block of this row of blocks
        int const bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
        int const blocksPerRow = ( width + 3 )/4;
        u8* targetBlock = reinterpret_cast< u8* >( blocks )
            + ( ( y/4 )*blocksPerRow )*bytesPerBlock;

        for( int x = 0; x < width; x += 4, targetBlock += bytesPerBlock )
        {
            // gather the 4x4 block; texel = 4*row + column inside the block
            u8 sourceRgba[16*4];
            int mask = 0;
            for( int texel = 0; texel < 16; ++texel )
            {
                int const sx = x + ( texel & 3 );
                int const sy = y + ( texel >> 2 );

                // pixels outside the image stay disabled in the mask
                if( sx >= width || sy >= height )
                    continue;

                CopyRGBA( rgba + pitch*sy + 4*sx, sourceRgba + 4*texel, flags );
                mask |= ( 1 << texel );
            }

            // compress it into the output
            CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
        }
    }
}
//! Compresses a tightly packed image (rows are width*4 bytes apart).
void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
{
    int const pitch = width*4;
    CompressImage( rgba, width, height, pitch, blocks, flags, metric );
}
//! Decompresses consecutive blocks into an image whose rows are pitch bytes apart.
/*! Pixels are four bytes each. Pixels of edge blocks that fall outside the
    image are discarded. */
void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags )
{
    // fix any bad flags
    flags = FixFlags( flags );

    // loop over blocks
#ifdef SQUISH_USE_OPENMP
#   pragma omp parallel for
#endif
    for( int y = 0; y < height; y += 4 )
    {
        // locate the first input block of this row of blocks
        int const bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
        int const blocksPerRow = ( width + 3 )/4;
        u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks )
            + ( ( y/4 )*blocksPerRow )*bytesPerBlock;

        for( int x = 0; x < width; x += 4, sourceBlock += bytesPerBlock )
        {
            // decompress the block
            u8 targetRgba[4*16];
            Decompress( targetRgba, sourceBlock, flags );

            // scatter the pixels; texel = 4*row + column inside the block
            for( int texel = 0; texel < 16; ++texel )
            {
                int const sx = x + ( texel & 3 );
                int const sy = y + ( texel >> 2 );

                // write only the pixels that lie inside the image
                if( sx < width && sy < height )
                    CopyRGBA( targetRgba + 4*texel, rgba + pitch*sy + 4*sx, flags );
            }
        }
    }
}
// Convenience overload for a tightly packed output buffer: forwards to the
// pitch-taking DecompressImage with a row pitch of width*4 bytes.
void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
{
    int const packedPitch = width*4;
    DecompressImage( rgba, width, height, packedPitch, blocks, flags );
}
// Returns the squared difference between x and y.
static double ErrorSq(double x, double y)
{
    double const delta = x - y;
    return delta * delta;
}
// Accumulates the squared error of one 4x4 RGBA block and weights it by how
// uniform the original block is.
//
// original / compressed : 16 RGBA texels each, in row-major order
// w, h                  : number of columns / rows of the block that lie inside
//                         the image; texels outside that region are ignored
// cmse                  : receives the summed squared error of the R, G, B channels
// amse                  : receives the summed squared error of the alpha channel
//
// Blocks that are close to a single colour (summed per-channel variance below 4)
// have both sums multiplied by 5, because quantisation errors in flat areas show
// up as banding and are easier to see than similar errors in detailed blocks.
static void ComputeBlockWMSE(u8 const *original, u8 const *compressed, unsigned int w, unsigned int h, double &cmse, double &amse)
{
    cmse = amse = 0;

    unsigned int channelSum[4] = { 0, 0, 0, 0 };    // per channel sum of texel values
    unsigned int channelSumSq[4] = { 0, 0, 0, 0 };  // per channel sum of squared texel values

    for( unsigned int texel = 0; texel < 16; ++texel )
    {
        unsigned int const px = texel & 3;
        unsigned int const py = texel >> 2;
        if( px >= w || py >= h )
            continue;

        u8 const *src = original + 4*texel;
        u8 const *dst = compressed + 4*texel;

        // colour channels
        double colourError = 0;
        for( int c = 0; c < 3; ++c )
        {
            unsigned int const value = src[c];
            colourError += ErrorSq(src[c], dst[c]);
            channelSum[c] += value;
            channelSumSq[c] += value*value;
        }

        // transparent in both, so colour is inconsequential
        bool const bothTransparent = ( src[3] == 0 ) && ( dst[3] == 0 );
        if( bothTransparent )
            colourError = 0;

        // alpha channel
        unsigned int const alpha = src[3];
        amse += ErrorSq(src[3], dst[3]);
        cmse += colourError;
        channelSum[3] += alpha;
        channelSumSq[3] += alpha*alpha;
    }

    // n*sum(v^2) - sum(v)^2 is n^2 times the variance, so both sides of the
    // comparison below are scaled by n^2 to stay in integer arithmetic
    unsigned int const texelCount = w*h;
    unsigned int scaledVariance = 0;
    for( int c = 0; c < 4; ++c )
        scaledVariance += texelCount*channelSumSq[c] - channelSum[c]*channelSum[c];

    if( scaledVariance < 4 * texelCount * texelCount )
    {
        amse *= 5;
        cmse *= 5;
    }
}
// Computes the weighted mean squared error between an RGBA image and its
// block-compressed form.
//
// rgba      : source image, rows `pitch` bytes apart, 4 bytes per pixel
// width     : image width in pixels
// height    : image height in pixels
// pitch     : distance in bytes between the starts of consecutive image rows
// dxt       : compressed blocks in row-major block order
// flags     : compression flags; passed through FixFlags before use
// colourMSE : receives the weighted colour error averaged over width*height*3 samples
// alphaMSE  : receives the weighted alpha error averaged over width*height samples
//
// An image with no pixels (width <= 0 or height <= 0) reports zero for both errors.
void ComputeMSE( u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
{
    // fix any bad flags
    flags = FixFlags( flags );
    colourMSE = alphaMSE = 0;

    // nothing to measure; returning here also avoids a 0/0 division below
    if( width <= 0 || height <= 0 )
        return;

    // initialise the block input
    squish::u8 const* sourceBlock = dxt;
    // kBc4 blocks are 8 bytes, like kDxt1 blocks; this has to match the block
    // size used by CompressImage and DecompressImage or the walk over `dxt`
    // reads the wrong blocks and runs past the end of the buffer
    int const bytesPerBlock = ( ( flags & ( squish::kDxt1 | squish::kBc4 ) ) != 0 ) ? 8 : 16;

    // loop over blocks
    for( int y = 0; y < height; y += 4 )
    {
        for( int x = 0; x < width; x += 4 )
        {
            // decompress the block
            u8 targetRgba[4*16];
            Decompress( targetRgba, sourceBlock, flags );

            // gather the matching source pixels into a similar 4x4 block;
            // texels outside the image are left unset and are never read,
            // because ComputeBlockWMSE is given the clipped block extent
            u8 originalRgba[4*16];
            u8* originalPixel = originalRgba;
            for( int py = 0; py < 4; ++py )
            {
                for( int px = 0; px < 4; ++px )
                {
                    int const sx = x + px;
                    int const sy = y + py;
                    if( sx < width && sy < height )
                    {
                        u8 const* imagePixel = rgba + pitch*sy + 4*sx;
                        CopyRGBA(imagePixel, originalPixel, flags);
                    }
                    originalPixel += 4;
                }
            }

            // compute the weighted MSE of the block
            double blockCMSE, blockAMSE;
            ComputeBlockWMSE(originalRgba, targetRgba, std::min(4, width - x), std::min(4, height - y), blockCMSE, blockAMSE);
            colourMSE += blockCMSE;
            alphaMSE += blockAMSE;

            // advance
            sourceBlock += bytesPerBlock;
        }
    }

    // form the pixel count in floating point so large images cannot overflow int
    double const pixelCount = static_cast< double >( width ) * height;
    colourMSE /= pixelCount * 3;
    alphaMSE /= pixelCount;
}
// Convenience overload for a tightly packed source image: forwards to the
// pitch-taking ComputeMSE with a row pitch of width*4 bytes.
void ComputeMSE( u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
{
    int const packedPitch = width*4;
    ComputeMSE( rgba, width, height, packedPitch, dxt, flags, colourMSE, alphaMSE );
}
} // namespace squish

@ -28,6 +28,9 @@ TARGET_LINK_LIBRARIES(cubemaptest nvcore nvmath nvimage nvtt)
# nvhdrtest executable, built from hdrtest.cpp.
ADD_EXECUTABLE(nvhdrtest hdrtest.cpp)
TARGET_LINK_LIBRARIES(nvhdrtest nvcore nvimage nvtt bc6h nvmath)
# bc1enc executable, built from bc1enc.cpp; besides the nv* libraries it links
# against squish and CMP_Core.
ADD_EXECUTABLE(bc1enc bc1enc.cpp)
TARGET_LINK_LIBRARIES(bc1enc nvcore nvimage nvmath nvtt squish CMP_Core)
# Only nvtestsuite and nvhdrtest are installed; bc1enc is not in this list.
INSTALL(TARGETS nvtestsuite nvhdrtest DESTINATION bin)
#include_directories("/usr/include/ffmpeg/")

Loading…
Cancel
Save