diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt
index 90cb4e1..7e71391 100644
--- a/extern/CMakeLists.txt
+++ b/extern/CMakeLists.txt
@@ -4,3 +4,10 @@ IF(WIN32)
 ENDIF(WIN32)
 
 ADD_SUBDIRECTORY(poshlib)
+# Libraries added next to poshlib; EtcLib is the Etc2Comp encoder (see extern/EtcLib/CMakeLists.txt)
+ADD_SUBDIRECTORY(EtcLib)
+ADD_SUBDIRECTORY(rg_etc1_v104)
+ADD_SUBDIRECTORY(etcpack)
+
+ADD_SUBDIRECTORY(butteraugli)
+
diff --git a/extern/EtcLib/CMakeLists.txt b/extern/EtcLib/CMakeLists.txt
new file mode 100644
index 0000000..b584b88
--- /dev/null
+++ b/extern/EtcLib/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Copyright 2015 The Etc2Comp Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+project(EtcLib)
+include_directories(./Etc)        # lets the sources use short includes such as "EtcConfig.h"
+include_directories(./EtcCodec)
+# Build a single library from every header and source file found in Etc/ and EtcCodec/.
+file(GLOB SOURCES
+	${PROJECT_SOURCE_DIR}/Etc/*.h
+	${PROJECT_SOURCE_DIR}/EtcCodec/*.h
+	${PROJECT_SOURCE_DIR}/Etc/*.cpp
+	${PROJECT_SOURCE_DIR}/EtcCodec/*.cpp)
+ADD_LIBRARY(EtcLib ${SOURCES})    # no STATIC/SHARED keyword is given here
diff --git a/extern/EtcLib/Etc/Etc.cpp b/extern/EtcLib/Etc/Etc.cpp
new file mode 100644
index 0000000..87e1d9b
--- /dev/null
+++ b/extern/EtcLib/Etc/Etc.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EtcConfig.h"
+#include "Etc.h"
+
+#include <string.h>
+
+namespace Etc
+{
+	// ----------------------------------------------------------------------------------------------------
+	// C-style interface to the encoder: encodes a_pafSourceRGBA as a_format in a single call
+	// the results are handed back through the a_p* output pointers, which are written unconditionally
+	// the status returned by Image::Encode() is not reported to the caller
+	// the encoding bits buffer is not released here; presumably the caller must delete[] it - confirm
+	//
+	void Encode(float *a_pafSourceRGBA,
+				unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
+				Image::Format a_format,
+				ErrorMetric a_eErrMetric,
+				float a_fEffort,
+				unsigned int a_uiJobs, unsigned int a_uiMaxJobs,
+				unsigned char **a_ppaucEncodingBits,
+				unsigned int *a_puiEncodingBitsBytes,
+				unsigned int *a_puiExtendedWidth, unsigned int *a_puiExtendedHeight,
+				int *a_piEncodingTime_ms,
+				bool a_bVerboseOutput)
+	{
+		// wrap the caller's pixels in a temporary Image and run the encoder on it
+		Image encoder(a_pafSourceRGBA, a_uiSourceWidth, a_uiSourceHeight, a_eErrMetric);
+		encoder.m_bVerboseOutput = a_bVerboseOutput;
+		encoder.Encode(a_format, a_eErrMetric, a_fEffort, a_uiJobs, a_uiMaxJobs);
+
+		// publish the results
+		*a_piEncodingTime_ms = encoder.GetEncodingTimeMs();
+		*a_puiExtendedHeight = encoder.GetExtendedHeight();
+		*a_puiExtendedWidth = encoder.GetExtendedWidth();
+		*a_puiEncodingBitsBytes = encoder.GetEncodingBitsBytes();
+		*a_ppaucEncodingBits = encoder.GetEncodingBits();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+}
diff --git a/extern/EtcLib/Etc/Etc.h b/extern/EtcLib/Etc/Etc.h
new file mode 100644
index 0000000..df687e2
--- /dev/null
+++ b/extern/EtcLib/Etc/Etc.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcConfig.h"
+#include "EtcImage.h"
+#include "EtcColor.h"
+#include "EtcErrorMetric.h"
+
+#define ETCCOMP_MIN_EFFORT_LEVEL (0.0f)      // lower bound that Image::Encode() clamps a_fEffort to
+#define ETCCOMP_DEFAULT_EFFORT_LEVEL (40.0f) // presumably the suggested effort for callers without a preference - confirm
+#define ETCCOMP_MAX_EFFORT_LEVEL (100.0f)    // upper bound that Image::Encode() clamps a_fEffort to
+
+namespace Etc
+{
+	class Block4x4EncodingBits;
+
+	// C-style interface to the encoder
+	// a_pafSourceRGBA points at the source pixels (four floats per pixel: R, G, B, A)
+	// a_fEffort is clamped to [ETCCOMP_MIN_EFFORT_LEVEL, ETCCOMP_MAX_EFFORT_LEVEL] by Image::Encode()
+	// a_uiJobs is the number of encoding threads requested; Image::Encode() limits it to a_uiMaxJobs
+	// all remaining pointer arguments are outputs; they are written unconditionally and must not be null
+	void Encode(float *a_pafSourceRGBA,
+				unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
+				Image::Format a_format,
+				ErrorMetric a_eErrMetric,
+				float a_fEffort,
+				unsigned int a_uiJobs, unsigned int a_uiMaxJobs,
+				unsigned char **a_ppaucEncodingBits, unsigned int *a_puiEncodingBitsBytes,
+				unsigned int *a_puiExtendedWidth, unsigned int *a_puiExtendedHeight,
+				int *a_piEncodingTime_ms, bool a_bVerboseOutput = false);
+
+}
diff --git a/extern/EtcLib/Etc/EtcColor.h b/extern/EtcLib/Etc/EtcColor.h
new file mode 100644
index 0000000..0bd86e8
--- /dev/null
+++ b/extern/EtcLib/Etc/EtcColor.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <math.h>
+
+namespace Etc
+{
+
+	// convert a gamma-encoded channel value to linear (the constants match the sRGB transfer function)
+	inline float LogToLinear(float a_fLog)
+	{
+		// straight-line segment for small values
+		if (a_fLog <= 0.04045f)
+		{
+			return a_fLog / 12.92f;
+		}
+
+		// power-curve segment for everything above the threshold
+		const float fOffset = 0.055f;
+		const float fOnePlusOffset = 1.0f + fOffset;
+		return powf((a_fLog + fOffset) / fOnePlusOffset, 2.4f);
+	}
+
+	// convert a linear channel value to its gamma-encoded form (the inverse of LogToLinear)
+	// the argument is taken by value: it is never modified, and const values and temporaries are accepted
+	inline float LinearToLog(float a_fLinear)
+	{
+		static const float ALPHA = 0.055f;
+		static const float ONE_PLUS_ALPHA = 1.0f + ALPHA;
+
+		if (a_fLinear <= 0.0031308f)
+		{
+			return 12.92f * a_fLinear;	// straight-line segment for small values
+		}
+
+		return ONE_PLUS_ALPHA * powf(a_fLinear, (1.0f/2.4f)) - ALPHA;
+	}
+
+	// one color stored as four unsigned 8-bit channels
+	// the data members are declared in R, G, B, A order and nothing else is stored
+	class ColorR8G8B8A8
+	{
+	public:
+		unsigned char ucR;	// red
+		unsigned char ucG;	// green
+		unsigned char ucB;	// blue
+		unsigned char ucA;	// alpha
+	};
+}
diff --git a/extern/EtcLib/Etc/EtcColorFloatRGBA.h b/extern/EtcLib/Etc/EtcColorFloatRGBA.h
new file mode 100644
index 0000000..5afd6ef
--- /dev/null
+++ b/extern/EtcLib/Etc/EtcColorFloatRGBA.h
@@ -0,0 +1,321 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcConfig.h"
+#include "EtcColor.h"
+
+#include <math.h>
+
+namespace Etc
+{
+	// RGBA color with one float per channel; Image casts raw float data to an array of this class, so the data members must remain exactly four floats in R, G, B, A order
+	class ColorFloatRGBA
+	{
+	public:
+
+		// default color: every channel, including alpha, is 0.0f
+		ColorFloatRGBA(void) : fR(0.0f), fG(0.0f), fB(0.0f), fA(0.0f)
+		{
+		}
+
+		// construct from explicit channel values
+		// the values are stored as given; no clamping or conversion is applied
+		ColorFloatRGBA(float a_fR, float a_fG, float a_fB, float a_fA)
+			: fR(a_fR), fG(a_fG),
+			  fB(a_fB), fA(a_fA)
+		{
+		}
+
+		// component-wise sum of all four channels (alpha included)
+		// const-correct so that const colors and temporaries can be used as operands
+		inline ColorFloatRGBA operator+(const ColorFloatRGBA& a_rfrgba) const
+		{
+			return ColorFloatRGBA(fR + a_rfrgba.fR,
+									fG + a_rfrgba.fG,
+									fB + a_rfrgba.fB,
+									fA + a_rfrgba.fA);
+		}
+
+		// add a_f to the red, green and blue channels
+		// alpha is copied through unchanged
+		inline ColorFloatRGBA operator+(float a_f) const
+		{
+			return ColorFloatRGBA(fR + a_f,
+									fG + a_f,
+									fB + a_f,
+									fA);
+		}
+
+		// subtract a_f from the red, green and blue channels
+		// alpha is copied through unchanged
+		inline ColorFloatRGBA operator-(float a_f) const
+		{
+			return ColorFloatRGBA(fR - a_f,
+									fG - a_f,
+									fB - a_f,
+									fA);
+		}
+
+		// component-wise difference of all four channels (alpha included)
+		// const-correct so that const colors and temporaries can be used as operands
+		inline ColorFloatRGBA operator-(const ColorFloatRGBA& a_rfrgba) const
+		{
+			return ColorFloatRGBA(fR - a_rfrgba.fR,
+									fG - a_rfrgba.fG,
+									fB - a_rfrgba.fB,
+									fA - a_rfrgba.fA);
+		}
+
+		// multiply the red, green and blue channels by a_f
+		// alpha is copied through unchanged
+		// produces the same result as ScaleRGB(a_f)
+		inline ColorFloatRGBA operator*(float a_f) const
+		{
+			return ColorFloatRGBA(fR * a_f,
+									fG * a_f,
+									fB * a_f,
+									fA);
+		}
+
+		// scale the red, green and blue channels by a_f
+		// alpha is copied through unchanged
+		// e.g. ScaleRGB(15.0f) maps channel values in [0,1] onto [0,15]
+		inline ColorFloatRGBA ScaleRGB(float a_f) const
+		{
+			return ColorFloatRGBA(a_f * fR,
+									a_f * fG,
+									a_f * fB,
+									fA);
+		}
+
+		// round the red, green and blue channels with roundf()
+		// NOTE: alpha is NOT carried over; the result's fA is always 0.0f,
+		// unlike ScaleRGB() and ClampRGB(), which preserve alpha
+		inline ColorFloatRGBA RoundRGB(void) const
+		{
+			return ColorFloatRGBA(roundf(fR),
+									roundf(fG),
+									roundf(fB), 0.0f);
+		}
+
+		// convert the red, green and blue channels with LogToLinear()
+		// alpha is copied through unchanged
+		// (the counterpart of ToLog())
+		inline ColorFloatRGBA ToLinear() const
+		{
+			return ColorFloatRGBA(LogToLinear(fR),
+									LogToLinear(fG),
+									LogToLinear(fB),
+									fA);
+		}
+
+		// convert R, G and B with LinearToLog(); alpha is copied through unchanged
+		inline ColorFloatRGBA ToLog(void) const
+		{
+			// work on a mutable copy so the calls also compile if LinearToLog() takes a non-const reference
+			ColorFloatRGBA frgbaLog = *this;
+			frgbaLog.fR = LinearToLog(frgbaLog.fR);
+			frgbaLog.fG = LinearToLog(frgbaLog.fG);
+			frgbaLog.fB = LinearToLog(frgbaLog.fB);
+			return frgbaLog;
+		}
+
+		// build a color from four 8-bit channels, mapping [0,255] onto [0.0f,1.0f]
+		inline static ColorFloatRGBA ConvertFromRGBA8(unsigned char a_ucR, 
+			unsigned char a_ucG, unsigned char a_ucB, unsigned char a_ucA)
+		{
+			ColorFloatRGBA frgba;
+			frgba.fR = (float)a_ucR / 255.0f;
+			frgba.fG = (float)a_ucG / 255.0f;
+			frgba.fB = (float)a_ucB / 255.0f;
+			frgba.fA = (float)a_ucA / 255.0f;
+
+			return frgba;
+		}
+
+		// build an opaque color (alpha = 1.0f) from 4-bit red, green and blue values
+		// each value is expanded to 8 bits, then mapped onto [0.0f,1.0f]
+		// assumes the inputs are in [0,15]; larger values are truncated by the unsigned char cast
+		inline static ColorFloatRGBA ConvertFromRGB4(unsigned char a_ucR4,
+														unsigned char a_ucG4,
+														unsigned char a_ucB4)
+		{
+			// (x << 4) + x copies the 4 bits into both nibbles, e.g. 0xF -> 0xFF
+			unsigned char ucR8 = (unsigned char)((a_ucR4 << 4) + a_ucR4);
+			unsigned char ucG8 = (unsigned char)((a_ucG4 << 4) + a_ucG4);
+			unsigned char ucB8 = (unsigned char)((a_ucB4 << 4) + a_ucB4);
+
+			return ColorFloatRGBA((float)ucR8 / 255.0f,
+									(float)ucG8 / 255.0f,
+									(float)ucB8 / 255.0f,
+									1.0f);
+		}
+
+		// build an opaque color (alpha = 1.0f) from 5-bit red, green and blue values
+		// each value is expanded to 8 bits, then mapped onto [0.0f,1.0f]
+		// assumes the inputs are in [0,31]; larger values are truncated by the unsigned char cast
+		inline static ColorFloatRGBA ConvertFromRGB5(unsigned char a_ucR5,
+			unsigned char a_ucG5,
+			unsigned char a_ucB5)
+		{
+			// (x << 3) + (x >> 2) refills the low 3 bits from the top bits, e.g. 31 -> 255
+			unsigned char ucR8 = (unsigned char)((a_ucR5 << 3) + (a_ucR5 >> 2));
+			unsigned char ucG8 = (unsigned char)((a_ucG5 << 3) + (a_ucG5 >> 2));
+			unsigned char ucB8 = (unsigned char)((a_ucB5 << 3) + (a_ucB5 >> 2));
+
+			return ColorFloatRGBA((float)ucR8 / 255.0f,
+									(float)ucG8 / 255.0f,
+									(float)ucB8 / 255.0f,
+									1.0f);
+		}
+
+		// build an opaque color (alpha = 1.0f) from 6-bit red, 7-bit green and 6-bit blue values
+		// each value is expanded to 8 bits, then mapped onto [0.0f,1.0f]
+		// assumes red/blue are in [0,63] and green is in [0,127]; larger values are truncated by the cast
+		inline static ColorFloatRGBA ConvertFromR6G7B6(unsigned char a_ucR6,
+			unsigned char a_ucG7,
+			unsigned char a_ucB6)
+		{
+			// the low bits of each 8-bit value are refilled from the top bits, e.g. 63 -> 255 and 127 -> 255
+			unsigned char ucR8 = (unsigned char)((a_ucR6 << 2) + (a_ucR6 >> 4));
+			unsigned char ucG8 = (unsigned char)((a_ucG7 << 1) + (a_ucG7 >> 6));
+			unsigned char ucB8 = (unsigned char)((a_ucB6 << 2) + (a_ucB6 >> 4));
+
+			return ColorFloatRGBA((float)ucR8 / 255.0f,
+									(float)ucG8 / 255.0f,
+									(float)ucB8 / 255.0f,
+									1.0f);
+		}
+
+		// quantize to 4 bits, expand to 8 bits
+		// the result holds the expanded 8-bit values scaled back by 1/255
+		// NOTE: the returned alpha is always 0.0f, because RoundRGB() does not carry alpha over
+		inline ColorFloatRGBA QuantizeR4G4B4(void) const
+		{
+			// quantize to 4 bits
+			ColorFloatRGBA frgba = ClampRGB().ScaleRGB(15.0f).RoundRGB();
+			unsigned int uiR4 = (unsigned int)frgba.fR;
+			unsigned int uiG4 = (unsigned int)frgba.fG;
+			unsigned int uiB4 = (unsigned int)frgba.fB;
+
+			// expand to 8 bits
+			frgba.fR = (float) ((uiR4 << 4) + uiR4);
+			frgba.fG = (float) ((uiG4 << 4) + uiG4);
+			frgba.fB = (float) ((uiB4 << 4) + uiB4);
+
+			frgba = frgba.ScaleRGB(1.0f/255.0f);
+
+			return frgba;
+		}
+
+		// quantize to 5 bits, expand to 8 bits
+		// the result holds the expanded 8-bit values scaled back by 1/255
+		// NOTE: the returned alpha is always 0.0f, because RoundRGB() does not carry alpha over
+		inline ColorFloatRGBA QuantizeR5G5B5(void) const
+		{
+			// quantize to 5 bits
+			ColorFloatRGBA frgba = ClampRGB().ScaleRGB(31.0f).RoundRGB();
+			unsigned int uiR5 = (unsigned int)frgba.fR;
+			unsigned int uiG5 = (unsigned int)frgba.fG;
+			unsigned int uiB5 = (unsigned int)frgba.fB;
+
+			// expand to 8 bits
+			frgba.fR = (float)((uiR5 << 3) + (uiR5 >> 2));
+			frgba.fG = (float)((uiG5 << 3) + (uiG5 >> 2));
+			frgba.fB = (float)((uiB5 << 3) + (uiB5 >> 2));
+
+			frgba = frgba.ScaleRGB(1.0f / 255.0f);
+
+			return frgba;
+		}
+
+		// quantize to 6/7/6 bits, expand to 8 bits
+		// red and blue use 6 bits, green uses 7 bits; the expanded values are scaled back by 1/255
+		// NOTE: unlike QuantizeR4G4B4() and QuantizeR5G5B5(), the returned alpha is this color's alpha
+		inline ColorFloatRGBA QuantizeR6G7B6(void) const
+		{
+			// clamp once, then quantize to 6/7/6 bits
+			const ColorFloatRGBA frgbaClamped = ClampRGB();
+			const ColorFloatRGBA frgba6 = frgbaClamped.ScaleRGB(63.0f).RoundRGB();
+			const ColorFloatRGBA frgba7 = frgbaClamped.ScaleRGB(127.0f).RoundRGB();
+			unsigned int uiR6 = (unsigned int)frgba6.fR;
+			unsigned int uiG7 = (unsigned int)frgba7.fG;
+			unsigned int uiB6 = (unsigned int)frgba6.fB;
+
+			// expand to 8 bits (starting from a copy of this color keeps its alpha)
+			ColorFloatRGBA frgba = *this;
+			frgba.fR = (float)((uiR6 << 2) + (uiR6 >> 4));
+			frgba.fG = (float)((uiG7 << 1) + (uiG7 >> 6));
+			frgba.fB = (float)((uiB6 << 2) + (uiB6 >> 4));
+
+			return frgba.ScaleRGB(1.0f / 255.0f);
+		}
+
+		// copy of this color with red, green and blue limited to [0.0f,1.0f]; alpha is left as is
+		inline ColorFloatRGBA ClampRGB(void) const
+		{
+			ColorFloatRGBA frgba = *this;
+			if (frgba.fR < 0.0f) { frgba.fR = 0.0f; }
+			if (frgba.fR > 1.0f) { frgba.fR = 1.0f; }
+			if (frgba.fG < 0.0f) { frgba.fG = 0.0f; }
+			if (frgba.fG > 1.0f) { frgba.fG = 1.0f; }
+			if (frgba.fB < 0.0f) { frgba.fB = 0.0f; }
+			if (frgba.fB > 1.0f) { frgba.fB = 1.0f; }
+			return frgba;
+		}
+
+		// copy of this color with all four channels limited to [0.0f,1.0f]
+		inline ColorFloatRGBA ClampRGBA(void) const
+		{
+			ColorFloatRGBA frgba = *this;
+			if (frgba.fR < 0.0f) { frgba.fR = 0.0f; }
+			if (frgba.fR > 1.0f) { frgba.fR = 1.0f; }
+			if (frgba.fG < 0.0f) { frgba.fG = 0.0f; }
+			if (frgba.fG > 1.0f) { frgba.fG = 1.0f; }
+			if (frgba.fB < 0.0f) { frgba.fB = 0.0f; }
+			if (frgba.fB > 1.0f) { frgba.fB = 1.0f; }
+			if (frgba.fA < 0.0f) { frgba.fA = 0.0f; }
+			if (frgba.fA > 1.0f) { frgba.fA = 1.0f; }
+			return frgba;
+		}
+		// IntRed/IntGreen/IntBlue/IntAlpha: the channel multiplied by a_fScale, rounded with roundf() and cast to int
+		inline int IntRed(float a_fScale) const
+		{
+			return (int)roundf(fR * a_fScale);
+		}
+
+		inline int IntGreen(float a_fScale) const
+		{
+			return (int)roundf(fG * a_fScale);
+		}
+
+		inline int IntBlue(float a_fScale) const
+		{
+			return (int)roundf(fB * a_fScale);
+		}
+
+		inline int IntAlpha(float a_fScale) const
+		{
+			return (int)roundf(fA * a_fScale);
+		}
+
+		float	fR, fG, fB, fA;
+	};
+
+}
+
diff --git a/extern/EtcLib/Etc/EtcConfig.h b/extern/EtcLib/Etc/EtcConfig.h
new file mode 100644
index 0000000..6ce1b8c
--- /dev/null
+++ b/extern/EtcLib/Etc/EtcConfig.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifdef _WIN32
+#define ETC_WINDOWS (1)
+#else
+#define ETC_WINDOWS (0)
+#endif // _WIN32
+
+#if __APPLE__
+#define ETC_OSX (1)
+#else
+#define ETC_OSX (0)
+#endif // __APPLE__
+
+#if __unix__
+#define ETC_UNIX (1)
+#else
+#define ETC_UNIX (0)
+#endif // __unix__
+// each ETC_* platform macro above is always defined, as (1) or (0), so it can be tested with a plain #if
+
+// short names for common types (declared at global scope, not inside namespace Etc)
+#include <stdint.h>
+typedef int8_t i8;
+typedef int16_t i16;
+typedef int32_t i32;
+typedef int64_t i64;
+
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+typedef float	f32;
+typedef double	f64;
+
+// Keep asserts enabled in release builds during development
+#undef NDEBUG // only affects <assert.h> inclusions that come after this header
+
+// 0=disable. stb_image can be used if you need to compress
+// other image formats like jpg
+#define USE_STB_IMAGE_LOAD 0
+
+#if ETC_WINDOWS
+#include <SDKDDKVer.h>
+#define _CRT_SECURE_NO_WARNINGS (1) // presumably silences MSVC's CRT deprecation warnings for the headers below - confirm
+#include <tchar.h>
+#endif // ETC_WINDOWS
+
+#include <stdio.h>
+
diff --git a/extern/EtcLib/Etc/EtcImage.cpp b/extern/EtcLib/Etc/EtcImage.cpp
new file mode 100644
index 0000000..7ca3519
--- /dev/null
+++ b/extern/EtcLib/Etc/EtcImage.cpp
@@ -0,0 +1,685 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcImage.cpp
+
+Image is an array of 4x4 blocks that represent the encoding of the source image
+
+*/
+
+#include "EtcConfig.h"
+
+#include <stdlib.h>
+
+#include "EtcImage.h"
+
+#include "Etc.h"
+#include "EtcBlock4x4.h"
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcSortedBlockList.h"
+
+#if ETC_WINDOWS
+#include <windows.h>
+#endif
+#include <ctime>
+#include <chrono>
+#include <future>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+// OPAQUE and TRANSPARENT may already be defined as macros (presumably by <windows.h>); remove them to fix the conflict with Block4x4::AlphaMix
+#ifdef OPAQUE
+#undef OPAQUE
+#endif // OPAQUE
+#ifdef TRANSPARENT
+#undef TRANSPARENT
+#endif // TRANSPARENT
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// default constructor: empty image; of the members the other constructors set, only m_errormetric stays unset (no default is known here)
+	Image::Image(void)
+	{
+		m_encodingStatus = EncodingStatus::SUCCESS;
+		m_warningsToCapture = EncodingStatus::SUCCESS;
+		m_pafrgbaSource = nullptr;
+		m_uiSourceWidth = m_uiSourceHeight = m_uiExtendedWidth = m_uiExtendedHeight = 0;
+		m_uiBlockColumns = m_uiBlockRows = m_uiEncodingBitsBytes = 0;
+		m_pablock = nullptr;
+		m_psortedblocklist = nullptr;
+		m_encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN;
+		m_paucEncodingBits = nullptr;
+		m_format = Format::UNKNOWN;
+		m_fEffort = 0.0f;
+		m_iEncodeTime_ms = -1;
+		m_bVerboseOutput = false;
+		m_iNumOpaquePixels = m_iNumTranslucentPixels = m_iNumTransparentPixels = 0;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// constructor using source image
+	// stores a pointer to the caller's pixels (no copy is made) and allocates the block array
+	// format, encoding bits and timing keep their "not encoded yet" values until Encode() is called
+	//
+	Image::Image(float *a_pafSourceRGBA, unsigned int a_uiSourceWidth,
+					unsigned int a_uiSourceHeight,
+					ErrorMetric a_errormetric)
+	{
+		m_encodingStatus = EncodingStatus::SUCCESS;
+		m_warningsToCapture = EncodingStatus::SUCCESS;
+
+		// encoder settings and results: nothing has been encoded yet
+		m_format = Format::UNKNOWN;
+		m_encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN;
+		m_uiEncodingBitsBytes = 0;
+		m_paucEncodingBits = nullptr;
+		m_errormetric = a_errormetric;
+		m_fEffort = 0.0f;
+		m_iEncodeTime_ms = -1;
+		m_bVerboseOutput = false;
+
+		// pixel statistics
+		m_iNumOpaquePixels = 0;
+		m_iNumTranslucentPixels = 0;
+		m_iNumTransparentPixels = 0;
+
+		// the source floats are addressed as an array of ColorFloatRGBA
+		m_pafrgbaSource = reinterpret_cast<ColorFloatRGBA *>(a_pafSourceRGBA);
+		m_uiSourceWidth = a_uiSourceWidth;
+		m_uiSourceHeight = a_uiSourceHeight;
+
+		// dimensions as returned by CalcExtendedDimension(), and the grid of 4x4 blocks that covers them
+		m_uiExtendedWidth = CalcExtendedDimension(static_cast<unsigned short>(m_uiSourceWidth));
+		m_uiExtendedHeight = CalcExtendedDimension(static_cast<unsigned short>(m_uiSourceHeight));
+		m_uiBlockColumns = m_uiExtendedWidth >> 2;
+		m_uiBlockRows = m_uiExtendedHeight >> 2;
+		m_pablock = new Block4x4[GetNumberOfBlocks()];
+		assert(m_pablock);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// constructor using encoding bits
+	// recreates encoding state using a previously encoded image; a_paucEncidingBits is stored as is, not copied
+	// an unrecognized a_format adds ERROR_UNKNOWN_FORMAT to the status and leaves the blocks default-constructed
+	//
+	Image::Image(Format a_format,
+					unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
+					unsigned char *a_paucEncidingBits, unsigned int a_uiEncodingBitsBytes,
+					Image *a_pimageSource, ErrorMetric a_errormetric)
+	{
+		m_encodingStatus = EncodingStatus::SUCCESS;
+		m_warningsToCapture = EncodingStatus::SUCCESS;	// same starting value as in the other constructors
+		m_pafrgbaSource = nullptr;
+		m_psortedblocklist = nullptr;
+		m_uiSourceWidth = a_uiSourceWidth;
+		m_uiSourceHeight = a_uiSourceHeight;
+
+		m_uiExtendedWidth = CalcExtendedDimension((unsigned short)m_uiSourceWidth);
+		m_uiExtendedHeight = CalcExtendedDimension((unsigned short)m_uiSourceHeight);
+		m_uiBlockColumns = m_uiExtendedWidth >> 2;
+		m_uiBlockRows = m_uiExtendedHeight >> 2;
+
+		unsigned int uiBlocks = GetNumberOfBlocks();
+		m_pablock = new Block4x4[uiBlocks];
+		assert(m_pablock);
+
+		m_format = a_format;
+		m_iNumOpaquePixels = 0;
+		m_iNumTranslucentPixels = 0;
+		m_iNumTransparentPixels = 0;
+
+		// set every remaining member before the format check, so the early return cannot leave one uninitialized
+		m_uiEncodingBitsBytes = a_uiEncodingBitsBytes;
+		m_paucEncodingBits = a_paucEncidingBits;
+		m_errormetric = a_errormetric;
+		m_fEffort = 0.0f;
+		m_bVerboseOutput = false;
+		m_iEncodeTime_ms = -1;
+
+		m_encodingbitsformat = DetermineEncodingBitsFormat(m_format);
+		if (m_encodingbitsformat == Block4x4EncodingBits::Format::UNKNOWN)
+		{
+			AddToEncodingStatus(ERROR_UNKNOWN_FORMAT);
+			return;
+		}
+
+		unsigned char *paucEncodingBits = m_paucEncodingBits;
+		unsigned int uiEncodingBitsBytesPerBlock = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat);
+
+		unsigned int uiH = 0;
+		unsigned int uiV = 0;
+		for (unsigned int uiBlock = 0; uiBlock < uiBlocks; uiBlock++)
+		{
+			m_pablock[uiBlock].InitFromEtcEncodingBits(a_format, uiH, uiV, paucEncodingBits, 
+														a_pimageSource, a_errormetric);
+			paucEncodingBits += uiEncodingBitsBytesPerBlock;
+			uiH += 4;
+			if (uiH >= m_uiSourceWidth)
+			{
+				uiH = 0;
+				uiV += 4;
+			}
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// destructor: releases the block array only
+	// the encoding bits are not freed (see the disabled code): Etc::Encode() hands that pointer to its caller
+	// after its local Image is destroyed, and the constructor using encoding bits stores a caller-supplied buffer
+	//
+	Image::~Image(void)
+	{
+		delete[] m_pablock;	// no null check needed: delete[] on nullptr does nothing
+		m_pablock = nullptr;
+
+		/*if (m_paucEncodingBits != nullptr)
+		{
+			delete[] m_paucEncodingBits;
+			m_paucEncodingBits = nullptr;
+		}*/
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// encode an image: create a set of encoding bits that conforms to a_format
+	// find best fit using a_errormetric
+	// explore a range of possible encodings based on a_fEffort (clamped to [0:100] with a warning)
+	// speed up process using a_uiJobs threads (clamped to [1:a_uiMaxJobs] with a warning, never below 1)
+	// returns the accumulated status; an unknown error metric or format is reported before any bits are allocated
+	//
+	Image::EncodingStatus Image::Encode(Format a_format, ErrorMetric a_errormetric, float a_fEffort, unsigned int a_uiJobs, unsigned int a_uiMaxJobs)
+	{
+
+		auto start = std::chrono::steady_clock::now();
+
+		m_encodingStatus = EncodingStatus::SUCCESS;
+
+		m_format = a_format;
+		m_errormetric = a_errormetric;
+		m_fEffort = a_fEffort;
+		// ERROR_METRICS is presumably the enumerator count (compare Format::FORMATS), so it is not a valid metric
+		if (m_errormetric < 0 || m_errormetric >= ERROR_METRICS)
+		{
+			AddToEncodingStatus(ERROR_UNKNOWN_ERROR_METRIC);
+			return m_encodingStatus;
+		}
+
+		if (m_fEffort < ETCCOMP_MIN_EFFORT_LEVEL)
+		{
+			AddToEncodingStatus(WARNING_EFFORT_OUT_OF_RANGE);
+			m_fEffort = ETCCOMP_MIN_EFFORT_LEVEL;
+		}
+		else if (m_fEffort > ETCCOMP_MAX_EFFORT_LEVEL)
+		{
+			AddToEncodingStatus(WARNING_EFFORT_OUT_OF_RANGE);
+			m_fEffort = ETCCOMP_MAX_EFFORT_LEVEL;
+		}
+		if (a_uiJobs < 1)
+		{
+			a_uiJobs = 1;
+			AddToEncodingStatus(WARNING_JOBS_OUT_OF_RANGE);
+		}
+		else if (a_uiJobs > a_uiMaxJobs)
+		{
+			a_uiJobs = (a_uiMaxJobs > 0) ? a_uiMaxJobs : 1;	// a limit of 0 must not leave us without a job
+			AddToEncodingStatus(WARNING_JOBS_OUT_OF_RANGE);
+		}
+
+		m_encodingbitsformat = DetermineEncodingBitsFormat(m_format);
+
+		if (m_encodingbitsformat == Block4x4EncodingBits::Format::UNKNOWN)
+		{
+			AddToEncodingStatus(ERROR_UNKNOWN_FORMAT);
+			return m_encodingStatus;
+		}
+
+		assert(m_paucEncodingBits == nullptr);
+		m_uiEncodingBitsBytes = GetNumberOfBlocks() * Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat);
+		m_paucEncodingBits = new unsigned char[m_uiEncodingBitsBytes];
+
+		InitBlocksAndBlockSorter();
+
+		// futures for the helper threads; at most a_uiJobs - 1 are used, the calling thread runs the last share itself
+		std::future<void> *handle = new std::future<void>[a_uiJobs];
+
+		unsigned int uiNumThreadsNeeded = 0;
+		unsigned int uiUnfinishedBlocks = GetNumberOfBlocks();
+
+		uiNumThreadsNeeded = (uiUnfinishedBlocks < a_uiJobs) ? uiUnfinishedBlocks : a_uiJobs;
+		if (uiNumThreadsNeeded < 1) { uiNumThreadsNeeded = 1; }	// no blocks: still pass a valid offset/stride below
+		for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++)
+		{
+			handle[i] = async(std::launch::async, &Image::RunFirstPass, this, i, uiNumThreadsNeeded);
+		}
+
+		RunFirstPass(uiNumThreadsNeeded - 1, uiNumThreadsNeeded);
+
+		for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++)
+		{
+			handle[i].get();
+		}
+
+		// perform effort-based encoding
+		if (m_fEffort > ETCCOMP_MIN_EFFORT_LEVEL)
+		{
+			unsigned int uiFinishedBlocks = 0;
+			unsigned int uiTotalEffortBlocks = static_cast<unsigned int>(roundf(0.01f * m_fEffort  * GetNumberOfBlocks()));
+
+			if (m_bVerboseOutput)
+			{
+				printf("effortblocks = %u\n", uiTotalEffortBlocks);
+			}
+			unsigned int uiPass = 0;
+			while (1)
+			{
+				if (m_bVerboseOutput)
+				{
+					uiPass++;
+					printf("pass %u\n", uiPass);
+				}
+				m_psortedblocklist->Sort();
+				uiUnfinishedBlocks = m_psortedblocklist->GetNumberOfSortedBlocks();
+				uiFinishedBlocks = GetNumberOfBlocks() - uiUnfinishedBlocks;
+				if (m_bVerboseOutput)
+				{
+					printf("    %u unfinished blocks\n", uiUnfinishedBlocks);
+					// m_psortedblocklist->Print();
+				}
+
+
+
+				// stop encoding when we did enough to satisfy the effort percentage
+				if (uiFinishedBlocks >= uiTotalEffortBlocks)
+				{
+					if (m_bVerboseOutput)
+					{
+						printf("Finished %u Blocks out of %u\n", uiFinishedBlocks, uiTotalEffortBlocks);
+					}
+					break;
+				}
+
+				unsigned int uiIteratedBlocks = 0;
+				unsigned int blocksToIterateThisPass = (uiTotalEffortBlocks - uiFinishedBlocks);
+				uiNumThreadsNeeded = (uiUnfinishedBlocks < a_uiJobs) ? uiUnfinishedBlocks : a_uiJobs;
+
+				if (uiNumThreadsNeeded <= 1)
+				{
+					// since we already know how many blocks each thread will process
+					// cap the thread limit to do the proper amount of work, and not more
+					uiIteratedBlocks = IterateThroughWorstBlocks(blocksToIterateThisPass, 0, 1);
+				}
+				else
+				{
+					// we have a lot of work to do, so let's multi-thread it
+					std::future<unsigned int> *handleToBlockEncoders = new std::future<unsigned int>[uiNumThreadsNeeded-1];
+
+					for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++)
+					{
+						handleToBlockEncoders[i] = async(std::launch::async, &Image::IterateThroughWorstBlocks, this, blocksToIterateThisPass, i, uiNumThreadsNeeded);
+					}
+					uiIteratedBlocks = IterateThroughWorstBlocks(blocksToIterateThisPass, uiNumThreadsNeeded - 1, uiNumThreadsNeeded);
+
+					for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++)
+					{
+						uiIteratedBlocks += handleToBlockEncoders[i].get();
+					}
+
+					delete[] handleToBlockEncoders;
+				}
+
+				if (m_bVerboseOutput)
+				{
+					printf("    %u iterated blocks\n", uiIteratedBlocks);
+				}
+			}
+		}
+
+		// generate Etc2-compatible bit-format 4x4 blocks
+		for (int i = 0; i < (int)a_uiJobs - 1; i++)
+		{
+			handle[i] = async(std::launch::async, &Image::SetEncodingBits, this, i, a_uiJobs);
+		}
+		SetEncodingBits(a_uiJobs - 1, a_uiJobs);
+
+		for (int i = 0; i < (int)a_uiJobs - 1; i++)
+		{
+			handle[i].get();
+		}
+
+		auto end = std::chrono::steady_clock::now();
+		std::chrono::milliseconds elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+		m_iEncodeTime_ms = (int)elapsed.count();
+		delete[] handle;
+		delete m_psortedblocklist;
+		m_psortedblocklist = nullptr;	// do not keep a dangling pointer to the deleted list
+		return m_encodingStatus;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform one encoding iteration on blocks taken from the sorted block list
+	// this thread starts a_uiMultithreadingOffset links into the list and then moves on by
+	// a_uiMultithreadingStride links at a time
+	// it stops when the list ends or when its counter reaches a_uiMaxBlocks
+	// the counter starts at the offset and grows by the stride per block; its final value is returned
+	//
+	unsigned int Image::IterateThroughWorstBlocks(unsigned int a_uiMaxBlocks, 
+													unsigned int a_uiMultithreadingOffset, 
+													unsigned int a_uiMultithreadingStride)
+	{
+		assert(a_uiMultithreadingStride > 0);
+
+		unsigned int uiCounter = a_uiMultithreadingOffset;
+		SortedBlockList::Link *pCurrent = m_psortedblocklist->GetLinkToFirstBlock();
+		pCurrent = pCurrent->Advance(a_uiMultithreadingOffset);
+
+		// the loop relies on Advance() returning nullptr once the end of the list is passed
+		while (pCurrent != nullptr && uiCounter < a_uiMaxBlocks)
+		{
+			pCurrent->GetBlock()->PerformEncodingIteration(m_fEffort);
+			uiCounter += a_uiMultithreadingStride;
+
+			// hop to the next link that belongs to this thread
+			pCurrent = pCurrent->Advance(a_uiMultithreadingStride);
+		}
+
+		return uiCounter;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// register, via TrackEncodingWarning(), the warnings that apply to the current m_format
+	//
+	void Image::FindEncodingWarningTypesForCurFormat()
+	{
+		TrackEncodingWarning(WARNING_ALL_TRANSPARENT_PIXELS);	// these two are tracked for every format
+		TrackEncodingWarning(WARNING_SOME_RGBA_NOT_0_TO_1);
+		switch (m_format)
+		{
+		case Format::ETC1:
+		case Format::RGB8:
+		case Format::SRGB8:
+			TrackEncodingWarning(WARNING_SOME_NON_OPAQUE_PIXELS);
+			TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS);
+			break;
+		// the A1 formats still track translucent pixels; like the A8 formats they also track all-opaque input
+		case Format::RGB8A1:
+		case Format::SRGB8A1:
+			TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS);
+			TrackEncodingWarning(WARNING_ALL_OPAQUE_PIXELS);
+			break;
+		case Format::RGBA8:
+		case Format::SRGBA8:
+			TrackEncodingWarning(WARNING_ALL_OPAQUE_PIXELS);
+			break;
+		// R11 formats: non-opaque pixels plus any non-zero green or blue value
+		case Format::R11:
+		case Format::SIGNED_R11:
+			TrackEncodingWarning(WARNING_SOME_NON_OPAQUE_PIXELS);
+			TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS);
+			TrackEncodingWarning(WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO);
+			TrackEncodingWarning(WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO);
+			break;
+		// RG11 formats: as for R11, except that green values are not tracked
+		case Format::RG11:
+		case Format::SIGNED_RG11:
+			TrackEncodingWarning(WARNING_SOME_NON_OPAQUE_PIXELS);
+			TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS);
+			TrackEncodingWarning(WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO);
+			break;
+		case Format::FORMATS:
+		case Format::UNKNOWN:
+		default:
+			assert(0);
+			break;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// compare the pixel statistics of the image with the warning conditions and report each one that applies
+	//
+	void Image::FindAndSetEncodingWarnings()
+	{
+		const int iTotalPixels = (m_uiBlockRows * 4) * (m_uiBlockColumns * 4);
+		if (m_iNumOpaquePixels == iTotalPixels)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_ALL_OPAQUE_PIXELS);
+		}
+		if (m_iNumOpaquePixels < iTotalPixels)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_SOME_NON_OPAQUE_PIXELS);
+		}
+		if (m_iNumTranslucentPixels > 0)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_SOME_TRANSLUCENT_PIXELS);
+		}
+		if (m_iNumTransparentPixels == iTotalPixels)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_ALL_TRANSPARENT_PIXELS);
+		}
+		if (m_numColorValues.fB > 0.0f)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO);
+		}
+		if (m_numColorValues.fG > 0.0f) 
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO);
+		}
+		// out-of-range values: red/green and blue/alpha are tested separately but report the same warning
+		if (m_numOutOfRangeValues.fR > 0.0f || m_numOutOfRangeValues.fG > 0.0f)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_SOME_RGBA_NOT_0_TO_1);
+		}
+		if (m_numOutOfRangeValues.fB > 0.0f || m_numOutOfRangeValues.fA > 0.0f)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_SOME_RGBA_NOT_0_TO_1);
+		}
+	}
+	
+	// ----------------------------------------------------------------------------------------------------
+	// return a string name for a given image format
+	//
+	const char * Image::EncodingFormatToString(Image::Format a_format)
+	{
+		// every format without a name of its own (UNKNOWN, FORMATS, any other value)
+		// is reported as "UNKNOWN"
+		const char *pszName = "UNKNOWN";
+
+		switch (a_format)
+		{
+		// formats without alpha
+		case Image::Format::ETC1:			pszName = "ETC1";			break;
+		case Image::Format::RGB8:			pszName = "RGB8";			break;
+		case Image::Format::SRGB8:			pszName = "SRGB8";			break;
+
+		// formats with alpha
+		case Image::Format::RGB8A1:			pszName = "RGB8A1";			break;
+		case Image::Format::SRGB8A1:		pszName = "SRGB8A1";		break;
+		case Image::Format::RGBA8:			pszName = "RGBA8";			break;
+		case Image::Format::SRGBA8:			pszName = "SRGBA8";			break;
+
+		// red only
+		case Image::Format::R11:			pszName = "R11";			break;
+		case Image::Format::SIGNED_R11:		pszName = "SIGNED_R11";		break;
+
+		// red and green only
+		case Image::Format::RG11:			pszName = "RG11";			break;
+		case Image::Format::SIGNED_RG11:	pszName = "SIGNED_RG11";	break;
+
+		case Image::Format::FORMATS:
+		case Image::Format::UNKNOWN:
+		default:
+			break;
+		}
+
+		return pszName;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// return a string name for the image's format
+	//
+	const char * Image::EncodingFormatToString(void)
+	{
+		return Image::EncodingFormatToString(GetFormat());	// name of this image's own format
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// init image blocks prior to encoding
+	// init block sorter for subsequent sortings
+	// check for encoding warnings
+	//
+	void Image::InitBlocksAndBlockSorter(void)
+	{
+		// decide which warnings apply to m_format
+		FindEncodingWarningTypesForCurFormat();
+
+		// init each block, walking the block grid row by row
+		// consecutive blocks receive consecutive slices of the encoding bits buffer
+		unsigned char *paucBlockBits = m_paucEncodingBits;
+		Block4x4 *pblockNext = m_pablock;
+
+		for (unsigned int uiRow = 0; uiRow < m_uiBlockRows; uiRow++)
+		{
+			for (unsigned int uiColumn = 0; uiColumn < m_uiBlockColumns; uiColumn++)
+			{
+				// a block covers 4x4 pixels, so block coordinates scale by 4 into pixel coordinates
+				const unsigned int uiPixelH = uiColumn * 4;
+				const unsigned int uiPixelV = uiRow * 4;
+
+				pblockNext->InitFromSource(this, uiPixelH, uiPixelV, paucBlockBits, m_errormetric);
+				pblockNext++;
+
+				paucBlockBits += Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat);
+			}
+		}
+
+		// the blocks counted the source pixels while initializing, so the warnings can be evaluated now
+		FindAndSetEncodingWarnings();
+
+		// init block sorter and register every block with it
+		const unsigned int uiBlocks = GetNumberOfBlocks();
+		m_psortedblocklist = new SortedBlockList(uiBlocks, 100);
+
+		for (unsigned int uiBlock = 0; uiBlock < uiBlocks; uiBlock++)
+		{
+			m_psortedblocklist->AddBlock(&m_pablock[uiBlock]);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// run the first pass of the encoder
+	// the encoder generally finds a reasonable, fast encoding
+	// this is run on all blocks regardless of effort to ensure that all blocks have a valid encoding
+	//
+	void Image::RunFirstPass(unsigned int a_uiMultithreadingOffset, unsigned int a_uiMultithreadingStride)
+	{
+		assert(a_uiMultithreadingStride > 0);
+
+		// this call handles blocks offset, offset + stride, offset + 2 * stride, ...
+		unsigned int uiBlock = a_uiMultithreadingOffset;
+		while (uiBlock < GetNumberOfBlocks())
+		{
+			m_pablock[uiBlock].PerformEncodingIteration(m_fEffort);
+			uiBlock += a_uiMultithreadingStride;
+		}
+	}
+
+    // ----------------------------------------------------------------------------------------------------
+	// set the encoding bits (for the output file) based on the best encoding for each block
+	//
+	void Image::SetEncodingBits(unsigned int a_uiMultithreadingOffset,
+								unsigned int a_uiMultithreadingStride)
+	{
+		assert(a_uiMultithreadingStride > 0);
+
+		// each call writes the bits of its own interleaved subset of the blocks:
+		// offset, offset + stride, offset + 2 * stride, ...
+		unsigned int uiBlock = a_uiMultithreadingOffset;
+		while (uiBlock < GetNumberOfBlocks())
+		{
+			m_pablock[uiBlock].SetEncodingBitsFromEncoding();
+			uiBlock += a_uiMultithreadingStride;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// return the image error
+	// image error is the sum of all block errors
+	//
+	float Image::GetError(void)
+	{
+		// add the block errors in block order (first block to last)
+		float fTotal = 0.0f;
+
+		Block4x4 *pblockEnd = m_pablock + GetNumberOfBlocks();
+		for (Block4x4 *pblock = m_pablock; pblock != pblockEnd; pblock++)
+		{
+			fTotal += pblock->GetError();
+		}
+		return fTotal;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// determine the encoding bits format based on the encoding format
+	// the encoding bits format is a family of bit encodings that are shared across various encoding formats
+	//
+	Block4x4EncodingBits::Format Image::DetermineEncodingBitsFormat(Format a_format)
+	{
+		// several image formats share one bit layout; formats that match none of the
+		// groups below keep the UNKNOWN layout
+		Block4x4EncodingBits::Format bitsformat = Block4x4EncodingBits::Format::UNKNOWN;
+
+		switch (a_format)
+		{
+		case Format::ETC1:
+		case Format::RGB8:
+		case Format::SRGB8:
+			bitsformat = Block4x4EncodingBits::Format::RGB8;
+			break;
+
+		case Format::RGB8A1:
+		case Format::SRGB8A1:
+			bitsformat = Block4x4EncodingBits::Format::RGB8A1;
+			break;
+
+		case Format::RGBA8:
+		case Format::SRGBA8:
+			bitsformat = Block4x4EncodingBits::Format::RGBA8;
+			break;
+
+		case Format::R11:
+		case Format::SIGNED_R11:
+			bitsformat = Block4x4EncodingBits::Format::R11;
+			break;
+
+		case Format::RG11:
+		case Format::SIGNED_RG11:
+			bitsformat = Block4x4EncodingBits::Format::RG11;
+			break;
+
+		default:
+			break;
+		}
+
+		return bitsformat;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+}	// namespace Etc
diff --git a/extern/EtcLib/Etc/EtcImage.h b/extern/EtcLib/Etc/EtcImage.h
new file mode 100644
index 0000000..f937fce
--- /dev/null
+++ b/extern/EtcLib/Etc/EtcImage.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+//#include "Etc.h"
+#include "EtcColorFloatRGBA.h"
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcErrorMetric.h"
+
+
+namespace Etc
+{
+	class Block4x4;
+	class EncoderSpec;
+	class SortedBlockList;
+
+    class Image
+    {
+    public:
+
+		// the different warnings and errors that can come up during encoding; each one is a single bit, so several can be OR'ed into one status
+		enum  EncodingStatus
+		{
+			SUCCESS = 0,
+			//
+			WARNING_THRESHOLD = 1 << 0,
+			//
+			WARNING_EFFORT_OUT_OF_RANGE = 1 << 1,
+			WARNING_JOBS_OUT_OF_RANGE = 1 << 2,
+			WARNING_SOME_NON_OPAQUE_PIXELS = 1 << 3,//just for opaque formats, etc1, rgb8, r11, rg11
+			WARNING_ALL_OPAQUE_PIXELS = 1 << 4,
+			WARNING_ALL_TRANSPARENT_PIXELS = 1 << 5,
+			WARNING_SOME_TRANSLUCENT_PIXELS = 1 << 6,//just for rgb8A1
+			WARNING_SOME_RGBA_NOT_0_TO_1 = 1 << 7,
+			WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO = 1 << 8,
+			WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO = 1 << 9,
+			//
+			ERROR_THRESHOLD = 1 << 16,
+			//
+			ERROR_UNKNOWN_FORMAT = 1 << 17,
+			ERROR_UNKNOWN_ERROR_METRIC = 1 << 18,
+			ERROR_ZERO_WIDTH_OR_HEIGHT = 1 << 19,
+			//
+		};
+		
+		enum class Format
+		{
+			UNKNOWN,
+			//
+			ETC1,
+			//
+			// ETC2 formats
+			RGB8,
+			SRGB8,
+			RGBA8,
+			SRGBA8,
+			R11,
+			SIGNED_R11,
+			RG11,
+			SIGNED_RG11,
+			RGB8A1,
+			SRGB8A1,
+			//
+			FORMATS,
+			//
+			DEFAULT = SRGB8
+		};
+
+		// constructor using source image
+        Image(float *a_pafSourceRGBA, unsigned int a_uiSourceWidth,
+				unsigned int a_uiSourceHeight,
+				ErrorMetric a_errormetric);
+
+		// constructor using encoding bits
+		Image(Format a_format, 
+				unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
+				unsigned char *a_paucEncidingBits, unsigned int a_uiEncodingBitsBytes,
+				Image *a_pimageSource,
+				ErrorMetric a_errormetric);
+
+		~Image(void);
+
+		EncodingStatus Encode(Format a_format, ErrorMetric a_errormetric, float a_fEffort, 
+			unsigned int a_uiJobs, unsigned int a_uiMaxJobs);
+		// OR a_encStatus into the accumulated encoding status (bits that are already set stay set)
+		inline void AddToEncodingStatus(EncodingStatus a_encStatus)
+		{
+			m_encodingStatus = (EncodingStatus)((unsigned int)m_encodingStatus | (unsigned int)a_encStatus);
+		}
+		
+		inline unsigned int GetSourceWidth(void)
+		{
+			return m_uiSourceWidth;
+		}
+
+		inline unsigned int GetSourceHeight(void)
+		{
+			return m_uiSourceHeight;
+		}
+
+		inline unsigned int GetExtendedWidth(void)
+		{
+			return m_uiExtendedWidth;
+		}
+
+		inline unsigned int GetExtendedHeight(void)
+		{
+			return m_uiExtendedHeight;
+		}
+
+		inline unsigned int GetNumberOfBlocks()
+		{
+			return m_uiBlockColumns * m_uiBlockRows;
+		}
+
+		inline Block4x4 * GetBlocks()
+		{
+			return m_pablock;
+		}
+
+		inline unsigned char * GetEncodingBits(void)
+		{
+			return m_paucEncodingBits;
+		}
+
+		inline unsigned int GetEncodingBitsBytes(void)
+		{
+			return m_uiEncodingBitsBytes;
+		}
+
+		inline int GetEncodingTimeMs(void)
+		{
+			return m_iEncodeTime_ms;
+		}
+
+		float GetError(void);
+		// pixel at column a_uiH, row a_uiV of the source image; nullptr when the position lies outside the source
+        inline ColorFloatRGBA * GetSourcePixel(unsigned int a_uiH, unsigned int a_uiV)
+		{
+			if (a_uiH >= m_uiSourceWidth || a_uiV >= m_uiSourceHeight)
+			{
+				return nullptr;
+			}
+
+			return &m_pafrgbaSource[a_uiV*m_uiSourceWidth + a_uiH];	// source pixels are stored row by row
+		}
+
+		inline Format GetFormat(void)
+		{
+			return m_format;
+		}
+
+		static Block4x4EncodingBits::Format DetermineEncodingBitsFormat(Format a_format);
+		// round a dimension up to the next multiple of 4; note that argument and result are 16 bit values
+		inline static unsigned short CalcExtendedDimension(unsigned short a_ushOriginalDimension)
+		{
+			return (unsigned short)((a_ushOriginalDimension + 3) & ~3);
+		}
+
+		inline ErrorMetric GetErrorMetric(void)
+		{
+			return m_errormetric;
+		}
+
+		static const char * EncodingFormatToString(Image::Format a_format);
+		const char * EncodingFormatToString(void);
+		// census of the source pixels; these public counters are incremented by Block4x4::SetSourcePixels()
+		int m_iNumOpaquePixels;
+		int m_iNumTranslucentPixels;
+		int m_iNumTransparentPixels;
+
+		ColorFloatRGBA m_numColorValues;		// used as counters: per channel number of non-zero R, G, B values
+		ColorFloatRGBA m_numOutOfRangeValues;	// used as counters: per channel number of values found outside 0 to 1
+
+		bool m_bVerboseOutput;
+	private:
+		//add a warning or error to check for while encoding
+		inline void TrackEncodingWarning(EncodingStatus a_encStatus)
+		{
+			m_warningsToCapture = (EncodingStatus)((unsigned int)m_warningsToCapture | (unsigned int)a_encStatus);
+		}
+
+		//report the warning if it is something we care about for this encoding
+		inline void AddToEncodingStatusIfSignfigant(EncodingStatus a_encStatus)
+		{
+			if ((EncodingStatus)((unsigned int)m_warningsToCapture & (unsigned int)a_encStatus) == a_encStatus)	// every bit of a_encStatus is tracked
+			{
+				AddToEncodingStatus(a_encStatus);
+			}
+		}
+
+		Image(void);
+		void FindEncodingWarningTypesForCurFormat();
+		void FindAndSetEncodingWarnings();
+
+		void InitBlocksAndBlockSorter(void);
+
+		void RunFirstPass(unsigned int a_uiMultithreadingOffset, 
+							unsigned int a_uiMultithreadingStride);
+
+		void SetEncodingBits(unsigned int a_uiMultithreadingOffset,
+								unsigned int a_uiMultithreadingStride);
+
+		unsigned int IterateThroughWorstBlocks(unsigned int a_uiMaxBlocks,
+												unsigned int a_uiMultithreadingOffset,
+												unsigned int a_uiMultithreadingStride);
+
+		// inputs
+        ColorFloatRGBA *m_pafrgbaSource;
+		unsigned int m_uiSourceWidth;
+		unsigned int m_uiSourceHeight;
+		unsigned int m_uiExtendedWidth;
+		unsigned int m_uiExtendedHeight;
+		unsigned int m_uiBlockColumns;
+		unsigned int m_uiBlockRows;
+		// intermediate data
+		Block4x4 *m_pablock;
+		// encoding
+		Format m_format;
+		Block4x4EncodingBits::Format m_encodingbitsformat;
+		unsigned int m_uiEncodingBitsBytes;		// for entire image
+		unsigned char *m_paucEncodingBits;
+		ErrorMetric m_errormetric;
+		float m_fEffort;
+		// stats
+		int m_iEncodeTime_ms;
+		
+		SortedBlockList *m_psortedblocklist;
+		//this will hold any warning or errors that happen during encoding
+		EncodingStatus m_encodingStatus;
+		//these will be the warnings we are tracking
+		EncodingStatus m_warningsToCapture;
+	};
+
+} // namespace Etc
diff --git a/extern/EtcLib/Etc/EtcMath.cpp b/extern/EtcLib/Etc/EtcMath.cpp
new file mode 100644
index 0000000..cd70a9a
--- /dev/null
+++ b/extern/EtcLib/Etc/EtcMath.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EtcConfig.h"
+#include "EtcMath.h"
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// calculate the line that best fits the set of XY points contained in a_afX[] and a_afY[]
+	// use a_fSlope and a_fOffset to define that line
+	//
+	bool Regression(float a_afX[], float a_afY[], unsigned int a_Points,
+					float *a_fSlope, float *a_fOffset)
+	{
+		// running sums needed by the least squares formulas
+		float fSumX = 0.0f;
+		float fSumY = 0.0f;
+		float fSumXY = 0.0f;
+		float fSumX2 = 0.0f;
+
+		for (unsigned int uiPoint = 0; uiPoint < a_Points; uiPoint++)
+		{
+			const float fX = a_afX[uiPoint];
+			const float fY = a_afY[uiPoint];
+			fSumX += fX;
+			fSumY += fY;
+			fSumXY += fX * fY;
+			fSumX2 += fX * fX;
+		}
+
+		const float fPoints = (float)a_Points;
+		const float fDivisor = fPoints*fSumX2 - fSumX*fSumX;
+		// a non-zero divisor means the slope is defined: store the fitted line and report "not vertical"
+		if (fDivisor != 0.0f)
+		{
+			*a_fSlope = (fPoints*fSumXY - fSumX*fSumY) / fDivisor;
+			*a_fOffset = (fSumY - (*a_fSlope)*fSumX) / fPoints;
+			return false;
+		}
+		// vertical line (this is also the result for zero points): zero both outputs
+		*a_fSlope = 0.0f;
+		*a_fOffset = 0.0f;
+		return true;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/extern/EtcLib/Etc/EtcMath.h b/extern/EtcLib/Etc/EtcMath.h
new file mode 100644
index 0000000..3d951fe
--- /dev/null
+++ b/extern/EtcLib/Etc/EtcMath.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <math.h>
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// least squares line fit through the points; returns true (with slope and offset set to 0) if the line is vertical
+	bool Regression(float a_afX[], float a_afY[], unsigned int a_Points,
+					float *a_fSlope, float *a_fOffset);
+
+	inline float ConvertMSEToPSNR(float a_fMSE)
+	{
+		// PSNR = 10 * log10(1 / MSE)
+		// an error of exactly zero has no finite PSNR, so it is reported as infinity
+		const bool boolNoError = (a_fMSE == 0.0f);
+
+		return boolNoError ? INFINITY
+							: 10.0f * log10f(1.0f / a_fMSE);
+	}
+
+
+}
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4.cpp b/extern/EtcLib/EtcCodec/EtcBlock4x4.cpp
new file mode 100644
index 0000000..e810369
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4.cpp
@@ -0,0 +1,417 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+EtcBlock4x4.cpp
+
+Implements the state associated with each 4x4 block of pixels in an image
+
+Source images that are not a multiple of 4x4 are extended to fill the Block4x4 using pixels with an 
+alpha of NAN
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4.h"
+
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcColor.h"
+#include "EtcImage.h"
+#include "EtcColorFloatRGBA.h"
+#include "EtcBlock4x4Encoding_RGB8.h"
+#include "EtcBlock4x4Encoding_RGBA8.h"
+#include "EtcBlock4x4Encoding_RGB8A1.h"
+#include "EtcBlock4x4Encoding_R11.h"
+#include "EtcBlock4x4Encoding_RG11.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+namespace Etc
+{
+	// ETC pixels are scanned vertically (column by column), i.e. a pixel's index within the block is column * 4 + row.
+	// this table lists those indices in horizontal (row by row) scan order: entry [row * 4 + column] holds column * 4 + row
+	const unsigned int Block4x4::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Block4x4::Block4x4(void)
+		: m_pimageSource(nullptr),
+		m_uiSourceH(0),
+		m_uiSourceV(0),
+		m_errormetric(ErrorMetric::NUMERIC),
+		m_sourcealphamix(SourceAlphaMix::UNKNOWN),
+		m_boolBorderPixels(false),
+		m_boolPunchThroughPixels(false),
+		m_pencoding(nullptr)
+	{
+		// a fresh block has no source image, no encoding and no pixel census yet
+		// the initializers above follow the member declaration order
+		// m_afrgbaSource is not set here; SetSourcePixels() fills it in
+
+	}
+	Block4x4::~Block4x4()
+	{
+		// the block deletes its encoding; the source image is not deleted here
+		// (deleting a null pointer does nothing, so no test is needed)
+		delete m_pencoding;
+		m_pencoding = nullptr;
+
+		m_pimageSource = nullptr;
+	}
+	// ----------------------------------------------------------------------------------------------------
+	// initialization prior to encoding from a source image
+	// [a_uiSourceH,a_uiSourceV] is the location of the block in a_pimageSource
+	// a_paucEncodingBits is the place to store the final encoding
+	// a_errormetric is used for finding the best encoding
+	//
+	void Block4x4::InitFromSource(Image *a_pimageSource, 
+									unsigned int a_uiSourceH, unsigned int a_uiSourceV,
+									unsigned char *a_paucEncodingBits,
+									ErrorMetric a_errormetric)
+	{
+		// a bare "Block4x4();" statement only creates and destroys an unrelated temporary, it does not
+		// reset this object; restore the constructed state by hand and release any earlier encoding
+		delete m_pencoding;
+		m_pencoding = nullptr;
+		m_sourcealphamix = SourceAlphaMix::UNKNOWN;
+		m_boolBorderPixels = false;
+		m_boolPunchThroughPixels = false;
+
+		m_pimageSource = a_pimageSource;
+		m_uiSourceH = a_uiSourceH;
+		m_uiSourceV = a_uiSourceV;
+		m_errormetric = a_errormetric;
+
+		SetSourcePixels();	// fills m_afrgbaSource and sets m_sourcealphamix and the border / punch through flags
+
+		// set block encoder function
+		switch (m_pimageSource->GetFormat())
+		{
+		case Image::Format::ETC1:
+			m_pencoding = new Block4x4Encoding_ETC1;
+			break;
+
+		case Image::Format::RGB8:
+		case Image::Format::SRGB8:
+			m_pencoding = new Block4x4Encoding_RGB8;
+			break;
+
+		case Image::Format::RGBA8:
+		case Image::Format::SRGBA8:
+			switch (m_sourcealphamix)
+			{
+			case SourceAlphaMix::OPAQUE:
+				m_pencoding = new Block4x4Encoding_RGBA8_Opaque;
+				break;
+			case SourceAlphaMix::TRANSPARENT:
+				m_pencoding = new Block4x4Encoding_RGBA8_Transparent;
+				break;
+			case SourceAlphaMix::TRANSLUCENT:
+				m_pencoding = new Block4x4Encoding_RGBA8;
+				break;
+
+			default:
+				assert(0);
+				break;
+			}
+			break;
+
+		case Image::Format::RGB8A1:
+		case Image::Format::SRGB8A1:
+			switch (m_sourcealphamix)
+			{
+			case SourceAlphaMix::OPAQUE:
+				m_pencoding = new Block4x4Encoding_RGB8A1_Opaque;
+				break;
+			case SourceAlphaMix::TRANSPARENT:
+				m_pencoding = new Block4x4Encoding_RGB8A1_Transparent;
+				break;
+			case SourceAlphaMix::TRANSLUCENT:
+				if (m_boolPunchThroughPixels)
+				{
+					m_pencoding = new Block4x4Encoding_RGB8A1;
+				}
+				else
+				{
+					m_pencoding = new Block4x4Encoding_RGB8A1_Opaque;
+				}
+				break;
+
+			default:
+				assert(0);
+				break;
+			}
+			break;
+
+		case Image::Format::R11:
+		case Image::Format::SIGNED_R11:
+			m_pencoding = new Block4x4Encoding_R11;
+			break;
+		case Image::Format::RG11:
+		case Image::Format::SIGNED_RG11:
+			m_pencoding = new Block4x4Encoding_RG11;
+			break;
+		default:
+			assert(0);
+			break;
+		}
+
+		m_pencoding->InitFromSource(this, m_afrgbaSource,
+									a_paucEncodingBits, a_errormetric);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization of encoding state from a prior encoding using encoding bits
+	// [a_uiSourceH,a_uiSourceV] is the location of the block in a_pimageSource
+	// a_paucEncodingBits is the place to read the prior encoding
+	// a_imageformat is used to determine how to interpret a_paucEncodingBits
+	// a_errormetric was used for the prior encoding
+	//
+	void Block4x4::InitFromEtcEncodingBits(Image::Format a_imageformat,
+											unsigned int a_uiSourceH, unsigned int a_uiSourceV,
+											unsigned char *a_paucEncodingBits,
+											Image *a_pimageSource,
+											ErrorMetric a_errormetric)
+	{
+		// "Block4x4();" would only create and destroy a temporary, so reset this block's own state by hand
+		delete m_pencoding;
+		m_pencoding = nullptr;
+		m_sourcealphamix = SourceAlphaMix::UNKNOWN;
+		m_boolBorderPixels = false;
+		m_boolPunchThroughPixels = false;
+
+		m_pimageSource = a_pimageSource;
+		m_uiSourceH = a_uiSourceH;
+		m_uiSourceV = a_uiSourceV;
+		m_errormetric = a_errormetric;
+
+		SetSourcePixels();
+
+		// set block encoder function
+		switch (a_imageformat)
+		{
+		case Image::Format::ETC1:
+			m_pencoding = new Block4x4Encoding_ETC1;
+			break;
+		case Image::Format::RGB8:
+		case Image::Format::SRGB8:
+			m_pencoding = new Block4x4Encoding_RGB8;
+			break;
+		case Image::Format::RGBA8:
+		case Image::Format::SRGBA8:
+			m_pencoding = new Block4x4Encoding_RGBA8;
+			break;
+		case Image::Format::RGB8A1:
+		case Image::Format::SRGB8A1:
+			m_pencoding = new Block4x4Encoding_RGB8A1;
+			break;
+		case Image::Format::R11:
+		case Image::Format::SIGNED_R11:
+			m_pencoding = new Block4x4Encoding_R11;
+			break;
+		case Image::Format::RG11:
+		case Image::Format::SIGNED_RG11:
+			m_pencoding = new Block4x4Encoding_RG11;
+			break;
+		default:
+			assert(0);
+			break;
+		}
+		// NOTE(review): the encoding is given the source image's error metric, not a_errormetric - confirm this is intended
+		m_pencoding->InitFromEncodingBits(this, a_paucEncodingBits, m_afrgbaSource,
+										m_pimageSource->GetErrorMetric());
+	}
+	
+	// ----------------------------------------------------------------------------------------------------
+	// set source pixels from m_pimageSource
+	// set m_alphamix
+	//
+	void Block4x4::SetSourcePixels(void)
+	{
+
+		Image::Format imageformat = m_pimageSource->GetFormat();
+
+		// alpha census
+		unsigned int uiTransparentSourcePixels = 0;
+		unsigned int uiOpaqueSourcePixels = 0;
+
+		// copy source to consecutive memory locations
+		// convert from image horizontal scan to block vertical scan
+		unsigned int uiPixel = 0;
+		for (unsigned int uiBlockPixelH = 0; uiBlockPixelH < Block4x4::COLUMNS; uiBlockPixelH++)
+		{
+			unsigned int uiSourcePixelH = m_uiSourceH + uiBlockPixelH;
+
+			for (unsigned int uiBlockPixelV = 0; uiBlockPixelV < Block4x4::ROWS; uiBlockPixelV++)
+			{
+				unsigned int uiSourcePixelV = m_uiSourceV + uiBlockPixelV;
+
+                ColorFloatRGBA *pfrgbaSource = m_pimageSource->GetSourcePixel(uiSourcePixelH, uiSourcePixelV);
+
+				// if pixel extends beyond source image because of block padding
+				if (pfrgbaSource == nullptr)
+				{
+					m_afrgbaSource[uiPixel] = ColorFloatRGBA(0.0f, 0.0f, 0.0f, NAN);	// denotes border pixel
+					m_boolBorderPixels = true;
+					uiTransparentSourcePixels++;
+				}
+				else
+				{
+					// get the current pixel data, and record some of its attributes in the image wide counters
+					// NOTE(review): the original comment said this happens before capping, but ClampRGBA() runs first;
+					// if it clamps to 0..1 the out of range checks below can presumably never fire for R, G, B - confirm
+					m_afrgbaSource[uiPixel] = (*pfrgbaSource).ClampRGBA();
+
+					if (m_afrgbaSource[uiPixel].fA == 1.0f)
+					{
+						m_pimageSource->m_iNumOpaquePixels++;
+					}
+					else if (m_afrgbaSource[uiPixel].fA == 0.0f)
+					{
+						m_pimageSource->m_iNumTransparentPixels++;
+					}
+					else if(m_afrgbaSource[uiPixel].fA > 0.0f && m_afrgbaSource[uiPixel].fA < 1.0f)
+					{
+						m_pimageSource->m_iNumTranslucentPixels++;
+					}
+					else
+					{
+						m_pimageSource->m_numOutOfRangeValues.fA++;
+					}
+
+					if (m_afrgbaSource[uiPixel].fR != 0.0f)
+					{
+						m_pimageSource->m_numColorValues.fR++;
+						//make sure we are getting a float between 0-1
+						if (m_afrgbaSource[uiPixel].fR - 1.0f > 0.0f)
+						{
+							m_pimageSource->m_numOutOfRangeValues.fR++;
+						}
+					}
+
+					if (m_afrgbaSource[uiPixel].fG != 0.0f)
+					{
+						m_pimageSource->m_numColorValues.fG++;
+						if (m_afrgbaSource[uiPixel].fG - 1.0f > 0.0f)
+						{
+							m_pimageSource->m_numOutOfRangeValues.fG++;
+						}
+					}
+					if (m_afrgbaSource[uiPixel].fB != 0.0f)
+					{
+						m_pimageSource->m_numColorValues.fB++;
+						if (m_afrgbaSource[uiPixel].fB - 1.0f > 0.0f)
+						{
+							m_pimageSource->m_numOutOfRangeValues.fB++;
+						}
+					}
+					// for formats with no alpha, set source alpha to 1
+					if (imageformat == Image::Format::ETC1 ||
+						imageformat == Image::Format::RGB8 ||
+						imageformat == Image::Format::SRGB8)
+					{
+						m_afrgbaSource[uiPixel].fA = 1.0f;
+					}
+
+					if (imageformat == Image::Format::R11 ||
+						imageformat == Image::Format::SIGNED_R11)
+					{
+						m_afrgbaSource[uiPixel].fA = 1.0f;
+						m_afrgbaSource[uiPixel].fG = 0.0f;
+						m_afrgbaSource[uiPixel].fB = 0.0f;
+					}
+
+					if (imageformat == Image::Format::RG11 ||
+						imageformat == Image::Format::SIGNED_RG11)
+					{
+						m_afrgbaSource[uiPixel].fA = 1.0f;
+						m_afrgbaSource[uiPixel].fB = 0.0f;
+					}
+
+				
+					// for RGB8A1, set source alpha to 0.0 or 1.0
+					// set punch through flag
+					if (imageformat == Image::Format::RGB8A1 ||
+						imageformat == Image::Format::SRGB8A1)
+					{
+						if (m_afrgbaSource[uiPixel].fA >= 0.5f)
+						{
+							m_afrgbaSource[uiPixel].fA = 1.0f;
+						}
+						else
+						{
+							m_afrgbaSource[uiPixel].fA = 0.0f;
+							m_boolPunchThroughPixels = true;
+						}
+					}
+
+					if (m_afrgbaSource[uiPixel].fA == 1.0f)
+					{
+						uiOpaqueSourcePixels++;
+					}
+					else if (m_afrgbaSource[uiPixel].fA == 0.0f)
+					{
+						uiTransparentSourcePixels++;
+					}
+
+				}
+
+				uiPixel += 1;
+			}
+		}
+
+		if (uiOpaqueSourcePixels == PIXELS)
+		{
+			m_sourcealphamix = SourceAlphaMix::OPAQUE;
+		}
+		else if (uiTransparentSourcePixels == PIXELS)
+		{
+			m_sourcealphamix = SourceAlphaMix::TRANSPARENT;
+		}
+		else
+		{
+			m_sourcealphamix = SourceAlphaMix::TRANSLUCENT;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// return a name for the encoding mode
+	//
+	const char * Block4x4::GetEncodingModeName(void)
+	{
+		// modes without a name of their own are shown as "???"
+		const char *pszMode = "???";
+
+		switch (GetEncodingMode())
+		{
+		case Block4x4Encoding::MODE_ETC1:	pszMode = "ETC1";	break;
+		case Block4x4Encoding::MODE_T:		pszMode = "T";		break;
+		case Block4x4Encoding::MODE_H:		pszMode = "H";		break;
+		case Block4x4Encoding::MODE_PLANAR:	pszMode = "PLANAR";	break;
+		default:
+			break;
+		}
+
+		return pszMode;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+}
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4.h b/extern/EtcLib/EtcCodec/EtcBlock4x4.h
new file mode 100644
index 0000000..7716beb
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcColor.h"
+#include "EtcColorFloatRGBA.h"
+#include "EtcErrorMetric.h"
+#include "EtcImage.h"
+#include "EtcBlock4x4Encoding.h"
+
+namespace Etc
+{
+	class Block4x4EncodingBits;
+
+	class Block4x4
+	{
+	public:
+
+		static const unsigned int ROWS = 4;
+		static const unsigned int COLUMNS = 4;
+		static const unsigned int PIXELS = ROWS * COLUMNS;
+
+		// the alpha mix for a 4x4 block of pixels
+		enum class SourceAlphaMix
+		{
+			UNKNOWN,
+			//
+			OPAQUE,			// all 1.0
+			TRANSPARENT,	// all 0.0 or NAN
+			TRANSLUCENT		// not all opaque or transparent
+		};
+
+		typedef void (Block4x4::*EncoderFunctionPtr)(void);
+
+		Block4x4(void);
+		~Block4x4();
+		void InitFromSource(Image *a_pimageSource,
+							unsigned int a_uiSourceH,
+							unsigned int a_uiSourceV,
+							unsigned char *a_paucEncodingBits,
+							ErrorMetric a_errormetric);
+
+		void InitFromEtcEncodingBits(Image::Format a_imageformat,
+										unsigned int a_uiSourceH,
+										unsigned int a_uiSourceV,
+										unsigned char *a_paucEncodingBits,
+										Image *a_pimageSource,
+										ErrorMetric a_errormetric);
+
+		// run one iteration of the block's encoding with the given effort (nothing is returned)
+		inline void PerformEncodingIteration(float a_fEffort)
+		{
+			m_pencoding->PerformIteration(a_fEffort);
+		}
+
+		inline void SetEncodingBitsFromEncoding(void)
+		{
+			m_pencoding->SetEncodingBits();
+		}
+
+		inline unsigned int GetSourceH(void)
+		{
+			return m_uiSourceH;
+		}
+
+		inline unsigned int GetSourceV(void)
+		{
+			return m_uiSourceV;
+		}
+
+		inline float GetError(void)
+		{
+			return m_pencoding->GetError();
+		}
+
+		static const unsigned int s_auiPixelOrderHScan[PIXELS];
+
+		inline ColorFloatRGBA * GetDecodedColors(void)
+		{
+			return m_pencoding->GetDecodedColors();
+		}
+
+		inline float * GetDecodedAlphas(void)
+		{
+			return m_pencoding->GetDecodedAlphas();
+		}
+
+		inline Block4x4Encoding::Mode GetEncodingMode(void)
+		{
+			return m_pencoding->GetMode();
+		}
+
+		inline bool GetFlip(void)
+		{
+			return m_pencoding->GetFlip();
+		}
+
+		inline bool IsDifferential(void)
+		{
+			return m_pencoding->IsDifferential();
+		}
+
+		inline ColorFloatRGBA * GetSource()
+		{
+			return m_afrgbaSource;
+		}
+
+		inline ErrorMetric GetErrorMetric()
+		{
+			return m_errormetric;
+		}
+
+		const char * GetEncodingModeName(void);
+
+		inline Block4x4Encoding * GetEncoding(void)
+		{
+			return m_pencoding;
+		}
+
+		inline SourceAlphaMix GetSourceAlphaMix(void)
+		{
+			return m_sourcealphamix;
+		}
+
+		inline Image * GetImageSource(void)
+		{
+			return m_pimageSource;
+		}
+
+		inline bool HasBorderPixels(void)
+		{
+			return m_boolBorderPixels;
+		}
+
+		inline bool HasPunchThroughPixels(void)
+		{
+			return m_boolPunchThroughPixels;
+		}
+
+	private:
+
+		void SetSourcePixels(void);
+
+		Image				*m_pimageSource;
+		unsigned int		m_uiSourceH;
+		unsigned int		m_uiSourceV;
+		ErrorMetric			m_errormetric;
+		ColorFloatRGBA		m_afrgbaSource[PIXELS];		// vertical scan
+
+		SourceAlphaMix		m_sourcealphamix;
+		bool				m_boolBorderPixels;			// some pixels lie outside the source image; SetSourcePixels() stores them as rgba(0, 0, 0, NAN)
+		bool				m_boolPunchThroughPixels;	// RGB8A1 or SRGB8A1 with any pixels with alpha < 0.5
+
+		Block4x4Encoding	*m_pencoding;				// created by the Init functions, deleted by the destructor
+
+	};
+
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding.cpp b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding.cpp
new file mode 100644
index 0000000..de71fe6
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding.cpp
@@ -0,0 +1,250 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding.cpp
+
+Block4x4Encoding is the abstract base class for the different encoders.  Each encoder targets a 
+particular file format (e.g. ETC1, RGB8, RGBA8, R11)
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding.h"
+
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcBlock4x4.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+namespace Etc
+{
+	// ----------------------------------------------------------------------------------------------------
+	// weights applied by the REC709 branch of CalcPixelError()
+	const float Block4x4Encoding::LUMA_WEIGHT = 3.0f;			// scales the squared luma delta
+	const float Block4x4Encoding::CHROMA_BLUE_WEIGHT = 0.5f;	// scales the squared blue-chroma delta
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Block4x4Encoding::Block4x4Encoding(void)
+	{
+		// no parent block or source pixels until Init() is called
+		m_pblockParent = nullptr;
+		m_pafrgbaSource = nullptr;
+		m_boolBorderPixels = false;
+		// a negative error means "not calculated yet" (GetError() asserts on it)
+		m_fError = -1.0f;
+		m_mode = MODE_UNKNOWN;
+		m_uiEncodingIterations = 0;
+		m_boolDone = false;
+
+		// give the metric a defined value so that it is never read uninitialized;
+		// Init() overwrites it with the caller's choice
+		m_errormetric = ErrorMetric::RGBA;
+
+		// decoded pixels start at -1 to mark them as undefined
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(-1.0f, -1.0f, -1.0f, -1.0f);
+			m_afDecodedAlphas[uiPixel] = -1.0f;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialize the generic encoding for a 4x4 block
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// init the decoded pixels to -1 to mark them as undefined
+	// init the error to -1 to mark it as undefined
+	//
+	void Block4x4Encoding::Init(Block4x4 *a_pblockParent,
+								ColorFloatRGBA *a_pafrgbaSource,
+								ErrorMetric a_errormetric)
+	{
+		// remember what is being encoded and how candidates are scored
+		m_pblockParent = a_pblockParent;
+		m_pafrgbaSource = a_pafrgbaSource;
+		m_errormetric = a_errormetric;
+
+		// cache the parent's border-pixel flag
+		m_boolBorderPixels = a_pblockParent->HasBorderPixels();
+
+		// restart the iteration count; a negative error means "not yet calculated"
+		m_uiEncodingIterations = 0;
+		m_fError = -1.0f;
+
+		// flag every decoded pixel as undefined
+		const ColorFloatRGBA frgbaUndefined(-1.0f, -1.0f, -1.0f, -1.0f);
+		for (unsigned int uiIndex = 0; uiIndex != PIXELS; ++uiIndex)
+		{
+			m_afrgbaDecodedColors[uiIndex] = frgbaUndefined;
+			m_afDecodedAlphas[uiIndex] = -1.0f;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// calculate the error for the block by summing the pixel errors
+	//
+	void Block4x4Encoding::CalcBlockError(void)
+	{
+		// accumulate the per-pixel errors in a local, then publish the total
+		float fErrorSum = 0.0f;
+		for (unsigned int uiIndex = 0; uiIndex != PIXELS; ++uiIndex)
+		{
+			fErrorSum += CalcPixelError(m_afrgbaDecodedColors[uiIndex],
+										m_afDecodedAlphas[uiIndex], m_pafrgbaSource[uiIndex]);
+		}
+		m_fError = fErrorSum;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// calculate the error between the source pixel and the decoded pixel
+	// the error amount is based on the error metric (m_errormetric)
+	// a_frgbaDecodedColor and a_fDecodedAlpha describe the decoded pixel
+	// a_frgbaSourcePixel is the source pixel; alpha == NAN marks a border pixel
+	// returns 0 for a border pixel, otherwise a sum of squared (NORMALXYZ: also absolute) differences
+	//
+	float Block4x4Encoding::CalcPixelError(ColorFloatRGBA a_frgbaDecodedColor, float a_fDecodedAlpha,
+											ColorFloatRGBA a_frgbaSourcePixel)
+	{
+
+		// if a border pixel
+		if (isnan(a_frgbaSourcePixel.fA))
+		{
+			return 0.0f;
+		}
+
+		if (m_errormetric == ErrorMetric::RGBA)
+		{
+			assert(a_fDecodedAlpha >= 0.0f);
+
+			// compare the color channels after weighting each side by its own alpha
+			float fDRed = (a_fDecodedAlpha * a_frgbaDecodedColor.fR) -
+							(a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fR);
+			float fDGreen = (a_fDecodedAlpha * a_frgbaDecodedColor.fG) -
+							(a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fG);
+			float fDBlue = (a_fDecodedAlpha * a_frgbaDecodedColor.fB) -
+							(a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fB);
+
+			float fDAlpha = a_fDecodedAlpha - a_frgbaSourcePixel.fA;
+
+			return fDRed*fDRed + fDGreen*fDGreen + fDBlue*fDBlue + fDAlpha*fDAlpha;
+		}
+		else if (m_errormetric == ErrorMetric::REC709)
+		{
+			assert(a_fDecodedAlpha >= 0.0f);
+
+			// convert both colors to luma plus red/blue chroma using the Rec. 709 luma coefficients
+			float fLuma1 = a_frgbaSourcePixel.fR*0.2126f + a_frgbaSourcePixel.fG*0.7152f + a_frgbaSourcePixel.fB*0.0722f;
+			float fChromaR1 = 0.5f * ((a_frgbaSourcePixel.fR - fLuma1) * (1.0f / (1.0f - 0.2126f)));
+			float fChromaB1 = 0.5f * ((a_frgbaSourcePixel.fB - fLuma1) * (1.0f / (1.0f - 0.0722f)));
+
+			float fLuma2 = a_frgbaDecodedColor.fR*0.2126f +
+							a_frgbaDecodedColor.fG*0.7152f +
+							a_frgbaDecodedColor.fB*0.0722f;
+			float fChromaR2 = 0.5f * ((a_frgbaDecodedColor.fR - fLuma2) * (1.0f / (1.0f - 0.2126f)));
+			float fChromaB2 = 0.5f * ((a_frgbaDecodedColor.fB - fLuma2) * (1.0f / (1.0f - 0.0722f)));
+
+			// deltas are taken after weighting each side by its own alpha
+			float fDeltaL = a_frgbaSourcePixel.fA * fLuma1 - a_fDecodedAlpha * fLuma2;
+			float fDeltaCr = a_frgbaSourcePixel.fA * fChromaR1 - a_fDecodedAlpha * fChromaR2;
+			float fDeltaCb = a_frgbaSourcePixel.fA * fChromaB1 - a_fDecodedAlpha * fChromaB2;
+
+			float fDAlpha = a_fDecodedAlpha - a_frgbaSourcePixel.fA;
+
+			// Favor Luma accuracy over Chroma, and Red over Blue
+			return LUMA_WEIGHT*fDeltaL*fDeltaL +
+					fDeltaCr*fDeltaCr +
+					CHROMA_BLUE_WEIGHT*fDeltaCb*fDeltaCb +
+					fDAlpha*fDAlpha;
+		}
+		else if (m_errormetric == ErrorMetric::NORMALXYZ)
+		{
+			// treat RGB as a vector: 2*c - 1 remaps a [0,1] channel to a [-1,1] component
+			float fDecodedX = 2.0f * a_frgbaDecodedColor.fR - 1.0f;
+			float fDecodedY = 2.0f * a_frgbaDecodedColor.fG - 1.0f;
+			float fDecodedZ = 2.0f * a_frgbaDecodedColor.fB - 1.0f;
+
+			float fDecodedLength = sqrtf(fDecodedX*fDecodedX + fDecodedY*fDecodedY + fDecodedZ*fDecodedZ);
+
+			// a very short decoded vector (this includes zero length) is charged a fixed
+			// error instead of being normalized
+			if (fDecodedLength < 0.5f)
+			{
+				return 1.0f;
+			}
+
+			fDecodedX /= fDecodedLength;
+			fDecodedY /= fDecodedLength;
+			fDecodedZ /= fDecodedLength;
+
+			float fSourceX = 2.0f * a_frgbaSourcePixel.fR - 1.0f;
+			float fSourceY = 2.0f * a_frgbaSourcePixel.fG - 1.0f;
+			float fSourceZ = 2.0f * a_frgbaSourcePixel.fB - 1.0f;
+
+			float fSourceLength = sqrtf(fSourceX*fSourceX + fSourceY*fSourceY + fSourceZ*fSourceZ);
+
+			// a zero-length source has no direction; substitute the +X axis
+			if (fSourceLength == 0.0f)
+			{
+				fSourceX = 1.0f;
+				fSourceY = 0.0f;
+				fSourceZ = 0.0f;
+			}
+			else
+			{
+				fSourceX /= fSourceLength;
+				fSourceY /= fSourceLength;
+				fSourceZ /= fSourceLength;
+			}
+
+			// maps a dot product of +1 (same direction) to 0 and -1 (opposite) to 1, then squares it
+			float fDotProduct = fSourceX*fDecodedX + fSourceY*fDecodedY + fSourceZ*fDecodedZ;
+			float fNormalizedDotProduct = 1.0f - 0.5f * (fDotProduct + 1.0f);
+			float fDotProductError = fNormalizedDotProduct * fNormalizedDotProduct;
+
+			// NOTE(review): the decoded vector was normalized above, so this term is ~0 - confirm intent
+			float fLength2 = fDecodedX*fDecodedX + fDecodedY*fDecodedY + fDecodedZ*fDecodedZ;
+			float fLength2Error = fabsf(1.0f - fLength2);
+
+			// NOTE(review): uses the color's own fA field rather than a_fDecodedAlpha - confirm intent
+			float fDeltaW = a_frgbaDecodedColor.fA - a_frgbaSourcePixel.fA;
+			float fErrorW = fDeltaW * fDeltaW;
+
+			return fDotProductError + fLength2Error + fErrorW;
+		}
+		else // ErrorMetric::NUMERIC
+		{
+			assert(a_fDecodedAlpha >= 0.0f);
+
+			// plain sum of squared differences over the four fields of the two colors
+			float fDX = a_frgbaDecodedColor.fR - a_frgbaSourcePixel.fR;
+			float fDY = a_frgbaDecodedColor.fG - a_frgbaSourcePixel.fG;
+			float fDZ = a_frgbaDecodedColor.fB - a_frgbaSourcePixel.fB;
+			float fDW = a_frgbaDecodedColor.fA - a_frgbaSourcePixel.fA;
+
+			return fDX*fDX + fDY*fDY + fDZ*fDZ + fDW*fDW;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
+
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding.h b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding.h
new file mode 100644
index 0000000..86da2c5
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcColorFloatRGBA.h"
+
+#include "EtcErrorMetric.h"
+
+#include <assert.h>
+#include <float.h>
+
+namespace Etc
+{
+	class Block4x4;
+
+	// abstract base class for specific encodings
+	class Block4x4Encoding
+	{
+	public:
+
+		static const unsigned int ROWS = 4;
+		static const unsigned int COLUMNS = 4;
+		static const unsigned int PIXELS = ROWS * COLUMNS;
+		static const float LUMA_WEIGHT;			// REC709 metric weight, defined in EtcBlock4x4Encoding.cpp
+		static const float CHROMA_BLUE_WEIGHT;	// REC709 metric weight, defined in EtcBlock4x4Encoding.cpp
+
+		typedef enum
+		{
+			MODE_UNKNOWN,
+			//
+			MODE_ETC1,
+			MODE_T,
+			MODE_H,
+			MODE_PLANAR,
+			MODE_R11,
+			MODE_RG11,
+			//
+			MODES
+		} Mode;
+		// the constructor marks the error and the decoded pixels as undefined (-1)
+		Block4x4Encoding(void);
+		//virtual ~Block4x4Encoding(void) =0;
+		virtual ~Block4x4Encoding(void) {}
+		virtual void InitFromSource(Block4x4 *a_pblockParent,
+									ColorFloatRGBA *a_pafrgbaSource,
+
+									unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) = 0;
+		// initialization from the encoding bits of a previous encoding
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+											unsigned char *a_paucEncodingBits,
+											ColorFloatRGBA *a_pafrgbaSource,
+
+											ErrorMetric a_errormetric) = 0;
+
+		// perform an iteration of the encoding
+		// the first iteration must generate a complete, valid (if poor) encoding
+		virtual void PerformIteration(float a_fEffort) = 0;
+		// set m_fError to the sum of CalcPixelError() over all PIXELS pixels
+		void CalcBlockError(void);
+		// block error; asserts that it has already been calculated (not negative)
+		inline float GetError(void)
+		{
+			assert(m_fError >= 0.0f);
+
+			return m_fError;
+		}
+
+		inline ColorFloatRGBA * GetDecodedColors(void)
+		{
+			return m_afrgbaDecodedColors;
+		}
+
+		inline float * GetDecodedAlphas(void)
+		{
+			return m_afDecodedAlphas;
+		}
+
+		virtual void SetEncodingBits(void) = 0;
+
+		virtual bool GetFlip(void) = 0;
+
+		virtual bool IsDifferential(void) = 0;
+
+		virtual bool HasSeverelyBentDifferentialColors(void) const = 0;
+
+		inline Mode GetMode(void)
+		{
+			return m_mode;
+		}
+
+		inline bool IsDone(void)
+		{
+			return m_boolDone;
+		}
+		// mark the encoding as done if its error is exactly zero
+		inline void SetDoneIfPerfect()
+		{
+			if (GetError() == 0.0f)
+			{
+				m_boolDone = true;
+			}
+		}
+		// error of one decoded pixel under m_errormetric; 0 for border pixels (source alpha is NAN)
+		float CalcPixelError(ColorFloatRGBA a_frgbaDecodedColor, float a_fDecodedAlpha,
+								ColorFloatRGBA a_frgbaSourcePixel);
+
+	protected:
+		// shared initialization: stores the arguments and marks error and decoded pixels as undefined
+		void Init(Block4x4 *a_pblockParent,
+					ColorFloatRGBA *a_pafrgbaSource,
+
+					ErrorMetric a_errormetric);
+
+		Block4x4		*m_pblockParent;
+		ColorFloatRGBA	*m_pafrgbaSource;
+
+		bool			m_boolBorderPixels;				// if block has any border pixels
+
+		ColorFloatRGBA	m_afrgbaDecodedColors[PIXELS];	// decoded RGB components, ignore Alpha
+		float			m_afDecodedAlphas[PIXELS];		// decoded alpha component
+		float			m_fError;						// error for RGBA relative to m_pafrgbaSource
+
+		// intermediate encoding
+		Mode			m_mode;
+
+		unsigned int	m_uiEncodingIterations;
+		bool			m_boolDone;						// all iterations have been done
+		ErrorMetric		m_errormetric;					// metric used by CalcPixelError()
+
+	private:
+
+	};
+
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4EncodingBits.h b/extern/EtcLib/EtcCodec/EtcBlock4x4EncodingBits.h
new file mode 100644
index 0000000..5ba879e
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4EncodingBits.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <assert.h>
+
+namespace Etc
+{
+
+	// ################################################################################
+	// Block4x4EncodingBits
+	// Base class for Block4x4EncodingBits_XXXX
+	// ################################################################################
+
+	class Block4x4EncodingBits
+	{
+	public:
+
+		enum class Format
+		{
+			UNKNOWN,
+			//
+			RGB8,
+			RGBA8,
+			R11,
+			RG11,
+			RGB8A1,
+			//
+			FORMATS
+		};
+
+		// size in bytes of one encoded 4x4 block for a_format
+		// returns 0 for UNKNOWN, FORMATS and any other unrecognized value
+		static unsigned int GetBytesPerBlock(Format a_format)
+		{
+			// formats whose blocks occupy 8 bytes
+			const bool boolEightBytes = a_format == Format::RGB8 ||
+										a_format == Format::R11 ||
+										a_format == Format::RGB8A1;
+			if (boolEightBytes)
+			{
+				return 8;
+			}
+
+			// formats whose blocks occupy 16 bytes
+			if (a_format == Format::RGBA8 || a_format == Format::RG11)
+			{
+				return 16;
+			}
+
+			return 0;
+		}
+
+	};
+
+	// ################################################################################
+	// Block4x4EncodingBits_RGB8
+	// Encoding bits for the RGB portion of ETC1, RGB8, RGB8A1 and RGBA8
+	// ################################################################################
+
+	class Block4x4EncodingBits_RGB8
+	{
+	public:
+
+		static const unsigned int BYTES_PER_BLOCK = 8;
+		// zero all bytes; asserts that the layouts below make the class BYTES_PER_BLOCK bytes in size
+		inline Block4x4EncodingBits_RGB8(void)
+		{
+			assert(sizeof(Block4x4EncodingBits_RGB8) == BYTES_PER_BLOCK);
+
+			for (unsigned int uiByte = 0; uiByte < BYTES_PER_BLOCK; uiByte++)
+			{
+				auc[uiByte] = 0;
+			}
+
+		}
+		// individual mode: two base colors with 4 bits per channel
+		typedef struct
+		{
+			unsigned red2 : 4;
+			unsigned red1 : 4;
+			//
+			unsigned green2 : 4;
+			unsigned green1 : 4;
+			//
+			unsigned blue2 : 4;
+			unsigned blue1 : 4;
+			//
+			unsigned flip : 1;
+			unsigned diff : 1;
+			unsigned cw2 : 3;
+			unsigned cw1 : 3;
+			//
+			unsigned int selectors;
+		} Individual;
+		// differential mode: base color with 5 bits per channel plus a signed 3-bit delta per channel
+		typedef struct
+		{
+			signed dred2 : 3;
+			unsigned red1 : 5;
+			//
+			signed dgreen2 : 3;
+			unsigned green1 : 5;
+			//
+			signed dblue2 : 3;
+			unsigned blue1 : 5;
+			//
+			unsigned flip : 1;
+			unsigned diff : 1;
+			unsigned cw2 : 3;
+			unsigned cw1 : 3;
+			//
+			unsigned int selectors;
+		} Differential;
+		// T mode bit layout: two colors with 4 bits per channel; red1 is split into red1a/red1b
+		typedef struct
+		{
+			unsigned red1b : 2;
+			unsigned detect2 : 1;
+			unsigned red1a : 2;
+			unsigned detect1 : 3;
+			//
+			unsigned blue1 : 4;
+			unsigned green1 : 4;
+			//
+			unsigned green2 : 4;
+			unsigned red2 : 4;
+			//
+			unsigned db : 1;
+			unsigned diff : 1;
+			unsigned da : 2;
+			unsigned blue2 : 4;
+			//
+			unsigned int selectors;
+		} T;
+		// H mode bit layout: two colors with 4 bits per channel; green1, blue1 and green2 are split into parts
+		typedef struct
+		{
+			unsigned green1a : 3;
+			unsigned red1 : 4;
+			unsigned detect1 : 1;
+			//
+			unsigned blue1b : 2;
+			unsigned detect3 : 1;
+			unsigned blue1a : 1;
+			unsigned green1b : 1;
+			unsigned detect2 : 3;
+			//
+			unsigned green2a : 3;
+			unsigned red2 : 4;
+			unsigned blue1c : 1;
+			//
+			unsigned db : 1;
+			unsigned diff : 1;
+			unsigned da : 1;
+			unsigned blue2 : 4;
+			unsigned green2b : 1;
+			//
+			unsigned int selectors;
+		} H;
+		// planar mode bit layout: origin, horizontal and vertical colors, most of them split into parts
+		typedef struct
+		{
+			unsigned originGreen1 : 1;
+			unsigned originRed : 6;
+			unsigned detect1 : 1;
+			//
+			unsigned originBlue1 : 1;
+			unsigned originGreen2 : 6;
+			unsigned detect2 : 1;
+			//
+			unsigned originBlue3 : 2;
+			unsigned detect4 : 1;
+			unsigned originBlue2 : 2;
+			unsigned detect3 : 3;
+			//
+			unsigned horizRed2 : 1;
+			unsigned diff : 1;
+			unsigned horizRed1 : 5;
+			unsigned originBlue4 : 1;
+			//
+			unsigned horizBlue1: 1;
+			unsigned horizGreen : 7;
+			//
+			unsigned vertRed1 : 3;
+			unsigned horizBlue2 : 5;
+			//
+			unsigned vertGreen1 : 5;
+			unsigned vertRed2 : 3;
+			//
+			unsigned vertBlue : 6;
+			unsigned vertGreen2 : 2;
+		} Planar;
+		// every layout is a view of the same storage as the raw bytes in auc
+		union
+		{
+			unsigned char auc[BYTES_PER_BLOCK];
+			unsigned long int ul;
+			Individual individual;
+			Differential differential;
+			T t;
+			H h;
+			Planar planar;
+		};
+
+	};
+
+	// ################################################################################
+	// Block4x4EncodingBits_A8
+	// Encoding bits for the A portion of RGBA8
+	// ################################################################################
+
+	class Block4x4EncodingBits_A8
+	{
+	public:
+
+		static const unsigned int BYTES_PER_BLOCK = 8;
+		static const unsigned int SELECTOR_BYTES = 6;
+		// 8-bit base, 4-bit table and 4-bit multiplier, followed by SELECTOR_BYTES selector bytes
+		typedef struct
+		{
+			unsigned base : 8;
+			unsigned table : 4;
+			unsigned multiplier : 4;
+			unsigned selectors0 : 8;
+			unsigned selectors1 : 8;
+			unsigned selectors2 : 8;
+			unsigned selectors3 : 8;
+			unsigned selectors4 : 8;
+			unsigned selectors5 : 8;
+		} Data;
+
+		Data data;
+
+	};
+
+	// ################################################################################
+	// Block4x4EncodingBits_R11
+	// Encoding bits for the R portion of R11
+	// ################################################################################
+
+	class Block4x4EncodingBits_R11
+	{
+	public:
+
+		static const unsigned int BYTES_PER_BLOCK = 8;
+		static const unsigned int SELECTOR_BYTES = 6;
+		// 8-bit base, 4-bit table and 4-bit multiplier, followed by SELECTOR_BYTES selector bytes
+		typedef struct
+		{
+			unsigned base : 8;
+			unsigned table : 4;
+			unsigned multiplier : 4;
+			unsigned selectors0 : 8;
+			unsigned selectors1 : 8;
+			unsigned selectors2 : 8;
+			unsigned selectors3 : 8;
+			unsigned selectors4 : 8;
+			unsigned selectors5 : 8;
+		} Data;
+
+		Data data;
+
+	};
+
+	class Block4x4EncodingBits_RG11
+	{
+	public:
+
+		static const unsigned int BYTES_PER_BLOCK = 16;
+		static const unsigned int SELECTOR_BYTES = 12;
+		// two consecutive 8-byte groups (red, then green), each: base, table index, multiplier, 6 selector bytes
+		typedef struct
+		{
+			//Red portion
+			unsigned baseR : 8;
+			unsigned tableIndexR : 4;
+			unsigned multiplierR : 4;
+			unsigned selectorsR0 : 8;
+			unsigned selectorsR1 : 8;
+			unsigned selectorsR2 : 8;
+			unsigned selectorsR3 : 8;
+			unsigned selectorsR4 : 8;
+			unsigned selectorsR5 : 8;
+			//Green portion
+			unsigned baseG : 8;
+			unsigned tableIndexG : 4;
+			unsigned multiplierG : 4;
+			unsigned selectorsG0 : 8;
+			unsigned selectorsG1 : 8;
+			unsigned selectorsG2 : 8;
+			unsigned selectorsG3 : 8;
+			unsigned selectorsG4 : 8;
+			unsigned selectorsG5 : 8;
+		} Data;
+
+		Data data;
+
+	};
+
+}
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.cpp b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.cpp
new file mode 100644
index 0000000..7aa2de3
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.cpp
@@ -0,0 +1,1280 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding_ETC1.cpp
+
+Block4x4Encoding_ETC1 is the encoder to use when targeting file format ETC1.  This encoder is also
+used for the ETC1 subset of file format RGB8, RGBA8 and RGB8A1
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding_ETC1.h"
+
+#include "EtcBlock4x4.h"
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcDifferentialTrys.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <float.h>
+#include <limits>
+
+namespace Etc
+{
+
+	// pixel processing order if the flip bit = 0 (horizontal split)
+	const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip0[PIXELS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+
+	// pixel processing order if the flip bit = 1 (vertical split)
+	const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip1[PIXELS] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
+
+	// pixel processing order for horizontal scan (ETC normally does a vertical scan)
+	const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
+
+	// pixel indices for different block halves (the values are consistent with pixel index = column * 4 + row)
+	const unsigned int Block4x4Encoding_ETC1::s_auiLeftPixelMapping[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+	const unsigned int Block4x4Encoding_ETC1::s_auiRightPixelMapping[8] = { 8, 9, 10, 11, 12, 13, 14, 15 };
+	const unsigned int Block4x4Encoding_ETC1::s_auiTopPixelMapping[8] = { 0, 1, 4, 5, 8, 9, 12, 13 };
+	const unsigned int Block4x4Encoding_ETC1::s_auiBottomPixelMapping[8] = { 2, 3, 6, 7, 10, 11, 14, 15 };
+
+	// CW ranges that the ETC1 decoders use
+	// CW is basically a contrast for the different selector bits, since these values are offsets to the base color
+	// the first axis in the array is indexed by the CW in the encoding bits
+	// the second axis in the array is indexed by the selector bits
+	float Block4x4Encoding_ETC1::s_aafCwTable[CW_RANGES][SELECTORS] =	// each entry is an integer offset divided by 255
+	{
+		{ 2.0f / 255.0f, 8.0f / 255.0f, -2.0f / 255.0f, -8.0f / 255.0f },
+		{ 5.0f / 255.0f, 17.0f / 255.0f, -5.0f / 255.0f, -17.0f / 255.0f },
+		{ 9.0f / 255.0f, 29.0f / 255.0f, -9.0f / 255.0f, -29.0f / 255.0f },
+		{ 13.0f / 255.0f, 42.0f / 255.0f, -13.0f / 255.0f, -42.0f / 255.0f },
+		{ 18.0f / 255.0f, 60.0f / 255.0f, -18.0f / 255.0f, -60.0f / 255.0f },
+		{ 24.0f / 255.0f, 80.0f / 255.0f, -24.0f / 255.0f, -80.0f / 255.0f },
+		{ 33.0f / 255.0f, 106.0f / 255.0f, -33.0f / 255.0f, -106.0f / 255.0f },
+		{ 47.0f / 255.0f, 183.0f / 255.0f, -47.0f / 255.0f, -183.0f / 255.0f }
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Block4x4Encoding_ETC1::Block4x4Encoding_ETC1(void)
+	{
+		m_mode = MODE_ETC1;
+
+		// no encoding bits are attached until InitFromSource()/InitFromEncodingBits()
+		m_pencodingbitsRGB8 = nullptr;
+
+		m_boolDiff = false;
+		m_boolFlip = false;
+		m_frgbaColor1 = ColorFloatRGBA();
+		m_frgbaColor2 = ColorFloatRGBA();
+		m_uiCW1 = 0;
+		m_uiCW2 = 0;
+
+		// selectors start at 0 and decoded alphas default to fully opaque (1.0)
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			m_auiSelectors[uiPixel] = 0;
+			m_afDecodedAlphas[uiPixel] = 1.0f;
+		}
+
+		m_boolMostLikelyFlip = false;
+
+		// negative errors mean "not calculated yet"
+		m_fError = -1.0f;
+		m_fError1 = -1.0f;
+		m_fError2 = -1.0f;
+		m_boolSeverelyBentDifferentialColors = false;
+	}
+
+	 Block4x4Encoding_ETC1::~Block4x4Encoding_ETC1(void) {}	// empty: frees nothing, including m_pencodingbitsRGB8
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization prior to encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits
+	//
+	void Block4x4Encoding_ETC1::InitFromSource(Block4x4 *a_pblockParent,
+												ColorFloatRGBA *a_pafrgbaSource,
+												unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
+	{
+		// the caller's buffer is kept as the location of the final encoding bits
+		m_pencodingbitsRGB8 = reinterpret_cast<Block4x4EncodingBits_RGB8 *>(a_paucEncodingBits);
+		// base-class state; the error is left negative (undefined)
+		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric);
+		m_fError = -1.0f;
+
+		// decoded alphas are set to fully opaque
+		unsigned int uiIndex = 0;
+		while (uiIndex < PIXELS)
+		{
+			m_afDecodedAlphas[uiIndex++] = 1.0f;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits of a previous encoding
+	//
+	void Block4x4Encoding_ETC1::InitFromEncodingBits(Block4x4 *a_pblockParent,
+														unsigned char *a_paucEncodingBits,
+														ColorFloatRGBA *a_pafrgbaSource,
+														ErrorMetric a_errormetric)
+	{
+		// keeps a reconstructed 5-bit channel (base + delta) inside 0..31
+		auto clampTo5Bits = [](int a_iChannel) -> int
+		{
+			if (a_iChannel < 0)
+			{
+				return 0;
+			}
+			return (a_iChannel > 31) ? 31 : a_iChannel;
+		};
+
+		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric);
+		m_fError = -1.0f;
+
+		m_pencodingbitsRGB8 = reinterpret_cast<Block4x4EncodingBits_RGB8 *>(a_paucEncodingBits);
+		const Block4x4EncodingBits_RGB8 &bits = *m_pencodingbitsRGB8;
+
+		// flip, diff and both codewords are declared at the same position in the
+		// individual and differential layouts, so they are read through "individual"
+		m_mode = MODE_ETC1;
+		m_boolDiff = bits.individual.diff;
+		m_boolFlip = bits.individual.flip;
+		m_uiCW1 = bits.individual.cw1;
+		m_uiCW2 = bits.individual.cw2;
+
+		if (!m_boolDiff)
+		{
+			// individual mode: both base colors are stored with 4 bits per channel
+			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(bits.individual.red1,
+															bits.individual.green1,
+															bits.individual.blue1);
+			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(bits.individual.red2,
+															bits.individual.green2,
+															bits.individual.blue2);
+		}
+		else
+		{
+			// differential mode: color 1 is stored with 5 bits per channel and
+			// color 2 is color 1 plus a signed 3-bit delta per channel, clamped to 0..31
+			const int iRed2 = clampTo5Bits(bits.differential.red1 + bits.differential.dred2);
+			const int iGreen2 = clampTo5Bits(bits.differential.green1 + bits.differential.dgreen2);
+			const int iBlue2 = clampTo5Bits(bits.differential.blue1 + bits.differential.dblue2);
+
+			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(bits.differential.red1,
+															bits.differential.green1,
+															bits.differential.blue1);
+			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iRed2,
+															(unsigned char)iGreen2,
+															(unsigned char)iBlue2);
+		}
+
+		// unpack the per-pixel selectors from the encoding bits
+		InitFromEncodingBits_Selectors();
+
+		// Decode() is defined elsewhere; presumably it fills the decoded pixels - TODO confirm
+		Decode();
+
+		// sum the per-pixel errors of the decoded pixels against the source
+		CalcBlockError();
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// init the selectors from a prior encoding
+	//
+	void Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(void)
+	{
+		// the 32 selector bits are addressed one byte at a time
+		const unsigned char *paucBits = (const unsigned char *)&m_pencodingbitsRGB8->individual.selectors;
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			// pixels 0-7 use bytes 1 (MSB) and 3 (LSB); pixels 8-15 use bytes 0 and 2
+			const unsigned int uiHalf = uiPixel >> 3;
+			const unsigned int uiBit = uiPixel % 8;
+
+			const unsigned int uiHigh = (paucBits[1 - uiHalf] >> uiBit) & 1u;
+			const unsigned int uiLow = (paucBits[3 - uiHalf] >> uiBit) & 1u;
+
+			// two-bit selector: MSB in bit 1, LSB in bit 0
+			m_auiSelectors[uiPixel] = (uiHigh << 1) | uiLow;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_ETC1::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolDone);
+		// m_uiEncodingIterations selects the step; a higher a_fEffort allows more steps before m_boolDone is set
+		switch (m_uiEncodingIterations)
+		{
+		case 0:
+			PerformFirstIteration();
+			break;
+
+		case 1:
+			TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
+			break;
+
+		case 2:
+			TryIndividual(m_boolMostLikelyFlip, 1);
+			if (a_fEffort <= 49.5f)
+			{
+				m_boolDone = true;
+			}
+			break;
+
+		case 3:
+			TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
+			if (a_fEffort <= 59.5f)
+			{
+				m_boolDone = true;
+			}
+			break;
+
+		case 4:
+			TryIndividual(!m_boolMostLikelyFlip, 1);
+			if (a_fEffort <= 69.5f)
+			{
+				m_boolDone = true;
+			}
+			break;
+
+		case 5:
+			TryDegenerates1();
+			if (a_fEffort <= 79.5f)
+			{
+				m_boolDone = true;
+			}
+			break;
+
+		case 6:
+			TryDegenerates2();
+			if (a_fEffort <= 89.5f)
+			{
+				m_boolDone = true;
+			}
+			break;
+
+		case 7:
+			TryDegenerates3();
+			if (a_fEffort <= 99.5f)
+			{
+				m_boolDone = true;
+			}
+			break;
+
+		case 8:
+			TryDegenerates4();
+			m_boolDone = true;	// last step: done regardless of a_fEffort
+			break;
+
+		default:
+			assert(0);	// reached only if called with more than 8 iterations already performed
+			break;
+		}
+		// count this step; a zero-error encoding also marks the block as done
+		m_uiEncodingIterations++;
+		SetDoneIfPerfect();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best initial encoding to ensure block has a valid encoding
+	//
+	void Block4x4Encoding_ETC1::PerformFirstIteration(void)
+	{
+		CalculateMostLikelyFlip();
+
+		// start from the largest representable error
+		m_fError = FLT_MAX;
+
+		// candidates are tried in a fixed order; the rest are skipped once one is perfect
+		TryDifferential(m_boolMostLikelyFlip, 0, 0, 0);
+		SetDoneIfPerfect();
+
+		if (!m_boolDone)
+		{
+			TryIndividual(m_boolMostLikelyFlip, 0);
+			SetDoneIfPerfect();
+		}
+
+		if (!m_boolDone)
+		{
+			TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0);
+			SetDoneIfPerfect();
+		}
+
+		if (!m_boolDone)
+		{
+			TryIndividual(!m_boolMostLikelyFlip, 0);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// algorithm:
+	// create a source average color for the Left, Right, Top and Bottom halves using the 8 pixels in each half
+	// note: the "gray line" is the line of equal delta RGB that goes thru the average color
+	// for each half:
+	//		see how close each of the 8 pixels are to the "gray line" that goes thru the source average color
+	//		create an error value that is the sum of the distances from the gray line
+	// h_error is the sum of Left and Right errors
+	// v_error is the sum of Top and Bottom errors
+	//
+	void Block4x4Encoding_ETC1::CalculateMostLikelyFlip(void)
+	{
+		static const bool DEBUG_PRINT = false;
+
+		CalculateSourceAverages();
+
+		// per-half sums of the gray-line distances measured against that half's average color
+		float fSumLeft = 0.0f;
+		float fSumRight = 0.0f;
+		float fSumTop = 0.0f;
+		float fSumBottom = 0.0f;
+
+		for (unsigned int uiIndex = 0; uiIndex != 8; ++uiIndex)
+		{
+			// left/right halves are pixels 0-7 and 8-15; top/bottom use the mapping tables
+			ColorFloatRGBA &frgbaLeft = m_pafrgbaSource[uiIndex];
+			ColorFloatRGBA &frgbaRight = m_pafrgbaSource[uiIndex + 8];
+			ColorFloatRGBA &frgbaTop = m_pafrgbaSource[s_auiTopPixelMapping[uiIndex]];
+			ColorFloatRGBA &frgbaBottom = m_pafrgbaSource[s_auiBottomPixelMapping[uiIndex]];
+
+			fSumLeft += CalcGrayDistance2(frgbaLeft, m_frgbaSourceAverageLeft);
+			fSumRight += CalcGrayDistance2(frgbaRight, m_frgbaSourceAverageRight);
+			fSumTop += CalcGrayDistance2(frgbaTop, m_frgbaSourceAverageTop);
+			fSumBottom += CalcGrayDistance2(frgbaBottom, m_frgbaSourceAverageBottom);
+		}
+
+		const float fErrorH = fSumLeft + fSumRight;
+		const float fErrorV = fSumTop + fSumBottom;
+
+		if (DEBUG_PRINT)
+		{
+			printf("\n%.2f %.2f\n", fErrorH, fErrorV);
+		}
+
+		// flip when the top/bottom split gives the smaller total gray-line error
+		m_boolMostLikelyFlip = fErrorV < fErrorH;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// calculate the source color averages of the Left, Right, Top and Bottom halves of a 4x4 block
+	// sums are formed per 2x2 quadrant (UL, LL, UR, LR) first, then combined pairwise into the 4 halves
+	// ignore pixels that have alpha == NAN (these are border pixels outside of the source image)
+	// opaque blocks use a plain average, otherwise the averages are weighted by each pixel's alpha
+	//
+	void Block4x4Encoding_ETC1::CalculateSourceAverages(void)
+	{
+		static const bool DEBUG_PRINT = false;
+
+		// opaque blocks: every pixel counts equally, so no alpha weighting is needed
+		if (m_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE)
+		{
+			ColorFloatRGBA frgbaSumUL = m_pafrgbaSource[0] + m_pafrgbaSource[1] + m_pafrgbaSource[4] + m_pafrgbaSource[5];
+			ColorFloatRGBA frgbaSumLL = m_pafrgbaSource[2] + m_pafrgbaSource[3] + m_pafrgbaSource[6] + m_pafrgbaSource[7];
+			ColorFloatRGBA frgbaSumUR = m_pafrgbaSource[8] + m_pafrgbaSource[9] + m_pafrgbaSource[12] + m_pafrgbaSource[13];
+			ColorFloatRGBA frgbaSumLR = m_pafrgbaSource[10] + m_pafrgbaSource[11] + m_pafrgbaSource[14] + m_pafrgbaSource[15];
+			// each half is two quadrants = 8 pixels, hence the 1/8 scale
+			m_frgbaSourceAverageLeft = (frgbaSumUL + frgbaSumLL) * 0.125f;
+			m_frgbaSourceAverageRight = (frgbaSumUR + frgbaSumLR) * 0.125f;
+			m_frgbaSourceAverageTop = (frgbaSumUL + frgbaSumUR) * 0.125f;
+			m_frgbaSourceAverageBottom = (frgbaSumLL + frgbaSumLR) * 0.125f;
+		}
+		else
+		{
+			float afSourceAlpha[PIXELS];
+
+			// treat alpha NAN as 0.0f, so that such pixels get no weight at all
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				afSourceAlpha[uiPixel] = isnan(m_pafrgbaSource[uiPixel].fA) ? 
+																		0.0f : 
+																		m_pafrgbaSource[uiPixel].fA;
+			}
+			// scale every source pixel by its weight
+			ColorFloatRGBA afrgbaAlphaWeightedSource[PIXELS];
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				afrgbaAlphaWeightedSource[uiPixel] = m_pafrgbaSource[uiPixel] * afSourceAlpha[uiPixel];
+			}
+			// weighted color sum of each 2x2 quadrant (same pixel indices as the opaque case)
+			ColorFloatRGBA frgbaSumUL = afrgbaAlphaWeightedSource[0] +
+										afrgbaAlphaWeightedSource[1] +
+										afrgbaAlphaWeightedSource[4] +
+										afrgbaAlphaWeightedSource[5];
+
+			ColorFloatRGBA frgbaSumLL = afrgbaAlphaWeightedSource[2] +
+										afrgbaAlphaWeightedSource[3] +
+										afrgbaAlphaWeightedSource[6] +
+										afrgbaAlphaWeightedSource[7];
+
+			ColorFloatRGBA frgbaSumUR = afrgbaAlphaWeightedSource[8] +
+										afrgbaAlphaWeightedSource[9] +
+										afrgbaAlphaWeightedSource[12] +
+										afrgbaAlphaWeightedSource[13];
+
+			ColorFloatRGBA frgbaSumLR = afrgbaAlphaWeightedSource[10] +
+										afrgbaAlphaWeightedSource[11] +
+										afrgbaAlphaWeightedSource[14] +
+										afrgbaAlphaWeightedSource[15];
+			// total weight of each 2x2 quadrant
+			float fWeightSumUL = afSourceAlpha[0] +
+									afSourceAlpha[1] +
+									afSourceAlpha[4] +
+									afSourceAlpha[5];
+
+			float fWeightSumLL = afSourceAlpha[2] +
+									afSourceAlpha[3] +
+									afSourceAlpha[6] +
+									afSourceAlpha[7];
+
+			float fWeightSumUR = afSourceAlpha[8] +
+									afSourceAlpha[9] +
+									afSourceAlpha[12] +
+									afSourceAlpha[13];
+
+			float fWeightSumLR = afSourceAlpha[10] +
+									afSourceAlpha[11] +
+									afSourceAlpha[14] +
+									afSourceAlpha[15];
+			// pair the quadrants up into the four (overlapping) halves
+			ColorFloatRGBA frgbaSumLeft = frgbaSumUL + frgbaSumLL;
+			ColorFloatRGBA frgbaSumRight = frgbaSumUR + frgbaSumLR;
+			ColorFloatRGBA frgbaSumTop = frgbaSumUL + frgbaSumUR;
+			ColorFloatRGBA frgbaSumBottom = frgbaSumLL + frgbaSumLR;
+
+			float fWeightSumLeft = fWeightSumUL + fWeightSumLL;
+			float fWeightSumRight = fWeightSumUR + fWeightSumLR;
+			float fWeightSumTop = fWeightSumUL + fWeightSumUR;
+			float fWeightSumBottom = fWeightSumLL + fWeightSumLR;
+
+			// check to see if there is at least 1 pixel with non-zero alpha
+			// completely transparent block should not make it to this code (only checked when asserts are enabled)
+			assert((fWeightSumLeft + fWeightSumRight) > 0.0f);
+			assert((fWeightSumTop + fWeightSumBottom) > 0.0f);
+			// weighted average = weighted sum / total weight, for every half that has some weight
+			if (fWeightSumLeft > 0.0f)
+			{
+				m_frgbaSourceAverageLeft = frgbaSumLeft * (1.0f/fWeightSumLeft);
+			}
+			if (fWeightSumRight > 0.0f)
+			{
+				m_frgbaSourceAverageRight = frgbaSumRight * (1.0f/fWeightSumRight);
+			}
+			if (fWeightSumTop > 0.0f)
+			{
+				m_frgbaSourceAverageTop = frgbaSumTop * (1.0f/fWeightSumTop);
+			}
+			if (fWeightSumBottom > 0.0f)
+			{
+				m_frgbaSourceAverageBottom = frgbaSumBottom * (1.0f/fWeightSumBottom);
+			}
+			// a half without any weight takes over the average of the opposite half
+			if (fWeightSumLeft == 0.0f)
+			{
+				assert(fWeightSumRight > 0.0f);
+				m_frgbaSourceAverageLeft = m_frgbaSourceAverageRight;
+			}
+			if (fWeightSumRight == 0.0f)
+			{
+				assert(fWeightSumLeft > 0.0f);
+				m_frgbaSourceAverageRight = m_frgbaSourceAverageLeft;
+			}
+			if (fWeightSumTop == 0.0f)
+			{
+				assert(fWeightSumBottom > 0.0f);
+				m_frgbaSourceAverageTop = m_frgbaSourceAverageBottom;
+			}
+			if (fWeightSumBottom == 0.0f)
+			{
+				assert(fWeightSumTop > 0.0f);
+				m_frgbaSourceAverageBottom = m_frgbaSourceAverageTop;
+			}
+		}
+
+		
+
+		if (DEBUG_PRINT)
+		{
+			printf("\ntarget: [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f]\n",
+				m_frgbaSourceAverageLeft.fR, m_frgbaSourceAverageLeft.fG, m_frgbaSourceAverageLeft.fB,
+				m_frgbaSourceAverageRight.fR, m_frgbaSourceAverageRight.fG, m_frgbaSourceAverageRight.fB,
+				m_frgbaSourceAverageTop.fR, m_frgbaSourceAverageTop.fG, m_frgbaSourceAverageTop.fB,
+				m_frgbaSourceAverageBottom.fR, m_frgbaSourceAverageBottom.fG, m_frgbaSourceAverageBottom.fB);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try an ETC1 differential mode encoding
+	// use a_boolFlip to set the encoding F bit (set: Top/Bottom halves, clear: Left/Right halves)
+	// use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius]
+	// use a_iGrayOffset1 and a_iGrayOffset2 to offset the basecolor to search for degenerate encodings
+	// replace the encoding if the encoding error is less than previous encoding (m_fError)
+	//
+	void Block4x4Encoding_ETC1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius,
+												int a_iGrayOffset1, int a_iGrayOffset2)
+	{
+		// starting colors and pixel mappings of the two halves
+		ColorFloatRGBA frgbaColor1;
+		ColorFloatRGBA frgbaColor2;
+
+		const unsigned int *pauiPixelMapping1;
+		const unsigned int *pauiPixelMapping2;
+		// the search of each half starts from the source average color of that half
+		if (a_boolFlip)
+		{
+			frgbaColor1 = m_frgbaSourceAverageTop;
+			frgbaColor2 = m_frgbaSourceAverageBottom;
+
+			pauiPixelMapping1 = s_auiTopPixelMapping;
+			pauiPixelMapping2 = s_auiBottomPixelMapping;
+		}
+		else
+		{
+			frgbaColor1 = m_frgbaSourceAverageLeft;
+			frgbaColor2 = m_frgbaSourceAverageRight;
+
+			pauiPixelMapping1 = s_auiLeftPixelMapping;
+			pauiPixelMapping2 = s_auiRightPixelMapping;
+		}
+
+		DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, 
+								a_uiRadius, a_iGrayOffset1, a_iGrayOffset2);
+		// evaluate both halves on a scratch copy of this encoding; the results are collected in trys
+		Block4x4Encoding_ETC1 encodingTry = *this;
+		encodingTry.m_boolFlip = a_boolFlip;
+
+		encodingTry.TryDifferentialHalf(&trys.m_half1);
+		encodingTry.TryDifferentialHalf(&trys.m_half2);
+
+		// find best halves that are within differential range
+		DifferentialTrys::Try *ptryBest1 = nullptr;
+		DifferentialTrys::Try *ptryBest2 = nullptr;
+		encodingTry.m_fError = FLT_MAX;
+
+		// see if the best of each half are in differential range: (color2 - color1) within [-4, 3] per component
+		int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed;
+		int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen;
+		int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue;
+		if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3)
+		{
+			ptryBest1 = trys.m_half1.m_ptryBest;
+			ptryBest2 = trys.m_half2.m_ptryBest;
+			encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError;
+		}
+		else
+		{
+			// else, find the next best halves that are in differential range (every half1 try against every half2 try)
+			for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0];
+			ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys];
+				ptry1++)
+			{
+				for (DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0];
+				ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys];
+					ptry2++)
+				{
+					iDRed = ptry2->m_iRed - ptry1->m_iRed;
+					bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4;
+					iDGreen = ptry2->m_iGreen - ptry1->m_iGreen;
+					bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4;
+					iDBlue = ptry2->m_iBlue - ptry1->m_iBlue;
+					bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4;
+
+					if (boolValidRedDelta && boolValidGreenDelta && boolValidBlueDelta)
+					{
+						float fError = ptry1->m_fError + ptry2->m_fError;
+
+						if (fError < encodingTry.m_fError)
+						{
+							encodingTry.m_fError = fError;
+
+							ptryBest1 = ptry1;
+							ptryBest2 = ptry2;
+						}
+					}
+
+				}
+			}
+			assert(encodingTry.m_fError < FLT_MAX);
+			assert(ptryBest1 != nullptr);
+			assert(ptryBest2 != nullptr);
+		}
+		// adopt the try only if it is strictly better than the current encoding
+		if (encodingTry.m_fError < m_fError)
+		{
+			m_mode = MODE_ETC1;
+			m_boolDiff = true;
+			m_boolFlip = encodingTry.m_boolFlip;
+			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue);
+			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue);
+			m_uiCW1 = ptryBest1->m_uiCW;
+			m_uiCW2 = ptryBest2->m_uiCW;
+			// copy the selectors of each half to their block pixel positions and decode those pixels
+			for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++)
+			{
+				unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder];
+				unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder];
+
+				unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder];
+				unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder];
+
+				m_auiSelectors[uiPixel1] = uiSelector1;
+				m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder];
+
+				float fDeltaRGB1 = s_aafCwTable[m_uiCW1][uiSelector1];
+				float fDeltaRGB2 = s_aafCwTable[m_uiCW2][uiSelector2];
+
+				m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB();
+				m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB();
+			}
+
+			m_fError1 = ptryBest1->m_fError;
+			m_fError2 = ptryBest2->m_fError;
+			m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors;
+			m_fError = m_fError1 + m_fError2;
+
+			// sanity check: re-quantize the stored basecolors and confirm the deltas are still within [-4, 3]
+			{
+				int iRed1 = m_frgbaColor1.IntRed(31.0f);
+				int iGreen1 = m_frgbaColor1.IntGreen(31.0f);
+				int iBlue1 = m_frgbaColor1.IntBlue(31.0f);
+
+				int iRed2 = m_frgbaColor2.IntRed(31.0f);
+				int iGreen2 = m_frgbaColor2.IntGreen(31.0f);
+				int iBlue2 = m_frgbaColor2.IntBlue(31.0f);
+
+				iDRed = iRed2 - iRed1;
+				iDGreen = iGreen2 - iGreen1;
+				iDBlue = iBlue2 - iBlue1;
+
+				assert(iDRed >= -4 && iDRed < 4);
+				assert(iDGreen >= -4 && iDGreen < 4);
+				assert(iDBlue >= -4 && iDBlue < 4);
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try an ETC1 differential mode encoding for a half of a 4x4 block
+	// vary the basecolor components using a radius
+	//
+	// a_phalf supplies the basecolor center (m_iRed, m_iGreen, m_iBlue), the search radius (m_uiRadius) and
+	//		the pixel mapping of the half (m_pauiPixelMapping); its m_atry[], m_uiTrys and m_ptryBest are filled in
+	// every basecolor within +/- radius of the center, per component, becomes one try
+	// for each try, every CW is evaluated: each pixel of the half takes the selector with the lowest
+	//		CalcPixelError(), and the CW with the lowest summed pixel error is recorded in the try
+	// m_ptryBest ends up at the try with the lowest error (it stays nullptr if no try scores below FLT_MAX)
+	// center +/- radius is asserted to stay within [0, 31] for every component
+	//
+	void Block4x4Encoding_ETC1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf)
+	{
+
+		a_phalf->m_ptryBest = nullptr;
+		float fBestTryError = FLT_MAX;
+
+		a_phalf->m_uiTrys = 0;
+		for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius; 
+				iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius;
+				iRed++)
+		{
+			assert(iRed >= 0 && iRed <= 31);
+
+			for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius;
+					iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius;
+					iGreen++)
+			{
+				assert(iGreen >= 0 && iGreen <= 31);
+
+				for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius;
+						iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius;
+						iBlue++)
+				{
+					assert(iBlue >= 0 && iBlue <= 31);
+
+					// next free slot in the try table
+					DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys];
+					assert(ptry < &a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]);
+
+					ptry->m_iRed = iRed;
+					ptry->m_iGreen = iGreen;
+					ptry->m_iBlue = iBlue;
+					ptry->m_fError = FLT_MAX;
+					ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue);
+
+					// try each CW
+					for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
+					{
+						unsigned int auiPixelSelectors[PIXELS / 2];
+
+						// pre-compute decoded pixels for each selector
+						ColorFloatRGBA afrgbaSelectors[SELECTORS];
+						assert(SELECTORS == 4);
+						afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB();
+						afrgbaSelectors[1] = (frgbaColor + s_aafCwTable[uiCW][1]).ClampRGB();
+						afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB();
+						afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB();
+
+						// pick the best selector of every pixel, adding up the pixel errors in pixel order
+						float fCWError = 0.0f;
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS / 2; uiPixel++)
+						{
+							// block pixel index, looked up once per pixel instead of once per selector
+							const unsigned int uiSourcePixel = a_phalf->m_pauiPixelMapping[uiPixel];
+							float fBestPixelError = FLT_MAX;
+
+							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+							{
+								float fPixelError = CalcPixelError(afrgbaSelectors[uiSelector],
+																	m_afDecodedAlphas[uiSourcePixel],
+																	m_pafrgbaSource[uiSourcePixel]);
+
+								if (fPixelError < fBestPixelError)
+								{
+									auiPixelSelectors[uiPixel] = uiSelector;
+									fBestPixelError = fPixelError;
+								}
+							}
+
+							fCWError += fBestPixelError;
+						}
+
+						// if best CW so far
+						if (fCWError < ptry->m_fError)
+						{
+							ptry->m_uiCW = uiCW;
+							for (unsigned int uiPixel = 0; uiPixel < PIXELS / 2; uiPixel++)
+							{
+								ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel];
+							}
+							ptry->m_fError = fCWError;
+						}
+
+					}
+
+					// keep track of the best try of this half
+					if (ptry->m_fError < fBestTryError)
+					{
+						a_phalf->m_ptryBest = ptry;
+						fBestTryError = ptry->m_fError;
+					}
+
+					assert(ptry->m_fError < FLT_MAX);
+
+					a_phalf->m_uiTrys++;
+				}
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try an ETC1 individual mode encoding
+	// use a_boolFlip to set the encoding F bit (set: Top/Bottom halves, clear: Left/Right halves)
+	// use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius]
+	// the basecolor search of each half is seeded with the source average color of that half
+	// the two halves are searched separately and the best try of each half is combined
+	// replace the encoding if the encoding error is less than previous encoding
+	//
+	void Block4x4Encoding_ETC1::TryIndividual(bool a_boolFlip, unsigned int a_uiRadius)
+	{
+
+		// starting colors and pixel mappings of the two halves, picked by the split direction
+		ColorFloatRGBA frgbaStart1 = a_boolFlip ? m_frgbaSourceAverageTop : m_frgbaSourceAverageLeft;
+		ColorFloatRGBA frgbaStart2 = a_boolFlip ? m_frgbaSourceAverageBottom : m_frgbaSourceAverageRight;
+
+		const unsigned int *pauiMapping1 = a_boolFlip ? s_auiTopPixelMapping : s_auiLeftPixelMapping;
+		const unsigned int *pauiMapping2 = a_boolFlip ? s_auiBottomPixelMapping : s_auiRightPixelMapping;
+
+		IndividualTrys trys(frgbaStart1, frgbaStart2, pauiMapping1, pauiMapping2, a_uiRadius);
+
+		// evaluate both halves on a scratch copy of this encoding
+		Block4x4Encoding_ETC1 encodingTry = *this;
+		encodingTry.m_boolFlip = a_boolFlip;
+
+		encodingTry.TryIndividualHalf(&trys.m_half1);
+		encodingTry.TryIndividualHalf(&trys.m_half2);
+
+		// use the best of each half
+		IndividualTrys::Try *ptryHalf1 = trys.m_half1.m_ptryBest;
+		IndividualTrys::Try *ptryHalf2 = trys.m_half2.m_ptryBest;
+		encodingTry.m_fError = ptryHalf1->m_fError + ptryHalf2->m_fError;
+
+		// keep the current encoding unless the try is strictly better
+		if (!(encodingTry.m_fError < m_fError))
+		{
+			return;
+		}
+
+		// adopt the try
+		m_mode = MODE_ETC1;
+		m_boolDiff = false;
+		m_boolFlip = encodingTry.m_boolFlip;
+
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryHalf1->m_iRed,
+														(unsigned char)ptryHalf1->m_iGreen,
+														(unsigned char)ptryHalf1->m_iBlue);
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryHalf2->m_iRed,
+														(unsigned char)ptryHalf2->m_iGreen,
+														(unsigned char)ptryHalf2->m_iBlue);
+
+		m_uiCW1 = ptryHalf1->m_uiCW;
+		m_uiCW2 = ptryHalf2->m_uiCW;
+
+		// copy the selectors of each half to their block pixel positions and decode those pixels
+		for (unsigned int uiSlot = 0; uiSlot < PIXELS / 2; uiSlot++)
+		{
+			unsigned int uiBlockPixel1 = pauiMapping1[uiSlot];
+			unsigned int uiBlockPixel2 = pauiMapping2[uiSlot];
+
+			unsigned int uiChosen1 = ptryHalf1->m_auiSelectors[uiSlot];
+			unsigned int uiChosen2 = ptryHalf2->m_auiSelectors[uiSlot];
+
+			m_auiSelectors[uiBlockPixel1] = uiChosen1;
+			m_auiSelectors[uiBlockPixel2] = uiChosen2;
+
+			// decoded color = basecolor + table entry for (CW, selector), clamped
+			float fDelta1 = s_aafCwTable[m_uiCW1][uiChosen1];
+			float fDelta2 = s_aafCwTable[m_uiCW2][uiChosen2];
+
+			m_afrgbaDecodedColors[uiBlockPixel1] = (m_frgbaColor1 + fDelta1).ClampRGB();
+			m_afrgbaDecodedColors[uiBlockPixel2] = (m_frgbaColor2 + fDelta2).ClampRGB();
+		}
+
+		// per-half errors and their sum
+		m_fError1 = ptryHalf1->m_fError;
+		m_fError2 = ptryHalf2->m_fError;
+		m_fError = m_fError1 + m_fError2;
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try an ETC1 individual mode encoding for a half of a 4x4 block
+	// vary the basecolor components using a radius
+	//
+	// a_phalf supplies the basecolor center (m_iRed, m_iGreen, m_iBlue), the search radius (m_uiRadius) and
+	//		the pixel mapping of the half (m_pauiPixelMapping); its m_atry[], m_uiTrys and m_ptryBest are filled in
+	// every basecolor within +/- radius of the center, per component, becomes one try
+	// for each try, every CW is evaluated: each pixel of the half takes the selector with the lowest
+	//		CalcPixelError(), and the CW with the lowest summed pixel error is recorded in the try
+	// m_ptryBest ends up at the try with the lowest error (it stays nullptr if no try scores below FLT_MAX)
+	// center +/- radius is asserted to stay within [0, 15] for every component
+	//
+	void Block4x4Encoding_ETC1::TryIndividualHalf(IndividualTrys::Half *a_phalf)
+	{
+
+		a_phalf->m_ptryBest = nullptr;
+		float fBestTryError = FLT_MAX;
+
+		a_phalf->m_uiTrys = 0;
+		for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius;
+			iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius;
+			iRed++)
+		{
+			assert(iRed >= 0 && iRed <= 15);
+
+			for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius;
+				iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius;
+				iGreen++)
+			{
+				assert(iGreen >= 0 && iGreen <= 15);
+
+				for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius;
+					iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius;
+					iBlue++)
+				{
+					assert(iBlue >= 0 && iBlue <= 15);
+
+					// next free slot in the try table
+					IndividualTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys];
+					assert(ptry < &a_phalf->m_atry[IndividualTrys::Half::MAX_TRYS]);
+
+					ptry->m_iRed = iRed;
+					ptry->m_iGreen = iGreen;
+					ptry->m_iBlue = iBlue;
+					ptry->m_fError = FLT_MAX;
+					ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue);
+
+					// try each CW
+					for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
+					{
+						unsigned int auiPixelSelectors[PIXELS / 2];
+
+						// pre-compute decoded pixels for each selector
+						ColorFloatRGBA afrgbaSelectors[SELECTORS];
+						assert(SELECTORS == 4);
+						afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB();
+						afrgbaSelectors[1] = (frgbaColor + s_aafCwTable[uiCW][1]).ClampRGB();
+						afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB();
+						afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB();
+
+						// pick the best selector of every pixel, adding up the pixel errors in pixel order
+						float fCWError = 0.0f;
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS / 2; uiPixel++)
+						{
+							// block pixel index, looked up once per pixel instead of once per selector
+							const unsigned int uiSourcePixel = a_phalf->m_pauiPixelMapping[uiPixel];
+							float fBestPixelError = FLT_MAX;
+
+							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+							{
+								float fPixelError = CalcPixelError(afrgbaSelectors[uiSelector],
+																	m_afDecodedAlphas[uiSourcePixel],
+																	m_pafrgbaSource[uiSourcePixel]);
+
+								if (fPixelError < fBestPixelError)
+								{
+									auiPixelSelectors[uiPixel] = uiSelector;
+									fBestPixelError = fPixelError;
+								}
+							}
+
+							fCWError += fBestPixelError;
+						}
+
+						// if best CW so far
+						if (fCWError < ptry->m_fError)
+						{
+							ptry->m_uiCW = uiCW;
+							for (unsigned int uiPixel = 0; uiPixel < PIXELS / 2; uiPixel++)
+							{
+								ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel];
+							}
+							ptry->m_fError = fCWError;
+						}
+
+					}
+
+					// keep track of the best try of this half
+					if (ptry->m_fError < fBestTryError)
+					{
+						a_phalf->m_ptryBest = ptry;
+						fBestTryError = ptry->m_fError;
+					}
+
+					assert(ptry->m_fError < FLT_MAX);
+
+					a_phalf->m_uiTrys++;
+				}
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 1 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// this version retries differential mode along the most likely flip with radius 1 and one of the two
+	//		gray offsets set to +/-2; each later version moves further and is less likely to be successful
+	//
+	void Block4x4Encoding_ETC1::TryDegenerates1(void)
+	{
+		// { a_iGrayOffset1, a_iGrayOffset2 } pairs, in the order they are tried
+		static const int s_aaiGrayOffsets[4][2] = { { -2, 0 }, { 2, 0 }, { 0, 2 }, { 0, -2 } };
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 2 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// this version retries differential mode along the opposite of the most likely flip with radius 1 and one
+	//		of the two gray offsets set to +/-2; each later version is less likely to be successful
+	//
+	void Block4x4Encoding_ETC1::TryDegenerates2(void)
+	{
+		// { a_iGrayOffset1, a_iGrayOffset2 } pairs, in the order they are tried
+		static const int s_aaiGrayOffsets[4][2] = { { -2, 0 }, { 2, 0 }, { 0, 2 }, { 0, -2 } };
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(!m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 3 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// this version retries differential mode along the most likely flip with radius 1 and both gray offsets
+	//		set to +/-2 (all four sign combinations); each later version is less likely to be successful
+	//
+	void Block4x4Encoding_ETC1::TryDegenerates3(void)
+	{
+		// { a_iGrayOffset1, a_iGrayOffset2 } pairs, in the order they are tried
+		static const int s_aaiGrayOffsets[4][2] = { { -2, -2 }, { -2, 2 }, { 2, -2 }, { 2, 2 } };
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 4 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// this version retries differential mode along the most likely flip with radius 1 and one of the two
+	//		gray offsets set to +/-4; it moves the furthest and is the least likely to be successful
+	//
+	void Block4x4Encoding_ETC1::TryDegenerates4(void)
+	{
+		// { a_iGrayOffset1, a_iGrayOffset2 } pairs, in the order they are tried
+		static const int s_aaiGrayOffsets[4][2] = { { -4, 0 }, { 4, 0 }, { 0, 4 }, { 0, -4 } };
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// choose the CW and the per-pixel selectors of both halves, based on basecolors that have been previously set
+	// m_boolFlip picks the halves: Top/Bottom when set, Left/Right otherwise
+	// calculate the selectors for each half of the block separately
+	// set the block error as the sum of each half's error
+	//
+	void Block4x4Encoding_ETC1::CalculateSelectors()
+	{
+		// half 0 is the Top or Left half, half 1 is the Bottom or Right half
+		const unsigned int *pauiMappingHalf0 = m_boolFlip ? s_auiTopPixelMapping : s_auiLeftPixelMapping;
+		const unsigned int *pauiMappingHalf1 = m_boolFlip ? s_auiBottomPixelMapping : s_auiRightPixelMapping;
+
+		// each call stores the CW, selectors, decoded colors and error (m_fError1 / m_fError2) of its half
+		CalculateHalfOfTheSelectors(0, pauiMappingHalf0);
+		CalculateHalfOfTheSelectors(1, pauiMappingHalf1);
+
+		// total block error
+		m_fError = m_fError1 + m_fError2;
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// choose the best CW and selectors for half of the block, and calculate the error for that half
+	// a_uiHalf picks the state that is used: 0 for m_frgbaColor1/m_uiCW1/m_fError1, otherwise
+	//		m_frgbaColor2/m_uiCW2/m_fError2; the basecolor is only read, the CW and error are overwritten
+	// pauiPixelMapping lists the block pixel indices of the pixels that make up the half
+	// m_auiSelectors[] and m_afrgbaDecodedColors[] are updated for those pixels with the result of the best CW
+	//
+	void Block4x4Encoding_ETC1::CalculateHalfOfTheSelectors(unsigned int a_uiHalf,
+		const unsigned int *pauiPixelMapping)
+	{
+		static const bool DEBUG_PRINT = false;
+
+		ColorFloatRGBA *pfrgbaColor = a_uiHalf ? &m_frgbaColor2 : &m_frgbaColor1;
+		unsigned int *puiCW = a_uiHalf ? &m_uiCW2 : &m_uiCW1;
+
+		float *pfHalfError = a_uiHalf ? &m_fError2 : &m_fError1;
+		*pfHalfError = FLT_MAX;
+
+		// try each CW
+		for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
+		{
+			if (DEBUG_PRINT)
+			{
+				printf("\ncw=%u\n", uiCW);
+			}
+
+			// a selector's decoded color depends only on basecolor and CW, so compute it once per CW, not per pixel
+			ColorFloatRGBA afrgbaSelectors[SELECTORS];
+			for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+			{
+				afrgbaSelectors[uiSelector] = (*pfrgbaColor + s_aafCwTable[uiCW][uiSelector]).ClampRGB();
+			}
+
+			unsigned int auiPixelSelectors[PIXELS / 2];
+			ColorFloatRGBA	afrgbaDecodedPixels[PIXELS / 2];
+			float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
+
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS / 2; uiPixel++)
+			{
+				const unsigned int uiSourcePixel = pauiPixelMapping[uiPixel];
+				ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[uiSourcePixel];
+				if (DEBUG_PRINT)
+				{
+					printf("\tsource [%.2f,%.2f,%.2f]\n", pfrgbaSourcePixel->fR,
+						pfrgbaSourcePixel->fG, pfrgbaSourcePixel->fB);
+				}
+				for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+				{
+					ColorFloatRGBA &rfrgbaDecodedPixel = afrgbaSelectors[uiSelector];
+					float fPixelError = CalcPixelError(rfrgbaDecodedPixel, m_afDecodedAlphas[uiSourcePixel],
+														*pfrgbaSourcePixel);
+					if (DEBUG_PRINT)
+					{
+						printf("\tpixel %u, index %u [%.2f,%.2f,%.2f], error %.2f", uiPixel, uiSelector,
+							rfrgbaDecodedPixel.fR,
+							rfrgbaDecodedPixel.fG,
+							rfrgbaDecodedPixel.fB,
+							fPixelError);
+					}
+
+					if (fPixelError < afPixelErrors[uiPixel])
+					{
+						if (DEBUG_PRINT)
+						{
+							printf(" *");
+						}
+						auiPixelSelectors[uiPixel] = uiSelector;
+						afrgbaDecodedPixels[uiPixel] = rfrgbaDecodedPixel;
+						afPixelErrors[uiPixel] = fPixelError;
+					}
+
+					if (DEBUG_PRINT)
+					{
+						printf("\n");
+					}
+				}
+			}
+
+			// add up all pixel errors
+			float fCWError = 0.0f;
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS / 2; uiPixel++)
+			{
+				fCWError += afPixelErrors[uiPixel];
+			}
+			if (DEBUG_PRINT)
+			{
+				printf("\terror %.2f\n", fCWError);
+			}
+
+			// if best CW so far
+			if (fCWError < *pfHalfError)
+			{
+				*pfHalfError = fCWError;
+				*puiCW = uiCW;
+				for (unsigned int uiPixel = 0; uiPixel < PIXELS / 2; uiPixel++)
+				{
+					m_auiSelectors[pauiPixelMapping[uiPixel]] = auiPixelSelectors[uiPixel];
+					m_afrgbaDecodedColors[pauiPixelMapping[uiPixel]] = afrgbaDecodedPixels[uiPixel];
+				}
+			}
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	// differential mode stores basecolor 1 quantized to [0, 31] per component plus the delta to basecolor 2
+	// individual mode stores both basecolors quantized to [0, 15] per component
+	// the CWs, selectors, diff bit and flip bit are stored in both modes
+	//
+	void Block4x4Encoding_ETC1::SetEncodingBits(void)
+	{
+		assert(m_mode == MODE_ETC1);
+
+		if (!m_boolDiff)
+		{
+			m_pencodingbitsRGB8->individual.red1 = static_cast<unsigned int>(m_frgbaColor1.IntRed(15.0f));
+			m_pencodingbitsRGB8->individual.green1 = static_cast<unsigned int>(m_frgbaColor1.IntGreen(15.0f));
+			m_pencodingbitsRGB8->individual.blue1 = static_cast<unsigned int>(m_frgbaColor1.IntBlue(15.0f));
+
+			m_pencodingbitsRGB8->individual.red2 = static_cast<unsigned int>(m_frgbaColor2.IntRed(15.0f));
+			m_pencodingbitsRGB8->individual.green2 = static_cast<unsigned int>(m_frgbaColor2.IntGreen(15.0f));
+			m_pencodingbitsRGB8->individual.blue2 = static_cast<unsigned int>(m_frgbaColor2.IntBlue(15.0f));
+		}
+		else
+		{
+			// basecolor 1, quantized
+			const int iBaseRed = m_frgbaColor1.IntRed(31.0f);
+			const int iBaseGreen = m_frgbaColor1.IntGreen(31.0f);
+			const int iBaseBlue = m_frgbaColor1.IntBlue(31.0f);
+
+			// basecolor 2 is stored relative to basecolor 1
+			const int iDeltaRed = m_frgbaColor2.IntRed(31.0f) - iBaseRed;
+			const int iDeltaGreen = m_frgbaColor2.IntGreen(31.0f) - iBaseGreen;
+			const int iDeltaBlue = m_frgbaColor2.IntBlue(31.0f) - iBaseBlue;
+
+			assert(-4 <= iDeltaRed && iDeltaRed <= 3);
+			assert(-4 <= iDeltaGreen && iDeltaGreen <= 3);
+			assert(-4 <= iDeltaBlue && iDeltaBlue <= 3);
+
+			m_pencodingbitsRGB8->differential.red1 = static_cast<unsigned int>(iBaseRed);
+			m_pencodingbitsRGB8->differential.green1 = static_cast<unsigned int>(iBaseGreen);
+			m_pencodingbitsRGB8->differential.blue1 = static_cast<unsigned int>(iBaseBlue);
+
+			m_pencodingbitsRGB8->differential.dred2 = iDeltaRed;
+			m_pencodingbitsRGB8->differential.dgreen2 = iDeltaGreen;
+			m_pencodingbitsRGB8->differential.dblue2 = iDeltaBlue;
+		}
+
+		m_pencodingbitsRGB8->individual.cw1 = m_uiCW1;
+		m_pencodingbitsRGB8->individual.cw2 = m_uiCW2;
+
+		SetEncodingBits_Selectors();
+
+		m_pencodingbitsRGB8->individual.diff = static_cast<unsigned int>(m_boolDiff);
+		m_pencodingbitsRGB8->individual.flip = static_cast<unsigned int>(m_boolFlip);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the selectors in the encoding bits
+	// pixel N's selector msb goes to bit (N ^ 8) and its lsb goes to bit ((16 + N) ^ 8) of the selector word
+	//
+	void Block4x4Encoding_ETC1::SetEncodingBits_Selectors(void)
+	{
+		// assemble the selector word locally, then store it with a single write
+		unsigned int uiSelectorBits = 0;
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			const unsigned int uiSelector = m_auiSelectors[uiPixel];
+			const unsigned int uiMsbShift = uiPixel ^ 8;
+			const unsigned int uiLsbShift = (16 + uiPixel) ^ 8;
+			uiSelectorBits |= (uiSelector >> 1) << uiMsbShift;
+			uiSelectorBits |= (uiSelector & 1) << uiLsbShift;
+		}
+		m_pencodingbitsRGB8->individual.selectors = uiSelectorBits;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the decoded colors and decoded alpha based on the encoding state
+	// the first 8 pixels of the flip dependent pixel order use basecolor 1 / CW 1, the rest use basecolor 2 / CW 2
+	// every decoded alpha is set to 1.0f
+	//
+	void Block4x4Encoding_ETC1::Decode(void)
+	{
+		// visiting order of the block pixels: those of the first half, then those of the second half
+		const unsigned int *pauiOrder = m_boolFlip ? s_auiPixelOrderFlip1 : s_auiPixelOrderFlip0;
+
+		for (unsigned int uiStep = 0; uiStep < PIXELS; uiStep++)
+		{
+			const bool boolFirstHalf = uiStep < 8;
+			ColorFloatRGBA &rfrgbaBase = boolFirstHalf ? m_frgbaColor1 : m_frgbaColor2;
+			const unsigned int uiHalfCW = boolFirstHalf ? m_uiCW1 : m_uiCW2;
+			const unsigned int uiPixel = pauiOrder[uiStep];
+			// basecolor plus the table entry for (CW, selector), clamped
+			m_afrgbaDecodedColors[uiPixel] = (rfrgbaBase + s_aafCwTable[uiHalfCW][m_auiSelectors[uiPixel]]).ClampRGB();
+			m_afDecodedAlphas[uiPixel] = 1.0f;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.h b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.h
new file mode 100644
index 0000000..816aa05
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcBlock4x4Encoding.h"
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcDifferentialTrys.h"
+#include "EtcIndividualTrys.h"
+
+namespace Etc
+{
+
+	// ETC1 encoding of a 4x4 block; base class for Block4x4Encoding_RGB8
+	class Block4x4Encoding_ETC1 : public Block4x4Encoding
+	{
+	public:
+
+		Block4x4Encoding_ETC1(void);
+		virtual ~Block4x4Encoding_ETC1(void);
+
+		virtual void InitFromSource(Block4x4 *a_pblockParent,
+									ColorFloatRGBA *a_pafrgbaSource,
+
+									unsigned char *a_paucEncodingBits,
+									ErrorMetric a_errormetric);
+
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+											unsigned char *a_paucEncodingBits,
+											ColorFloatRGBA *a_pafrgbaSource, 
+
+											ErrorMetric a_errormetric);
+
+		virtual void PerformIteration(float a_fEffort);
+		// flip flag of the intermediate encoding (m_boolFlip)
+		inline virtual bool GetFlip(void)
+		{
+			return m_boolFlip;
+		}
+		// diff flag of the intermediate encoding (m_boolDiff)
+		inline virtual bool IsDifferential(void)
+		{
+			return m_boolDiff;
+		}
+
+		virtual void SetEncodingBits(void);
+		// set the decoded colors and decoded alpha based on the encoding state
+		void Decode(void);
+		// read-only accessors for the intermediate encoding
+		inline ColorFloatRGBA GetColor1(void) const
+		{
+			return m_frgbaColor1;
+		}
+
+		inline ColorFloatRGBA GetColor2(void) const
+		{
+			return m_frgbaColor2;
+		}
+
+		inline const unsigned int * GetSelectors(void) const
+		{
+			return m_auiSelectors;
+		}
+
+		inline unsigned int GetCW1(void) const
+		{
+			return m_uiCW1;
+		}
+
+		inline unsigned int GetCW2(void) const
+		{
+			return m_uiCW2;
+		}
+		// NOTE(review): per the member comment below, only meaningful when m_boolDiff is set
+		inline bool HasSeverelyBentDifferentialColors(void) const
+		{
+			return m_boolSeverelyBentDifferentialColors;
+		}
+
+	protected:
+		// pixel visiting orders; Decode() walks Flip0/Flip1 with the first 8 entries as half 1
+		static const unsigned int s_auiPixelOrderFlip0[PIXELS];
+		static const unsigned int s_auiPixelOrderFlip1[PIXELS];
+		static const unsigned int s_auiPixelOrderHScan[PIXELS];
+
+		static const unsigned int s_auiLeftPixelMapping[8];
+		static const unsigned int s_auiRightPixelMapping[8];
+		static const unsigned int s_auiTopPixelMapping[8];
+		static const unsigned int s_auiBottomPixelMapping[8];
+
+		static const unsigned int SELECTOR_BITS = 2;
+		static const unsigned int SELECTORS = 1 << SELECTOR_BITS;
+
+		static const unsigned int CW_BITS = 3;
+		static const unsigned int CW_RANGES = 1 << CW_BITS;
+		// offset added to a half's color, indexed [CW][selector] (see Decode())
+		static float s_aafCwTable[CW_RANGES][SELECTORS];
+		static unsigned char s_aucDifferentialCwRange[256];
+
+		static const int MAX_DIFFERENTIAL = 3;
+		static const int MIN_DIFFERENTIAL = -4;
+
+		void InitFromEncodingBits_Selectors(void);
+
+		void PerformFirstIteration(void);
+		void CalculateMostLikelyFlip(void);
+
+		void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius,
+								int a_iGrayOffset1, int a_iGrayOffset2);
+		void TryDifferentialHalf(DifferentialTrys::Half *a_phalf);
+
+		void TryIndividual(bool a_boolFlip, unsigned int a_uiRadius);
+		void TryIndividualHalf(IndividualTrys::Half *a_phalf);
+
+		void TryDegenerates1(void);
+		void TryDegenerates2(void);
+		void TryDegenerates3(void);
+		void TryDegenerates4(void);
+
+		void CalculateSelectors();
+		void CalculateHalfOfTheSelectors(unsigned int a_uiHalf,
+											const unsigned int *pauiPixelMapping);
+
+		// calculate the distance2 of r_frgbaPixel from r_frgbaTarget's gray line
+		inline float CalcGrayDistance2(ColorFloatRGBA &r_frgbaPixel, 
+										ColorFloatRGBA &r_frgbaTarget)
+		{
+			float fDeltaGray = ((r_frgbaPixel.fR - r_frgbaTarget.fR) +
+								(r_frgbaPixel.fG - r_frgbaTarget.fG) +
+								(r_frgbaPixel.fB - r_frgbaTarget.fB)) / 3.0f;	// mean per-channel difference
+			// shift the target by that mean (equally on R, G and B) and clamp it
+			ColorFloatRGBA frgbaPointOnGrayLine = (r_frgbaTarget + fDeltaGray).ClampRGB();
+
+			float fDR = r_frgbaPixel.fR - frgbaPointOnGrayLine.fR;
+			float fDG = r_frgbaPixel.fG - frgbaPointOnGrayLine.fG;
+			float fDB = r_frgbaPixel.fB - frgbaPointOnGrayLine.fB;
+			// squared RGB distance between the pixel and that shifted point
+			return (fDR*fDR) + (fDG*fDG) + (fDB*fDB);
+		}
+		// set the selectors in the encoding bits
+		void SetEncodingBits_Selectors(void);
+
+		// intermediate encoding
+		bool			m_boolDiff;
+		bool			m_boolFlip;
+		ColorFloatRGBA	m_frgbaColor1;	// color of half 1 before the selector offset is added
+		ColorFloatRGBA	m_frgbaColor2;	// color of half 2 before the selector offset is added
+		unsigned int	m_uiCW1;	// row of s_aafCwTable used by half 1
+		unsigned int	m_uiCW2;	// row of s_aafCwTable used by half 2
+		unsigned int	m_auiSelectors[PIXELS];	// column of s_aafCwTable chosen for each pixel
+
+		// state shared between iterations
+		ColorFloatRGBA	m_frgbaSourceAverageLeft;
+		ColorFloatRGBA	m_frgbaSourceAverageRight;
+		ColorFloatRGBA	m_frgbaSourceAverageTop;
+		ColorFloatRGBA	m_frgbaSourceAverageBottom;
+		bool			m_boolMostLikelyFlip;
+
+		// stats
+		float			m_fError1;	// error for Etc1 half 1
+		float			m_fError2;	// error for Etc1 half 2
+		bool			m_boolSeverelyBentDifferentialColors;	// only valid if m_boolDiff;
+
+		// final encoding
+		Block4x4EncodingBits_RGB8 *m_pencodingbitsRGB8;		// or RGB8 portion of Block4x4EncodingBits_RGB8A8
+
+		private:
+
+		void CalculateSourceAverages(void);
+
+	};
+
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.cpp b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.cpp
new file mode 100644
index 0000000..5dd9884
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.cpp
@@ -0,0 +1,429 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding_R11.cpp
+
+Block4x4Encoding_R11 is the encoder to use when targeting file formats R11 and SR11 (signed R11).
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding_R11.h"
+
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcBlock4x4.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <float.h>
+#include <limits>
+
+namespace Etc
+{
+
+	// modifier values for R11, SR11, RG11 and SRG11, indexed [table][selector] and stored pre-divided by 255
+	float Block4x4Encoding_R11::s_aafModifierTable[MODIFIER_TABLE_ENTRYS][SELECTORS]
+	{	// tables 0-3 (selectors 0-3 hold the negative modifiers, selectors 4-7 the non-negative ones)
+		{ -3.0f / 255.0f, -6.0f / 255.0f,  -9.0f / 255.0f, -15.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 8.0f / 255.0f, 14.0f / 255.0f },
+		{ -3.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, -13.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f, 12.0f / 255.0f },
+		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 12.0f / 255.0f },
+		{ -2.0f / 255.0f, -4.0f / 255.0f,  -6.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 12.0f / 255.0f },
+		// tables 4-7
+		{ -3.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -12.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 11.0f / 255.0f },
+		{ -3.0f / 255.0f, -7.0f / 255.0f,  -9.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 8.0f / 255.0f, 10.0f / 255.0f },
+		{ -4.0f / 255.0f, -7.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
+		{ -3.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
+		// tables 8-11
+		{ -2.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
+		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
+		{ -2.0f / 255.0f, -4.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
+		{ -2.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
+		// tables 12-15
+		{ -3.0f / 255.0f, -4.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 2.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
+		{ -1.0f / 255.0f, -2.0f / 255.0f,  -3.0f / 255.0f, -10.0f / 255.0f, 0.0f / 255.0f, 1.0f / 255.0f, 2.0f / 255.0f,  9.0f / 255.0f },
+		{ -4.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f,  -9.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  8.0f / 255.0f },
+		{ -3.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f,  -9.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  8.0f / 255.0f }
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	// construct with no encoding bits attached; InitFromSource() or InitFromEncodingBits() attaches them
+	Block4x4Encoding_R11::Block4x4Encoding_R11(void)
+		: m_pencodingbitsR11(nullptr)
+	{
+	}
+
+	// the destructor has nothing to release
+	Block4x4Encoding_R11::~Block4x4Encoding_R11(void)
+	{}
+	// ----------------------------------------------------------------------------------------------------
+	// initialization prior to encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to where SetEncodingBits() will write the final encoding bits
+	// a_errormetric is used to choose the best encoding
+	//
+	void Block4x4Encoding_R11::InitFromSource(Block4x4 *a_pblockParent,
+		ColorFloatRGBA *a_pafrgbaSource,
+		unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
+	{
+		// common base-class state first, then the R11 view of the output bits
+		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric);
+		m_pencodingbitsR11 = reinterpret_cast<Block4x4EncodingBits_R11 *>(a_paucEncodingBits);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// rebuild the encoding state from the encoding bits of a previous encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_paucEncodingBits points to the final encoding bits of a previous encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_errormetric is used to choose the best encoding
+	//
+	void Block4x4Encoding_R11::InitFromEncodingBits(Block4x4 *a_pblockParent,
+		unsigned char *a_paucEncodingBits,
+		ColorFloatRGBA *a_pafrgbaSource,
+		ErrorMetric a_errormetric)
+	{
+		m_pencodingbitsR11 = reinterpret_cast<Block4x4EncodingBits_R11 *>(a_paucEncodingBits);
+
+		// init RGB portion (the base class is handed the very same bits)
+		Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent,
+			reinterpret_cast<unsigned char *>(m_pencodingbitsR11),
+			a_pafrgbaSource,
+			a_errormetric);
+
+		// init R11 portion
+		m_mode = MODE_R11;
+
+		// classify the image format once instead of re-querying it for every pixel
+		const auto format = a_pblockParent->GetImageSource()->GetFormat();
+		const bool boolSigned = (format == Image::Format::SIGNED_R11 || format == Image::Format::SIGNED_RG11);
+		const bool boolUnsigned = (format == Image::Format::R11 || format == Image::Format::RG11);
+
+		// the stored base is read as a signed byte for the signed formats, unsigned otherwise
+		if (boolSigned)
+		{
+			m_fRedBase = (float)(signed char)m_pencodingbitsR11->data.base;
+		}
+		else
+		{
+			m_fRedBase = (float)(unsigned char)m_pencodingbitsR11->data.base;
+		}
+		m_fRedMultiplier = (float)m_pencodingbitsR11->data.multiplier;
+		m_uiRedModifierTableIndex = m_pencodingbitsR11->data.table;
+
+		// chain the six selector fields, selectors0 first, into a single value
+		unsigned long long int ulliSelectorBits = (unsigned long long int)m_pencodingbitsR11->data.selectors0;
+		ulliSelectorBits = (ulliSelectorBits << 8) | (unsigned long long int)m_pencodingbitsR11->data.selectors1;
+		ulliSelectorBits = (ulliSelectorBits << 8) | (unsigned long long int)m_pencodingbitsR11->data.selectors2;
+		ulliSelectorBits = (ulliSelectorBits << 8) | (unsigned long long int)m_pencodingbitsR11->data.selectors3;
+		ulliSelectorBits = (ulliSelectorBits << 8) | (unsigned long long int)m_pencodingbitsR11->data.selectors4;
+		ulliSelectorBits = (ulliSelectorBits << 8) | (unsigned long long int)m_pencodingbitsR11->data.selectors5;
+
+		// peel off one selector per pixel, starting from the highest bits
+		for (unsigned int uiPixel = 0, uiShift = 45; uiPixel < PIXELS; uiPixel++, uiShift -= 3)
+		{
+			m_auiRedSelectors[uiPixel] = (ulliSelectorBits >> uiShift) & (SELECTORS - 1);
+		}
+
+		// decode the red channel; the signed formats are re-biased by 128 first
+		const float fDecodeBase = boolSigned ? (m_fRedBase + 128) : m_fRedBase;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			float fDecodedPixelData = 0.0f;
+			if (boolUnsigned || boolSigned)
+			{
+				fDecodedPixelData = DecodePixelRed(fDecodeBase, m_fRedMultiplier,
+					m_uiRedModifierTableIndex, m_auiRedSelectors[uiPixel]);
+			}
+			else
+			{
+				assert(0);
+			}
+			m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fDecodedPixelData, 0.0f, 0.0f, 1.0f);
+		}
+
+		// calc red error
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// each iteration re-runs CalculateR11() with its own selector count and search radii
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_R11::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolDone);
+		m_mode = MODE_R11;
+
+		// search parameters for this iteration, and whether it ends the encoding
+		unsigned int uiSelectorsUsed = 0;
+		float fBaseRadius = 0.0f;
+		float fMultiplierRadius = 0.0f;
+		bool boolRunSearch = true;
+		bool boolFinished = false;
+
+		switch (m_uiEncodingIterations)
+		{
+		case 0:
+			m_fError = FLT_MAX;
+			m_fRedBlockError = FLT_MAX;		// artificially high value
+			uiSelectorsUsed = 8; fBaseRadius = 0.0f; fMultiplierRadius = 0.0f;
+			break;
+		case 1:
+			uiSelectorsUsed = 8; fBaseRadius = 2.0f; fMultiplierRadius = 1.0f;
+			boolFinished = (a_fEffort <= 24.5f);
+			break;
+		case 2:
+			uiSelectorsUsed = 8; fBaseRadius = 12.0f; fMultiplierRadius = 1.0f;
+			boolFinished = (a_fEffort <= 49.5f);
+			break;
+		case 3:
+			uiSelectorsUsed = 7; fBaseRadius = 6.0f; fMultiplierRadius = 1.0f;
+			break;
+		case 4:
+			uiSelectorsUsed = 6; fBaseRadius = 3.0f; fMultiplierRadius = 1.0f;
+			break;
+		case 5:
+			uiSelectorsUsed = 5; fBaseRadius = 1.0f; fMultiplierRadius = 0.0f;
+			boolFinished = true;
+			break;
+		default:
+			assert(0);
+			boolRunSearch = false;
+			break;
+		}
+
+		if (boolRunSearch)
+		{
+			CalculateR11(uiSelectorsUsed, fBaseRadius, fMultiplierRadius);
+			m_fError = m_fRedBlockError;
+			if (boolFinished)
+			{
+				m_boolDone = true;
+			}
+		}
+
+		m_uiEncodingIterations++;
+		SetDoneIfPerfect();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find the best combination of base color, multiplier and selectors by exhaustive search;
+	// a candidate is stored in the m_fRed*/m_auiRedSelectors members when it beats m_fRedBlockError
+	// a_uiSelectorsUsed limits the number of selector combinations to try (PerformIteration passes 5..8)
+	// a_fBaseRadius limits the range of base colors to try (in 1/255 steps around the estimate)
+	// a_fMultiplierRadius limits the range of multipliers to try (around the estimated multiplier)
+	//
+	void Block4x4Encoding_R11::CalculateR11(unsigned int a_uiSelectorsUsed, 
+												float a_fBaseRadius, float a_fMultiplierRadius)
+	{
+		// maps from virtual (monotonic) selector to ETC selector
+		static const unsigned int auiVirtualSelectorMap[8] = {3, 2, 1, 0, 4, 5, 6, 7};
+
+		// find min/max red of the non-border source pixels
+		float fMinRed = 1.0f;
+		float fMaxRed = 0.0f;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			// ignore border pixels (marked by a NaN alpha)
+			float fAlpha = m_pafrgbaSource[uiPixel].fA;
+			if (isnan(fAlpha))
+			{
+				continue;
+			}
+
+			float fRed = m_pafrgbaSource[uiPixel].fR;
+
+			if (fRed < fMinRed)
+			{
+				fMinRed = fRed;
+			}
+			if (fRed > fMaxRed)
+			{
+				fMaxRed = fRed;
+			}
+		}
+		assert(fMinRed <= fMaxRed);	// NOTE(review): fires if every pixel was skipped as a border pixel
+
+		float fRedRange = (fMaxRed - fMinRed);
+
+		// try each modifier table entry
+		for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++)
+		{
+			for (unsigned int uiMinVirtualSelector = 0; 
+					uiMinVirtualSelector <= (8- a_uiSelectorsUsed);	// NOTE(review): wraps (unsigned) if a_uiSelectorsUsed > 8
+					uiMinVirtualSelector++)
+			{
+				unsigned int uiMaxVirtualSelector = uiMinVirtualSelector + a_uiSelectorsUsed - 1;
+
+				unsigned int uiMinSelector = auiVirtualSelectorMap[uiMinVirtualSelector];
+				unsigned int uiMaxSelector = auiVirtualSelectorMap[uiMaxVirtualSelector];
+				// negated modifier of the window's first selector, and the modifier span of the window
+				float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][uiMinSelector];
+
+				float fTableEntryRange = s_aafModifierTable[uiTableEntry][uiMaxSelector] -
+											s_aafModifierTable[uiTableEntry][uiMinSelector];
+
+				float fCenterRatio = fTableEntryCenter / fTableEntryRange;
+				// initial base estimate, rounded to a whole 1/255 step
+				float fCenter = fMinRed + fCenterRatio*fRedRange;
+				fCenter = roundf(255.0f * fCenter) / 255.0f;
+				// base search range: estimate +/- a_fBaseRadius steps, clamped to [0, 1]
+				float fMinBase = fCenter - (a_fBaseRadius / 255.0f);
+				if (fMinBase < 0.0f)
+				{
+					fMinBase = 0.0f;
+				}
+
+				float fMaxBase = fCenter + (a_fBaseRadius / 255.0f);
+				if (fMaxBase > 1.0f)
+				{
+					fMaxBase = 1.0f;
+				}
+				// NOTE(review): the step is slightly below 1/255, presumably to absorb float drift - confirm
+				for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f))
+				{
+					float fRangeMultiplier = roundf(fRedRange / fTableEntryRange);	// does not depend on fBase
+
+					float fMinMultiplier = fRangeMultiplier - a_fMultiplierRadius;
+					if (fMinMultiplier < 1.0f)
+					{
+						fMinMultiplier = 0.0f;	// 0 is searched too; DecodePixelRed() decodes it as 1/8
+					}
+					else if (fMinMultiplier > 15.0f)
+					{
+						fMinMultiplier = 15.0f;
+					}
+
+					float fMaxMultiplier = fRangeMultiplier + a_fMultiplierRadius;
+					if (fMaxMultiplier < 1.0f)
+					{
+						fMaxMultiplier = 1.0f;
+					}
+					else if (fMaxMultiplier > 15.0f)
+					{
+						fMaxMultiplier = 15.0f;
+					}
+
+					for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f)
+					{
+						// find best selector for each pixel (NOTE(review): entries stay unset if no error is below FLT_MAX)
+						unsigned int auiBestSelectors[PIXELS];
+						float afBestRedError[PIXELS];
+						float afBestPixelRed[PIXELS];
+
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+						{
+							float fBestPixelRedError = FLT_MAX;
+
+							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+							{
+								float fPixelRed = DecodePixelRed(fBase * 255.0f, fMultiplier, uiTableEntry, uiSelector);
+								// candidate: decoded red, the source pixel's own green, zero blue, alpha 1
+								ColorFloatRGBA frgba(fPixelRed, m_pafrgbaSource[uiPixel].fG,0.0f,1.0f);
+
+								float fPixelRedError = CalcPixelError(frgba, 1.0f, m_pafrgbaSource[uiPixel]);
+
+								if (fPixelRedError < fBestPixelRedError)
+								{
+									fBestPixelRedError = fPixelRedError;
+									auiBestSelectors[uiPixel] = uiSelector;
+									afBestRedError[uiPixel] = fBestPixelRedError;
+									afBestPixelRed[uiPixel] = fPixelRed;
+								}
+							}
+						}
+						float fBlockError = 0.0f;	// sum of the per-pixel best errors
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+						{
+							fBlockError += afBestRedError[uiPixel];
+						}
+						if (fBlockError < m_fRedBlockError)	// new best: record it
+						{
+							m_fRedBlockError = fBlockError;
+							// unsigned formats keep the base as 255 * fBase; signed formats store it offset by -128
+							if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
+							{
+								m_fRedBase = 255.0f * fBase;
+							}
+							else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
+							{
+								m_fRedBase = (fBase * 255) - 128;
+							}
+							else
+							{
+								assert(0);
+							}
+							m_fRedMultiplier = fMultiplier;
+							m_uiRedModifierTableIndex = uiTableEntry;
+
+							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+							{
+								m_auiRedSelectors[uiPixel] = auiBestSelectors[uiPixel];
+								float fBestPixelRed = afBestPixelRed[uiPixel];
+								m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fBestPixelRed, 0.0f, 0.0f, 1.0f);
+								m_afDecodedAlphas[uiPixel] = 1.0f;
+							}
+						}
+					}
+				}
+
+			}
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// write the encoding state (base, table, multiplier and selectors) into the encoding bits
+	//
+	void Block4x4Encoding_R11::SetEncodingBits(void)
+	{
+		const auto format = m_pblockParent->GetImageSource()->GetFormat();
+		if (format == Image::Format::R11 || format == Image::Format::RG11)
+		{
+			m_pencodingbitsR11->data.base = (unsigned char)roundf(m_fRedBase);
+		}
+		else if (format == Image::Format::SIGNED_R11 || format == Image::Format::SIGNED_RG11)
+		{
+			m_pencodingbitsR11->data.base = (signed char)roundf(m_fRedBase);
+		}
+		else
+		{
+			assert(0);
+		}
+		m_pencodingbitsR11->data.table = m_uiRedModifierTableIndex;
+		m_pencodingbitsR11->data.multiplier = (unsigned char)roundf(m_fRedMultiplier);
+		// pack the selectors, first pixel in the highest bits
+		unsigned long long int ulliPacked = 0;
+		for (unsigned int uiPixel = 0, uiShift = 45; uiPixel < PIXELS; uiPixel++, uiShift -= 3)
+		{
+			ulliPacked |= ((unsigned long long int)m_auiRedSelectors[uiPixel]) << uiShift;
+		}
+		// hand the packed value out in 8-bit steps, highest bits first
+		m_pencodingbitsR11->data.selectors0 = ulliPacked >> 40;
+		m_pencodingbitsR11->data.selectors1 = ulliPacked >> 32;
+		m_pencodingbitsR11->data.selectors2 = ulliPacked >> 24;
+		m_pencodingbitsR11->data.selectors3 = ulliPacked >> 16;
+		m_pencodingbitsR11->data.selectors4 = ulliPacked >> 8;
+		m_pencodingbitsR11->data.selectors5 = ulliPacked;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+}
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.h b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.h
new file mode 100644
index 0000000..99cb4df
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcBlock4x4Encoding_RGB8.h"
+
+namespace Etc
+{
+	class Block4x4EncodingBits_R11;
+
+	// ################################################################################
+	// Block4x4Encoding_R11
+	// ################################################################################
+
+	class Block4x4Encoding_R11 : public Block4x4Encoding_RGB8
+	{
+	public:
+
+		Block4x4Encoding_R11(void);
+		virtual ~Block4x4Encoding_R11(void);
+
+		// initialization prior to encoding; a_paucEncodingBits points to the final encoding bits
+		virtual void InitFromSource(Block4x4 *a_pblockParent,
+			ColorFloatRGBA *a_pafrgbaSource,
+			unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric);
+
+		// initialization from the encoding bits of a previous encoding
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+			unsigned char *a_paucEncodingBits,
+			ColorFloatRGBA *a_pafrgbaSource,
+			ErrorMetric a_errormetric);
+
+		// perform a single encoding iteration
+		virtual void PerformIteration(float a_fEffort);
+
+		// set the encoding bits based on encoding state
+		virtual void SetEncodingBits(void);
+
+		// read-only access to the current red encoding state
+		float GetRedBase(void) const { return m_fRedBase; }
+		float GetRedMultiplier(void) const { return m_fRedMultiplier; }
+		int GetRedTableIndex(void) const { return m_uiRedModifierTableIndex; }
+		const unsigned int * GetRedSelectors(void) const { return m_auiRedSelectors; }
+
+	protected:
+
+		// dimensions of the modifier table: 16 tables of 8 selectors each
+		static const unsigned int MODIFIER_TABLE_ENTRYS = 16;
+		static const unsigned int SELECTOR_BITS = 3;
+		static const unsigned int SELECTORS = 1 << SELECTOR_BITS;
+
+		// modifier values, indexed [table][selector]
+		static float s_aafModifierTable[MODIFIER_TABLE_ENTRYS][SELECTORS];
+
+		// find the best combination of base color, multiplier and selectors
+		// a_uiSelectorsUsed limits the number of selector combinations to try
+		// a_fBaseRadius limits the range of base colors to try
+		// a_fMultiplierRadius limits the range of multipliers to try
+		void CalculateR11(unsigned int a_uiSelectorsUsed, 
+							float a_fBaseRadius, float a_fMultiplierRadius);
+
+		// decode one red value from a base, a multiplier and a modifier table entry
+		// the result is divided by 2047 and clamped to [0, 1]
+		inline float DecodePixelRed(float a_fBase, float a_fMultiplier,
+			unsigned int a_uiTableIndex, unsigned int a_uiSelector)
+		{
+			// a multiplier of zero (or less) is decoded as 1/8
+			const float fScale = (a_fMultiplier <= 0.0f) ? (1.0f / 8.0f) : a_fMultiplier;
+			const float fModifier = s_aafModifierTable[a_uiTableIndex][a_uiSelector];
+
+			// base * 8 + 4 + 8 * multiplier * (table value scaled by 255)
+			float fRed = a_fBase * 8 + 4 +
+				8 * fScale*fModifier*255;
+			fRed /= 2047.0f;
+
+			// clamp to [0, 1]
+			if (fRed < 0.0f)
+			{
+				return 0.0f;
+			}
+			if (fRed > 1.0f)
+			{
+				return 1.0f;
+			}
+			return fRed;
+		}
+
+		// encoding bits attached by InitFromSource() / InitFromEncodingBits()
+		Block4x4EncodingBits_R11 *m_pencodingbitsR11;
+
+		// current red encoding state
+		// base value; CalculateR11() stores it offset by -128 for the signed formats
+		float m_fRedBase;
+		// multiplier (CalculateR11() searches values within 0..15)
+		float m_fRedMultiplier;
+		// lowest block error found so far by CalculateR11()
+		float m_fRedBlockError;
+		// row of s_aafModifierTable in use
+		unsigned int m_uiRedModifierTableIndex;
+		// column of s_aafModifierTable chosen for each pixel
+		unsigned int m_auiRedSelectors[PIXELS];
+
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.cpp b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.cpp
new file mode 100644
index 0000000..a1bc9c9
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.cpp
@@ -0,0 +1,447 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding_RG11.cpp
+
+Block4x4Encoding_RG11 is the encoder to use when targeting file formats RG11 and SRG11 (signed RG11).
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding_RG11.h"
+
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcBlock4x4.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <float.h>
+#include <limits>
+
+namespace Etc
+{
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Block4x4Encoding_RG11::Block4x4Encoding_RG11(void)
+		: m_pencodingbitsRG11(nullptr)	// assigned later by InitFromSource() / InitFromEncodingBits()
+	{
+	}
+
+	Block4x4Encoding_RG11::~Block4x4Encoding_RG11(void) {}	// empty: the encoding-bits pointer is not released here
+	// ----------------------------------------------------------------------------------------------------
+	// initialization prior to encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits
+	//
+	void Block4x4Encoding_RG11::InitFromSource(Block4x4 *a_pblockParent,
+		ColorFloatRGBA *a_pafrgbaSource,
+		unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
+	{
+		// forward parent, source pixels and metric to the base initializer, then keep a typed view of the output bits
+		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric);
+		m_pencodingbitsRG11 = reinterpret_cast<Block4x4EncodingBits_RG11 *>(a_paucEncodingBits);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits of a previous encoding
+	//
+	void Block4x4Encoding_RG11::InitFromEncodingBits(Block4x4 *a_pblockParent,
+		unsigned char *a_paucEncodingBits,
+		ColorFloatRGBA *a_pafrgbaSource,
+		ErrorMetric a_errormetric)
+	{
+		// Rebuilds the encoder state (bases, multipliers, table indices, selectors) and the
+		// decoded colors from previously encoded RG11 bits, then recomputes the block error.
+
+		m_pencodingbitsRG11 = (Block4x4EncodingBits_RG11 *)a_paucEncodingBits;
+
+		// run the RGB8 initializer on the same bits before the RG11 state below is filled in
+		Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent,
+			(unsigned char *)m_pencodingbitsRG11,
+			a_pafrgbaSource,
+			a_errormetric);
+		m_fError = 0.0f;
+		m_mode = MODE_RG11;
+
+		// Query the image format once up front rather than inside the per-pixel loop.
+		// NOTE(review): assumes GetFormat() is a plain accessor whose value is stable during this call.
+		const auto format = a_pblockParent->GetImageSource()->GetFormat();
+		const bool boolSigned = (format == Image::Format::SIGNED_RG11);
+		const bool boolSupported = boolSigned || (format == Image::Format::RG11);
+		assert(boolSupported);	// only RG11 and SIGNED_RG11 are handled here
+
+		// the signed format reinterprets the stored base byte as a signed value
+		if (boolSigned)
+		{
+			m_fRedBase = (float)(signed char)m_pencodingbitsRG11->data.baseR;
+			m_fGrnBase = (float)(signed char)m_pencodingbitsRG11->data.baseG;
+		}
+		else
+		{
+			m_fRedBase = (float)(unsigned char)m_pencodingbitsRG11->data.baseR;
+			m_fGrnBase = (float)(unsigned char)m_pencodingbitsRG11->data.baseG;
+		}
+		m_fRedMultiplier = (float)m_pencodingbitsRG11->data.multiplierR;
+		m_fGrnMultiplier = (float)m_pencodingbitsRG11->data.multiplierG;
+		m_uiRedModifierTableIndex = m_pencodingbitsRG11->data.tableIndexR;
+		m_uiGrnModifierTableIndex = m_pencodingbitsRG11->data.tableIndexG;
+
+		// reassemble each channel's 48 selector bits, most significant byte first
+		unsigned long long int ulliSelectorBitsR = 0;
+		ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR0 << 40;
+		ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR1 << 32;
+		ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR2 << 24;
+		ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR3 << 16;
+		ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR4 << 8;
+		ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR5;
+
+		unsigned long long int ulliSelectorBitsG = 0;
+		ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG0 << 40;
+		ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG1 << 32;
+		ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG2 << 24;
+		ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG3 << 16;
+		ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG4 << 8;
+		ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG5;
+
+		// signed bases are shifted by +128 before being handed to DecodePixelRed
+		const float fRedDecodeBase = boolSigned ? (m_fRedBase + 128) : m_fRedBase;
+		const float fGrnDecodeBase = boolSigned ? (m_fGrnBase + 128) : m_fGrnBase;
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			// 3 bits per pixel; pixel 0 occupies the top of the 48-bit field
+			unsigned int uiShift = 45 - (3 * uiPixel);
+			m_auiRedSelectors[uiPixel] = (ulliSelectorBitsR >> uiShift) & (SELECTORS - 1);
+			m_auiGrnSelectors[uiPixel] = (ulliSelectorBitsG >> uiShift) & (SELECTORS - 1);
+
+			// with asserts disabled, an unsupported format leaves both decoded channels at 0
+			float fRedDecodedData = 0.0f;
+			float fGrnDecodedData = 0.0f;
+			if (boolSupported)
+			{
+				fRedDecodedData = DecodePixelRed(fRedDecodeBase, m_fRedMultiplier, m_uiRedModifierTableIndex, m_auiRedSelectors[uiPixel]);
+				fGrnDecodedData = DecodePixelRed(fGrnDecodeBase, m_fGrnMultiplier, m_uiGrnModifierTableIndex, m_auiGrnSelectors[uiPixel]);
+			}
+			m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fRedDecodedData, fGrnDecodedData, 0.0f, 1.0f);
+		}
+
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_RG11::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolDone);
+
+		// Each call runs one search pass for the red channel and one for the green channel,
+		// stores the summed channel error in m_fError and advances m_uiEncodingIterations.
+		// Passes 1 and 2 end the search when a_fEffort is at or below their threshold;
+		// pass 5 always ends it.
+
+		struct SearchPass
+		{
+			unsigned int uiSelectorsUsed;
+			float fBaseRadius;
+			float fMultiplierRadius;
+		};
+		static const SearchPass s_aSearchPasses[] =
+		{
+			{ 8,  0.0f, 0.0f },	// iteration 0
+			{ 8,  2.0f, 1.0f },	// iteration 1
+			{ 8, 12.0f, 1.0f },	// iteration 2
+			{ 7,  6.0f, 1.0f },	// iteration 3
+			{ 6,  3.0f, 1.0f },	// iteration 4
+			{ 5,  1.0f, 0.0f }	// iteration 5 (last)
+		};
+		const unsigned int uiPassCount = sizeof(s_aSearchPasses) / sizeof(s_aSearchPasses[0]);
+
+		const unsigned int uiIteration = m_uiEncodingIterations;
+		if (uiIteration >= uiPassCount)
+		{
+			// no further passes are defined
+			assert(0);
+		}
+		else
+		{
+			if (uiIteration == 0)
+			{
+				// artificially high values
+				m_fError = FLT_MAX;
+				m_fGrnBlockError = FLT_MAX;
+				m_fRedBlockError = FLT_MAX;
+			}
+
+			const SearchPass &pass = s_aSearchPasses[uiIteration];
+			CalculateR11(pass.uiSelectorsUsed, pass.fBaseRadius, pass.fMultiplierRadius);
+			CalculateG11(pass.uiSelectorsUsed, pass.fBaseRadius, pass.fMultiplierRadius);
+			m_fError = (m_fGrnBlockError + m_fRedBlockError);
+
+			// low-effort requests stop after pass 1 or 2; the last pass always stops
+			const bool boolStopForEffort =
+				(uiIteration == 1 && a_fEffort <= 24.5f) ||
+				(uiIteration == 2 && a_fEffort <= 49.5f);
+			if (boolStopForEffort || uiIteration == uiPassCount - 1)
+			{
+				m_boolDone = true;
+			}
+		}
+
+		// the call is counted even when the iteration index was out of range
+		m_uiEncodingIterations++;
+
+		SetDoneIfPerfect();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find the best combination of base color, multiplier and selectors
+	//
+	// a_uiSelectorsUsed limits the number of selector combinations to try
+	// a_fBaseRadius limits the range of base colors to try
+	// a_fMultiplierRadius limits the range of multipliers to try
+	//
+	void Block4x4Encoding_RG11::CalculateG11(unsigned int a_uiSelectorsUsed,
+		float a_fBaseRadius, float a_fMultiplierRadius)
+	{
+		// maps from virtual (monotonic) selector to etc selector
+		static const unsigned int auiVirtualSelectorMap[8] = { 3, 2, 1, 0, 4, 5, 6, 7 };
+
+		// find min/max Grn
+		float fMinGrn = 1.0f;	// starts above any value in [0, 1] ...
+		float fMaxGrn = 0.0f;	// ... and below, so the first counted pixel sets both
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			// ignore border pixels
+			float fAlpha = m_pafrgbaSource[uiPixel].fA;
+			if (isnan(fAlpha))
+			{
+				continue;
+			}
+
+			float fGrn = m_pafrgbaSource[uiPixel].fG;
+
+			if (fGrn < fMinGrn)
+			{
+				fMinGrn = fGrn;
+			}
+			if (fGrn > fMaxGrn)
+			{
+				fMaxGrn = fGrn;
+			}
+		}
+		assert(fMinGrn <= fMaxGrn);	// trips if every pixel was skipped above (min stays 1, max stays 0)
+
+		float fGrnRange = (fMaxGrn - fMinGrn);
+
+		// try each modifier table entry
+		for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++)
+		{
+			for (unsigned int uiMinVirtualSelector = 0;
+			uiMinVirtualSelector <= (8 - a_uiSelectorsUsed);
+				uiMinVirtualSelector++)
+			{
+				unsigned int uiMaxVirtualSelector = uiMinVirtualSelector + a_uiSelectorsUsed - 1;
+
+				unsigned int uiMinSelector = auiVirtualSelectorMap[uiMinVirtualSelector];
+				unsigned int uiMaxSelector = auiVirtualSelectorMap[uiMaxVirtualSelector];
+
+				float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][uiMinSelector];	// negated modifier of the lowest selector in the window
+
+				float fTableEntryRange = s_aafModifierTable[uiTableEntry][uiMaxSelector] -
+					s_aafModifierTable[uiTableEntry][uiMinSelector];
+
+				float fCenterRatio = fTableEntryCenter / fTableEntryRange;
+
+				float fCenter = fMinGrn + fCenterRatio*fGrnRange;
+				fCenter = roundf(255.0f * fCenter) / 255.0f;	// snap to a multiple of 1/255
+
+				float fMinBase = fCenter - (a_fBaseRadius / 255.0f);
+				if (fMinBase < 0.0f)
+				{
+					fMinBase = 0.0f;
+				}
+
+				float fMaxBase = fCenter + (a_fBaseRadius / 255.0f);
+				if (fMaxBase > 1.0f)
+				{
+					fMaxBase = 1.0f;
+				}
+				// walk the candidate bases from fMinBase to fMaxBase in steps just under 1/255
+				for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f))
+				{
+					float fRangeMultiplier = roundf(fGrnRange / fTableEntryRange);	// does not depend on fBase
+
+					float fMinMultiplier = fRangeMultiplier - a_fMultiplierRadius;
+					if (fMinMultiplier < 1.0f)
+					{
+						fMinMultiplier = 0.0f;	// DecodePixelRed evaluates a multiplier <= 0 as 1/8
+					}
+					else if (fMinMultiplier > 15.0f)
+					{
+						fMinMultiplier = 15.0f;
+					}
+
+					float fMaxMultiplier = fRangeMultiplier + a_fMultiplierRadius;
+					if (fMaxMultiplier < 1.0f)
+					{
+						fMaxMultiplier = 1.0f;
+					}
+					else if (fMaxMultiplier > 15.0f)
+					{
+						fMaxMultiplier = 15.0f;
+					}
+
+					for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f)
+					{
+						// find best selector for each pixel
+						unsigned int auiBestSelectors[PIXELS];
+						float afBestGrnError[PIXELS];
+						float afBestPixelGrn[PIXELS];
+
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+						{
+							float fBestPixelGrnError = FLT_MAX;
+
+							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+							{
+								//DecodePixelRed is not red channel specific
+								float fPixelGrn = DecodePixelRed(fBase * 255.0f, fMultiplier, uiTableEntry, uiSelector);
+								// the candidate keeps the source red; only green carries the decoded value
+								ColorFloatRGBA frgba(m_pafrgbaSource[uiPixel].fR, fPixelGrn, 0.0f, 1.0f);
+
+								float fPixelGrnError = CalcPixelError(frgba, 1.0f, m_pafrgbaSource[uiPixel]);
+
+								if (fPixelGrnError < fBestPixelGrnError)
+								{
+									fBestPixelGrnError = fPixelGrnError;
+									auiBestSelectors[uiPixel] = uiSelector;
+									afBestGrnError[uiPixel] = fBestPixelGrnError;
+									afBestPixelGrn[uiPixel] = fPixelGrn;
+								}
+							}
+						}
+						float fBlockError = 0.0f;
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+						{
+							fBlockError += afBestGrnError[uiPixel];
+						}
+
+						if (fBlockError < m_fGrnBlockError)	// strictly better than the best green fit so far
+						{
+							m_fGrnBlockError = fBlockError;
+
+							if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
+							{
+								m_fGrnBase = 255.0f * fBase;
+							}
+							else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
+							{
+								m_fGrnBase = (fBase * 255) - 128;	// signed format stores the base shifted down by 128
+							}
+							else
+							{
+								assert(0);
+							}
+							m_fGrnMultiplier = fMultiplier;
+							m_uiGrnModifierTableIndex = uiTableEntry;
+							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+							{
+								m_auiGrnSelectors[uiPixel] = auiBestSelectors[uiPixel];
+								m_afrgbaDecodedColors[uiPixel].fG = afBestPixelGrn[uiPixel];
+								m_afDecodedAlphas[uiPixel] = 1.0f;	// decoded alpha is fixed at 1
+							}
+						}
+					}
+				}
+
+			}
+		}
+	}
+	
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_RG11::SetEncodingBits(void)
+	{
+		// pack the 3-bit selectors of both channels, pixel 0 in the most significant position
+		unsigned long long int ulliPackedRed = 0;
+		unsigned long long int ulliPackedGrn = 0;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			const unsigned int uiShift = 45 - (3 * uiPixel);
+			ulliPackedRed |= ((unsigned long long int)m_auiRedSelectors[uiPixel]) << uiShift;
+			ulliPackedGrn |= ((unsigned long long int)m_auiGrnSelectors[uiPixel]) << uiShift;
+		}
+		auto &bits = m_pencodingbitsRG11->data;	// shorthand for the output bit fields
+
+		// red channel: the base is rounded and narrowed according to the image format
+		switch (m_pblockParent->GetImageSource()->GetFormat())
+		{
+		case Image::Format::RG11:
+			bits.baseR = (unsigned char)roundf(m_fRedBase);
+			break;
+		case Image::Format::SIGNED_RG11:
+			bits.baseR = (signed char)roundf(m_fRedBase);
+			break;
+		default:
+			assert(0);
+		}
+		bits.tableIndexR = m_uiRedModifierTableIndex;
+		bits.multiplierR = (unsigned char)roundf(m_fRedMultiplier);
+		bits.selectorsR0 = ulliPackedRed >> 40;
+		bits.selectorsR1 = ulliPackedRed >> 32;
+		bits.selectorsR2 = ulliPackedRed >> 24;
+		bits.selectorsR3 = ulliPackedRed >> 16;
+		bits.selectorsR4 = ulliPackedRed >> 8;
+		bits.selectorsR5 = ulliPackedRed;
+
+		// green channel: same layout as red
+		switch (m_pblockParent->GetImageSource()->GetFormat())
+		{
+		case Image::Format::RG11:
+			bits.baseG = (unsigned char)roundf(m_fGrnBase);
+			break;
+		case Image::Format::SIGNED_RG11:
+			bits.baseG = (signed char)roundf(m_fGrnBase);
+			break;
+		default:
+			assert(0);
+		}
+		bits.tableIndexG = m_uiGrnModifierTableIndex;
+		bits.multiplierG = (unsigned char)roundf(m_fGrnMultiplier);
+		bits.selectorsG0 = ulliPackedGrn >> 40;
+		bits.selectorsG1 = ulliPackedGrn >> 32;
+		bits.selectorsG2 = ulliPackedGrn >> 24;
+		bits.selectorsG3 = ulliPackedGrn >> 16;
+		bits.selectorsG4 = ulliPackedGrn >> 8;
+		bits.selectorsG5 = ulliPackedGrn;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+}
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.h b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.h
new file mode 100644
index 0000000..4caac1d
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcBlock4x4Encoding_RGB8.h"
+#include "EtcBlock4x4Encoding_R11.h"
+
+namespace Etc
+{
+	class Block4x4EncodingBits_RG11;
+
+	// ################################################################################
+	// Block4x4Encoding_RG11
+	// ################################################################################
+
+	class Block4x4Encoding_RG11 : public Block4x4Encoding_R11
+	{
+	private:
+		// green-channel counterparts of the red-channel members in Block4x4Encoding_R11
+		float m_fGrnBase;
+		float m_fGrnMultiplier;
+		float m_fGrnBlockError;
+		unsigned int m_auiGrnSelectors[PIXELS];
+		unsigned int m_uiGrnModifierTableIndex;
+	public:
+		Block4x4Encoding_RG11();
+		virtual ~Block4x4Encoding_RG11();
+
+		virtual void InitFromSource(Block4x4 *a_pblockParent,
+			ColorFloatRGBA *a_pafrgbaSource,
+			unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric);
+
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+			unsigned char *a_paucEncodingBits,
+			ColorFloatRGBA *a_pafrgbaSource,
+			ErrorMetric a_errormetric);
+
+		virtual void PerformIteration(float a_fEffort);
+		virtual void SetEncodingBits();
+
+		// caller-supplied encoding bits viewed as the RG11 layout; assigned by the Init* methods
+		Block4x4EncodingBits_RG11 *m_pencodingbitsRG11;
+
+		// searches base, multiplier, table index and selectors for the green channel
+		void CalculateG11(unsigned int a_uiSelectorsUsed, float a_fBaseRadius, float a_fMultiplierRadius);
+
+		// read-only accessors for the green-channel encoding state
+		float GetGrnBase() const
+		{
+			return m_fGrnBase;
+		}
+
+		float GetGrnMultiplier() const
+		{
+			return m_fGrnMultiplier;
+		}
+
+		int GetGrnTableIndex() const
+		{
+			return m_uiGrnModifierTableIndex;
+		}
+
+		const unsigned int * GetGrnSelectors() const
+		{
+			return m_auiGrnSelectors;
+		}
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.cpp b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.cpp
new file mode 100644
index 0000000..4c8b14c
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.cpp
@@ -0,0 +1,1728 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding_RGB8.cpp
+
+Block4x4Encoding_RGB8 is the encoder to use for the ETC2 extensions when targeting file format RGB8.
+This encoder is also used for the ETC2 subset of file format RGBA8.
+
+Block4x4Encoding_ETC1 encodes the ETC1 subset of RGB8.
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding_RGB8.h"
+
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcBlock4x4.h"
+#include "EtcMath.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <float.h>
+#include <limits>
+
+namespace Etc
+{
+	float Block4x4Encoding_RGB8::s_afTHDistanceTable[TH_DISTANCES] =	// each entry is an integer step divided by 255
+	{
+		3.0f / 255.0f,
+		6.0f / 255.0f,
+		11.0f / 255.0f,
+		16.0f / 255.0f,
+		23.0f / 255.0f,
+		32.0f / 255.0f,
+		41.0f / 255.0f,
+		64.0f / 255.0f
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Block4x4Encoding_RGB8::Block4x4Encoding_RGB8(void)
+	{
+		// no encoding bits are attached yet; InitFromEncodingBits() assigns the pointer
+		m_pencodingbitsRGB8 = nullptr;
+
+	}
+
+	Block4x4Encoding_RGB8::~Block4x4Encoding_RGB8(void) {}	// empty: nothing is released here
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits of a previous encoding
+	//
+	void Block4x4Encoding_RGB8::InitFromEncodingBits(Block4x4 *a_pblockParent,
+														unsigned char *a_paucEncodingBits,
+														ColorFloatRGBA *a_pafrgbaSource,
+														ErrorMetric a_errormetric)
+	{
+		// The ETC1 base class decodes first; if one of the modes below is detected,
+		// its initializer then replaces the mode, colors and decoded pixels.
+		Block4x4Encoding_ETC1::InitFromEncodingBits(a_pblockParent,
+													a_paucEncodingBits, a_pafrgbaSource,a_errormetric);
+
+		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
+
+		// only blocks with the diff bit set are examined for T, H or Planar
+		if (!m_pencodingbitsRGB8->differential.diff)
+		{
+			return;
+		}
+
+		// Second base color = first color + delta, per channel.
+		// A sum outside [0, 31] marks one of the extra modes.
+		const int iSecondRed = (int)m_pencodingbitsRGB8->differential.red1 +
+								m_pencodingbitsRGB8->differential.dred2;
+		const int iSecondGreen = (int)m_pencodingbitsRGB8->differential.green1 +
+								m_pencodingbitsRGB8->differential.dgreen2;
+		const int iSecondBlue = (int)m_pencodingbitsRGB8->differential.blue1 +
+								m_pencodingbitsRGB8->differential.dblue2;
+
+		// channels are tested in the order red, green, blue; the first one out of range decides
+		// (red -> T, green -> H, blue -> Planar)
+		if (iSecondRed < 0 || iSecondRed > 31)
+		{
+			InitFromEncodingBits_T();
+		}
+		else if (iSecondGreen < 0 || iSecondGreen > 31)
+		{
+			InitFromEncodingBits_H();
+		}
+		else if (iSecondBlue < 0 || iSecondBlue > 31)
+		{
+			InitFromEncodingBits_Planar();
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding if T mode is detected
+	//
+	void Block4x4Encoding_RGB8::InitFromEncodingBits_T(void)
+	{
+		const auto &tbits = m_pencodingbitsRGB8->t;
+
+		m_mode = MODE_T;
+
+		// color 1: red is stored in two fields, red1a sitting 2 bits above red1b
+		const unsigned char ucColor1Red = (unsigned char)((tbits.red1a << 2) + tbits.red1b);
+		const unsigned char ucColor1Green = tbits.green1;
+		const unsigned char ucColor1Blue = tbits.blue1;
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucColor1Red, ucColor1Green, ucColor1Blue);
+
+		// color 2: every channel is stored in a single field
+		const unsigned char ucColor2Red = tbits.red2;
+		const unsigned char ucColor2Green = tbits.green2;
+		const unsigned char ucColor2Blue = tbits.blue2;
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucColor2Red, ucColor2Green, ucColor2Blue);
+
+		// CW1 is assembled from da (shifted up one bit) and db
+		m_uiCW1 = (tbits.da << 1) + tbits.db;
+
+		// selectors are read by the ETC1 base-class routine, then pixels are decoded and scored
+		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
+		DecodePixels_T();
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding if H mode is detected
+	//
+	void Block4x4Encoding_RGB8::InitFromEncodingBits_H(void)
+	{
+		const auto &hbits = m_pencodingbitsRGB8->h;
+
+		m_mode = MODE_H;
+
+		// color 1: green and blue are stored in pieces that are shifted back together
+		const unsigned char ucColor1Red = hbits.red1;
+		const unsigned char ucColor1Green = (unsigned char)((hbits.green1a << 1) + hbits.green1b);
+		const unsigned char ucColor1Blue = (unsigned char)((hbits.blue1a << 3) +
+											(hbits.blue1b << 1) +
+											hbits.blue1c);
+
+		// color 2: only green is split
+		const unsigned char ucColor2Red = hbits.red2;
+		const unsigned char ucColor2Green = (unsigned char)((hbits.green2a << 1) + hbits.green2b);
+		const unsigned char ucColor2Blue = hbits.blue2;
+
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucColor1Red, ucColor1Green, ucColor1Blue);
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucColor2Red, ucColor2Green, ucColor2Blue);
+
+		// The lowest bit of CW1 is not read from the bits: it is 1 exactly when color 1, packed
+		// as (red << 16) + (green << 8) + blue, is >= color 2 packed the same way.
+		const unsigned int uiPackedColor1 = (unsigned int)(((int)ucColor1Red << 16) + ((int)ucColor1Green << 8) + (int)ucColor1Blue);
+		const unsigned int uiPackedColor2 = (unsigned int)(((int)ucColor2Red << 16) + ((int)ucColor2Green << 8) + (int)ucColor2Blue);
+
+		m_uiCW1 = (hbits.da << 2) + (hbits.db << 1);
+		if (uiPackedColor1 >= uiPackedColor2)
+		{
+			m_uiCW1++;
+		}
+
+		// selectors are read by the ETC1 base-class routine, then pixels are decoded and scored
+		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
+		DecodePixels_H();
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding if Planar mode is detected
+	//
+	void Block4x4Encoding_RGB8::InitFromEncodingBits_Planar(void)
+	{
+		// Three colors (origin, horizontal, vertical) are read from the planar bit layout.
+		// Several channels are split across fields; the shifts below put the pieces back
+		// together before each color is converted with ConvertFromR6G7B6().
+		const auto &pbits = m_pencodingbitsRGB8->planar;
+
+		m_mode = MODE_PLANAR;
+
+		// origin color -> m_frgbaColor1
+		const unsigned char ucOriginR = pbits.originRed;
+		const unsigned char ucOriginG = (unsigned char)((pbits.originGreen1 << 6) + pbits.originGreen2);
+		const unsigned char ucOriginB = (unsigned char)((pbits.originBlue1 << 5) +
+											(pbits.originBlue2 << 3) +
+											(pbits.originBlue3 << 1) +
+											pbits.originBlue4);
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromR6G7B6(ucOriginR, ucOriginG, ucOriginB);
+
+		// horizontal color -> m_frgbaColor2
+		const unsigned char ucHorizR = (unsigned char)((pbits.horizRed1 << 1) + pbits.horizRed2);
+		const unsigned char ucHorizG = pbits.horizGreen;
+		const unsigned char ucHorizB = (unsigned char)((pbits.horizBlue1 << 5) + pbits.horizBlue2);
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromR6G7B6(ucHorizR, ucHorizG, ucHorizB);
+
+		// vertical color -> m_frgbaColor3
+		const unsigned char ucVertR = (unsigned char)((pbits.vertRed1 << 3) + pbits.vertRed2);
+		const unsigned char ucVertG = (unsigned char)((pbits.vertGreen1 << 2) + pbits.vertGreen2);
+		const unsigned char ucVertB = pbits.vertBlue;
+		m_frgbaColor3 = ColorFloatRGBA::ConvertFromR6G7B6(ucVertR, ucVertG, ucVertB);
+
+		// decode with the three colors, then score the result
+		DecodePixels_Planar();
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_RGB8::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolDone);
+
+		// Passes 5 to 9 end the search when a_fEffort is at or below a per-pass cutoff.
+		// The cutoff is recorded in the switch and applied once after it.
+		bool boolHasEffortCutoff = false;
+		float fEffortCutoff = 0.0f;
+
+		switch (m_uiEncodingIterations)
+		{
+		case 0:
+			// each step is skipped once an earlier one has marked the encoding done
+			Block4x4Encoding_ETC1::PerformFirstIteration();
+			if (!m_boolDone)
+			{
+				TryPlanar(0);
+				SetDoneIfPerfect();
+				if (!m_boolDone)
+				{
+					TryTAndH(0);
+				}
+			}
+			break;
+
+		case 1:
+			Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
+			break;
+
+		case 2:
+			Block4x4Encoding_ETC1::TryIndividual(m_boolMostLikelyFlip, 1);
+			break;
+
+		case 3:
+			Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
+			break;
+
+		case 4:
+			Block4x4Encoding_ETC1::TryIndividual(!m_boolMostLikelyFlip, 1);
+			break;
+
+		case 5:
+			TryPlanar(1);
+			boolHasEffortCutoff = true;
+			fEffortCutoff = 49.5f;
+			break;
+
+		case 6:
+			TryTAndH(1);
+			boolHasEffortCutoff = true;
+			fEffortCutoff = 59.5f;
+			break;
+
+		case 7:
+			Block4x4Encoding_ETC1::TryDegenerates1();
+			boolHasEffortCutoff = true;
+			fEffortCutoff = 69.5f;
+			break;
+
+		case 8:
+			Block4x4Encoding_ETC1::TryDegenerates2();
+			boolHasEffortCutoff = true;
+			fEffortCutoff = 79.5f;
+			break;
+
+		case 9:
+			Block4x4Encoding_ETC1::TryDegenerates3();
+			boolHasEffortCutoff = true;
+			fEffortCutoff = 89.5f;
+			break;
+
+		case 10:
+			Block4x4Encoding_ETC1::TryDegenerates4();
+			m_boolDone = true;
+			break;
+
+		default:
+			assert(0);
+			break;
+		}
+
+		if (boolHasEffortCutoff && a_fEffort <= fEffortCutoff)
+		{
+			m_boolDone = true;
+		}
+
+		m_uiEncodingIterations++;
+
+		SetDoneIfPerfect();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try encoding in Planar mode
+	// save this encoding if it improves the error
+	//
+	void Block4x4Encoding_RGB8::TryPlanar(unsigned int a_uiRadius)
+	{
+		// work on a copy so the current encoding is untouched unless Planar turns out better
+		Block4x4Encoding_RGB8 encodingCandidate = *this;
+		encodingCandidate.m_mode = MODE_PLANAR;
+		encodingCandidate.m_boolDiff = true;
+		encodingCandidate.m_boolFlip = false;
+
+		encodingCandidate.CalculatePlanarCornerColors();
+		encodingCandidate.DecodePixels_Planar();
+		encodingCandidate.CalcBlockError();
+
+		// a radius of 0 skips the TwiddlePlanar() step
+		if (a_uiRadius > 0)
+		{
+			encodingCandidate.TwiddlePlanar();
+		}
+
+		// keep the current encoding unless the candidate's error is strictly lower
+		if (!(encodingCandidate.m_fError < m_fError))
+		{
+			return;
+		}
+
+		// adopt the candidate: mode flags, the three planar colors, decoded pixels and error
+		m_mode = MODE_PLANAR;
+		m_boolDiff = true;
+		m_boolFlip = false;
+		m_frgbaColor1 = encodingCandidate.m_frgbaColor1;
+		m_frgbaColor2 = encodingCandidate.m_frgbaColor2;
+		m_frgbaColor3 = encodingCandidate.m_frgbaColor3;
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			m_afrgbaDecodedColors[uiPixel] = encodingCandidate.m_afrgbaDecodedColors[uiPixel];
+		}
+
+		m_fError = encodingCandidate.m_fError;
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try encoding in T mode or H mode
+	// save this encoding if it improves the error
+	//
+	void Block4x4Encoding_RGB8::TryTAndH(unsigned int a_uiRadius)
+	{
+		// the base colors are calculated once, before either mode is tried
+		CalculateBaseColorsForTAndH();
+
+		// T is attempted before H; both receive the same radius
+		TryT(a_uiRadius);
+
+		TryH(a_uiRadius);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// calculate original values for base colors
+	// store them in m_frgbaOriginalColor1 and m_frgbaOriginalColor2
+	//
+	void Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH(void)
+	{
+		// midpoint of the two half-block averages stands in for the whole-block average
+		ColorFloatRGBA frgbaCenter = (m_frgbaSourceAverageLeft + m_frgbaSourceAverageRight) * 0.5f;
+
+		// locate the non-transparent pixel with the largest CalcGrayDistance2() from the center
+		unsigned int uiOutlierPixel = 0;
+		float fOutlierDistance2 = 0.0f;
+		unsigned int uiSkippedPixels = 0;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			// fully transparent pixels are tallied but never become the outlier
+			if (m_pafrgbaSource[uiPixel].fA != 0.0f)
+			{
+				float fDistance2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], frgbaCenter);
+				// strict comparison: the first pixel that reaches the maximum is kept
+				if (fDistance2 > fOutlierDistance2)
+				{
+					uiOutlierPixel = uiPixel;
+					fOutlierDistance2 = fDistance2;
+				}
+			}
+			else
+			{
+				uiSkippedPixels++;
+			}
+		}
+		// a block whose pixels are all transparent should not reach this method
+		assert(uiSkippedPixels < PIXELS);
+
+		// seed the two base colors symmetrically about the center:
+		//		color1 sits half way out toward the outlier pixel
+		//		color2 is its mirror image, clamped because it can fall out of range
+		ColorFloatRGBA frgbaHalfStep = (m_pafrgbaSource[uiOutlierPixel] - frgbaCenter) * 0.5f;
+		m_frgbaOriginalColor1_TAndH = (frgbaCenter + frgbaHalfStep).QuantizeR4G4B4();
+		m_frgbaOriginalColor2_TAndH = (frgbaCenter - frgbaHalfStep).ClampRGB().QuantizeR4G4B4();
+
+		// refine the pair: at most 10 rounds of "assign pixels to the nearer color, then re-center"
+		for (unsigned int uiRound = 0; uiRound < 10; uiRound++)
+		{
+			// alpha-weighted color sum and total alpha of the pixels nearest each base color
+			float fWeight1 = 0.0f;
+			ColorFloatRGBA frgbSum1;
+			float fWeight2 = 0.0f;
+			ColorFloatRGBA frgbSum2;
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				// fully transparent pixels take no part in the fit
+				if (m_pafrgbaSource[uiPixel].fA != 0.0f)
+				{
+					float fToColor1 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor1_TAndH);
+					float fToColor2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor2_TAndH);
+
+					ColorFloatRGBA frgbaWeighted = m_pafrgbaSource[uiPixel] * m_pafrgbaSource[uiPixel].fA;
+
+					// ties go to color1
+					if (fToColor1 <= fToColor2)
+					{
+						fWeight1 += m_pafrgbaSource[uiPixel].fA;
+						frgbSum1 = frgbSum1 + frgbaWeighted;
+					}
+					else
+					{
+						fWeight2 += m_pafrgbaSource[uiPixel].fA;
+						frgbSum2 = frgbSum2 + frgbaWeighted;
+					}
+				}
+			}
+
+			// stop when either cluster is empty; this also keeps the divisions below away from zero
+			if (fWeight1 == 0.0f || fWeight2 == 0.0f)
+			{
+				break;
+			}
+			ColorFloatRGBA frgbCenter1 = (frgbSum1 * (1.0f / fWeight1)).QuantizeR4G4B4();
+			ColorFloatRGBA frgbCenter2 = (frgbSum2 * (1.0f / fWeight2)).QuantizeR4G4B4();
+			// converged once both re-quantized centers equal the current base colors (RGB only)
+			bool boolColor1Settled = frgbCenter1.fR == m_frgbaOriginalColor1_TAndH.fR &&
+										frgbCenter1.fG == m_frgbaOriginalColor1_TAndH.fG &&
+										frgbCenter1.fB == m_frgbaOriginalColor1_TAndH.fB;
+			bool boolColor2Settled = frgbCenter2.fR == m_frgbaOriginalColor2_TAndH.fR &&
+										frgbCenter2.fG == m_frgbaOriginalColor2_TAndH.fG &&
+										frgbCenter2.fB == m_frgbaOriginalColor2_TAndH.fB;
+			if (boolColor1Settled && boolColor2Settled)
+			{
+				break;
+			}
+			m_frgbaOriginalColor1_TAndH = frgbCenter1;
+			m_frgbaOriginalColor2_TAndH = frgbCenter2;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try encoding in T mode
+	// save this encoding if it improves the error
+	//
+	// since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently
+	// better encoding can be found if TWIDDLE_RADIUS is set to 2, but it will be much slower
+	//
+	void Block4x4Encoding_RGB8::TryT(unsigned int a_uiRadius)
+	{
+		Block4x4Encoding_RGB8 encodingTry = *this;
+
+		// init "try": a scratch copy in T mode whose error starts at FLT_MAX
+		{
+			encodingTry.m_mode = MODE_T;
+			encodingTry.m_boolDiff = true;
+			encodingTry.m_boolFlip = false;
+			encodingTry.m_fError = FLT_MAX;
+		}
+		// seed color1 components on the 0..15 scale (IntRed/IntGreen/IntBlue called with 15.0f)
+		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
+		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
+		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
+		// search window per component: seed +/- a_uiRadius, BOTH ends clamped to [0, 15] so only valid 4-bit values reach ConvertFromRGB4()
+		int iMinRed1 = iColor1Red - (int)a_uiRadius;
+		if (iMinRed1 < 0)
+		{
+			iMinRed1 = 0;
+		}
+		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
+		if (iMaxRed1 > 15)
+		{
+			iMaxRed1 = 15;
+		}
+
+		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
+		if (iMinGreen1 < 0)
+		{
+			iMinGreen1 = 0;
+		}
+		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
+		if (iMaxGreen1 > 15)
+		{
+			iMaxGreen1 = 15;
+		}
+
+		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
+		if (iMinBlue1 < 0)
+		{
+			iMinBlue1 = 0;
+		}
+		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
+		if (iMaxBlue1 > 15)
+		{
+			iMaxBlue1 = 15;
+		}
+		// same seed extraction and clamped windows for color2
+		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
+		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
+		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
+
+		int iMinRed2 = iColor2Red - (int)a_uiRadius;
+		if (iMinRed2 < 0)
+		{
+			iMinRed2 = 0;
+		}
+		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
+		if (iMaxRed2 > 15)
+		{
+			iMaxRed2 = 15;
+		}
+
+		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
+		if (iMinGreen2 < 0)
+		{
+			iMinGreen2 = 0;
+		}
+		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
+		if (iMaxGreen2 > 15)
+		{
+			iMaxGreen2 = 15;
+		}
+
+		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
+		if (iMinBlue2 < 0)
+		{
+			iMinBlue2 = 0;
+		}
+		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
+		if (iMaxBlue2 > 15)
+		{
+			iMaxBlue2 = 15;
+		}
+
+		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
+		{
+			encodingTry.m_uiCW1 = uiDistance;
+
+			// twiddle m_frgbaOriginalColor2_TAndH
+			// twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector
+			// each candidate is tried in both base-color orders (uiBaseColorSwaps 0 and 1)
+			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
+			{
+				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
+				{
+					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
+					{
+						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)
+						{
+							if (uiBaseColorSwaps == 0)
+							{
+								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;
+								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
+							}
+							else
+							{
+								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
+								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor1_TAndH;
+							}
+
+							encodingTry.TryT_BestSelectorCombination();
+
+							if (encodingTry.m_fError < m_fError)
+							{
+								m_mode = encodingTry.m_mode;
+								m_boolDiff = encodingTry.m_boolDiff;
+								m_boolFlip = encodingTry.m_boolFlip;
+
+								m_frgbaColor1 = encodingTry.m_frgbaColor1;
+								m_frgbaColor2 = encodingTry.m_frgbaColor2;
+								m_uiCW1 = encodingTry.m_uiCW1;
+
+								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+								{
+									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+								}
+
+								m_fError = encodingTry.m_fError;
+							}
+						}
+					}
+				}
+			}
+
+			// twiddle m_frgbaOriginalColor1_TAndH
+			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
+			{
+				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
+				{
+					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
+					{
+						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)
+						{
+							if (uiBaseColorSwaps == 0)
+							{
+								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
+								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;
+							}
+							else
+							{
+								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor2_TAndH;
+								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
+							}
+
+							encodingTry.TryT_BestSelectorCombination();
+
+							if (encodingTry.m_fError < m_fError)
+							{
+								m_mode = encodingTry.m_mode;
+								m_boolDiff = encodingTry.m_boolDiff;
+								m_boolFlip = encodingTry.m_boolFlip;
+
+								m_frgbaColor1 = encodingTry.m_frgbaColor1;
+								m_frgbaColor2 = encodingTry.m_frgbaColor2;
+								m_uiCW1 = encodingTry.m_uiCW1;
+
+								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+								{
+									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+								}
+
+								m_fError = encodingTry.m_fError;
+							}
+						}
+					}
+				}
+			}
+
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best selector combination for TryT
+	// called on an encodingTry
+	//
+	void Block4x4Encoding_RGB8::TryT_BestSelectorCombination(void)
+	{
+		assert(SELECTORS == 4);
+		// T-mode palette: color1 unchanged, then color2 + d, color2, color2 - d (d looked up via m_uiCW1)
+		const float fOffset = s_afTHDistanceTable[m_uiCW1];
+		ColorFloatRGBA afrgbaPalette[SELECTORS];
+		afrgbaPalette[0] = m_frgbaColor1;
+		afrgbaPalette[1] = (m_frgbaColor2 + fOffset).ClampRGB();
+		afrgbaPalette[2] = m_frgbaColor2;
+		afrgbaPalette[3] = (m_frgbaColor2 - fOffset).ClampRGB();
+
+		// per-pixel winners so far; every error starts at FLT_MAX
+		unsigned int auiWinningSelector[PIXELS];
+		ColorFloatRGBA afrgbaWinningColor[PIXELS];
+		float afLowestError[PIXELS];
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			afLowestError[uiPixel] = FLT_MAX;
+		}
+
+		// score each palette entry against every pixel, keeping only strictly better matches
+		for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+		{
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				float fError = CalcPixelError(afrgbaPalette[uiSelector], m_afDecodedAlphas[uiPixel], m_pafrgbaSource[uiPixel]);
+				if (!(fError < afLowestError[uiPixel]))
+				{
+					continue;
+				}
+				afLowestError[uiPixel] = fError;
+				auiWinningSelector[uiPixel] = uiSelector;
+				afrgbaWinningColor[uiPixel] = afrgbaPalette[uiSelector];
+			}
+		}
+
+		// total the per-pixel minima in pixel order
+		float fTotalError = 0.0f;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			fTotalError += afLowestError[uiPixel];
+		}
+
+		// keep this combination only when it beats the error already stored in this object
+		if (!(fTotalError < m_fError))
+		{
+			return;
+		}
+		m_fError = fTotalError;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			m_auiSelectors[uiPixel] = auiWinningSelector[uiPixel];
+			m_afrgbaDecodedColors[uiPixel] = afrgbaWinningColor[uiPixel];
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try encoding in H mode
+	// save this encoding if it improves the error
+	//
+	// since all pixels use the distance table, color1 and color2 can NOT be twiddled independently
+	// TWIDDLE_RADIUS of 2 is WAY too slow
+	//
+	void Block4x4Encoding_RGB8::TryH(unsigned int a_uiRadius)
+	{
+		Block4x4Encoding_RGB8 encodingTry = *this;
+
+		// init "try": a scratch copy in H mode whose error starts at FLT_MAX
+		{
+			encodingTry.m_mode = MODE_H;
+			encodingTry.m_boolDiff = true;
+			encodingTry.m_boolFlip = false;
+			encodingTry.m_fError = FLT_MAX;
+		}
+		// seed color1 components on the 0..15 scale (IntRed/IntGreen/IntBlue called with 15.0f)
+		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
+		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
+		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
+		// search window per component: seed +/- a_uiRadius, BOTH ends clamped to [0, 15] so only valid 4-bit values reach ConvertFromRGB4()
+		int iMinRed1 = iColor1Red - (int)a_uiRadius;
+		if (iMinRed1 < 0)
+		{
+			iMinRed1 = 0;
+		}
+		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
+		if (iMaxRed1 > 15)
+		{
+			iMaxRed1 = 15;
+		}
+
+		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
+		if (iMinGreen1 < 0)
+		{
+			iMinGreen1 = 0;
+		}
+		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
+		if (iMaxGreen1 > 15)
+		{
+			iMaxGreen1 = 15;
+		}
+
+		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
+		if (iMinBlue1 < 0)
+		{
+			iMinBlue1 = 0;
+		}
+		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
+		if (iMaxBlue1 > 15)
+		{
+			iMaxBlue1 = 15;
+		}
+		// same seed extraction and clamped windows for color2
+		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
+		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
+		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
+
+		int iMinRed2 = iColor2Red - (int)a_uiRadius;
+		if (iMinRed2 < 0)
+		{
+			iMinRed2 = 0;
+		}
+		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
+		if (iMaxRed2 > 15)
+		{
+			iMaxRed2 = 15;
+		}
+
+		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
+		if (iMinGreen2 < 0)
+		{
+			iMinGreen2 = 0;
+		}
+		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
+		if (iMaxGreen2 > 15)
+		{
+			iMaxGreen2 = 15;
+		}
+
+		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
+		if (iMinBlue2 < 0)
+		{
+			iMinBlue2 = 0;
+		}
+		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
+		if (iMaxBlue2 > 15)
+		{
+			iMaxBlue2 = 15;
+		}
+
+		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
+		{
+			encodingTry.m_uiCW1 = uiDistance;
+
+			// twiddle m_frgbaOriginalColor1_TAndH
+			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
+			{
+				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
+				{
+					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
+					{
+						encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
+						encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;
+
+						// if color1 == color2, H encoding issues can pop up, so skip this candidate
+						if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue)
+						{
+							continue;
+						}
+
+						encodingTry.TryH_BestSelectorCombination();
+
+						if (encodingTry.m_fError < m_fError)
+						{
+							m_mode = encodingTry.m_mode;
+							m_boolDiff = encodingTry.m_boolDiff;
+							m_boolFlip = encodingTry.m_boolFlip;
+
+							m_frgbaColor1 = encodingTry.m_frgbaColor1;
+							m_frgbaColor2 = encodingTry.m_frgbaColor2;
+							m_uiCW1 = encodingTry.m_uiCW1;
+
+							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+							{
+								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+							}
+
+							m_fError = encodingTry.m_fError;
+						}
+					}
+				}
+			}
+
+			// twiddle m_frgbaOriginalColor2_TAndH
+			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
+			{
+				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
+				{
+					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
+					{
+						encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;
+						encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
+
+						// if color1 == color2, H encoding issues can pop up, so skip this candidate
+						if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue)
+						{
+							continue;
+						}
+
+						encodingTry.TryH_BestSelectorCombination();
+
+						if (encodingTry.m_fError < m_fError)
+						{
+							m_mode = encodingTry.m_mode;
+							m_boolDiff = encodingTry.m_boolDiff;
+							m_boolFlip = encodingTry.m_boolFlip;
+
+							m_frgbaColor1 = encodingTry.m_frgbaColor1;
+							m_frgbaColor2 = encodingTry.m_frgbaColor2;
+							m_uiCW1 = encodingTry.m_uiCW1;
+
+							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+							{
+								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+							}
+
+							m_fError = encodingTry.m_fError;
+						}
+					}
+				}
+			}
+
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best selector combination for TryH
+	// called on an encodingTry
+	//
+	void Block4x4Encoding_RGB8::TryH_BestSelectorCombination(void)
+	{
+		assert(SELECTORS == 4);
+		// H-mode palette: color1 + d, color1 - d, color2 + d, color2 - d (d looked up via m_uiCW1)
+		const float fOffset = s_afTHDistanceTable[m_uiCW1];
+		ColorFloatRGBA afrgbaPalette[SELECTORS];
+		afrgbaPalette[0] = (m_frgbaColor1 + fOffset).ClampRGB();
+		afrgbaPalette[1] = (m_frgbaColor1 - fOffset).ClampRGB();
+		afrgbaPalette[2] = (m_frgbaColor2 + fOffset).ClampRGB();
+		afrgbaPalette[3] = (m_frgbaColor2 - fOffset).ClampRGB();
+
+		// per-pixel winners so far; every error starts at FLT_MAX
+		unsigned int auiWinningSelector[PIXELS];
+		ColorFloatRGBA afrgbaWinningColor[PIXELS];
+		float afLowestError[PIXELS];
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			afLowestError[uiPixel] = FLT_MAX;
+		}
+
+		// score each palette entry against every pixel, keeping only strictly better matches
+		for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+		{
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				float fError = CalcPixelError(afrgbaPalette[uiSelector], m_afDecodedAlphas[uiPixel], m_pafrgbaSource[uiPixel]);
+				if (!(fError < afLowestError[uiPixel]))
+				{
+					continue;
+				}
+				afLowestError[uiPixel] = fError;
+				auiWinningSelector[uiPixel] = uiSelector;
+				afrgbaWinningColor[uiPixel] = afrgbaPalette[uiSelector];
+			}
+		}
+
+		// total the per-pixel minima in pixel order
+		float fTotalError = 0.0f;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			fTotalError += afLowestError[uiPixel];
+		}
+
+		// keep this combination only when it beats the error already stored in this object
+		if (!(fTotalError < m_fError))
+		{
+			return;
+		}
+		m_fError = fTotalError;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			m_auiSelectors[uiPixel] = auiWinningSelector[uiPixel];
+			m_afrgbaDecodedColors[uiPixel] = afrgbaWinningColor[uiPixel];
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// use linear regression to find the best fit for colors along the edges of the 4x4 block
+	//
+	void Block4x4Encoding_RGB8::CalculatePlanarCornerColors(void)
+	{
+		ColorFloatRGBA afrgbaEdge[MAX_PLANAR_REGRESSION_SIZE];
+		ColorFloatRGBA frgbaStep;		// slope written by ColorRegression()
+		ColorFloatRGBA frgbaStart;		// offset written by ColorRegression()
+
+		// top edge (source pixels 0, 4, 8, 12): first estimate of color1, and color2 four steps along
+		for (unsigned int uiStep = 0; uiStep < 4; uiStep++)
+		{
+			afrgbaEdge[uiStep] = m_pafrgbaSource[uiStep * 4];
+		}
+		ColorRegression(afrgbaEdge, 4, &frgbaStep, &frgbaStart);
+		m_frgbaColor1 = frgbaStart;
+		m_frgbaColor2 = (frgbaStep * 4.0f) + frgbaStart;
+
+		// left edge (source pixels 0, 1, 2, 3): refine color1, first estimate of color3
+		for (unsigned int uiStep = 0; uiStep < 4; uiStep++)
+		{
+			afrgbaEdge[uiStep] = m_pafrgbaSource[uiStep];
+		}
+		ColorRegression(afrgbaEdge, 4, &frgbaStep, &frgbaStart);
+		m_frgbaColor1 = (m_frgbaColor1 + frgbaStart) * 0.5f;		// mean of the top-edge and left-edge estimates
+		m_frgbaColor3 = (frgbaStep * 4.0f) + frgbaStart;
+
+		// right edge (source pixels 12, 13, 14, 15): refine color2
+		for (unsigned int uiStep = 0; uiStep < 4; uiStep++)
+		{
+			afrgbaEdge[uiStep] = m_pafrgbaSource[12 + uiStep];
+		}
+		ColorRegression(afrgbaEdge, 4, &frgbaStep, &frgbaStart);
+		m_frgbaColor2 = (m_frgbaColor2 + frgbaStart) * 0.5f;		// mean of the top-edge and right-edge estimates
+
+		// bottom edge (source pixels 3, 7, 11, 15): refine color3
+		for (unsigned int uiStep = 0; uiStep < 4; uiStep++)
+		{
+			afrgbaEdge[uiStep] = m_pafrgbaSource[3 + uiStep * 4];
+		}
+		ColorRegression(afrgbaEdge, 4, &frgbaStep, &frgbaStart);
+		m_frgbaColor3 = (m_frgbaColor3 + frgbaStart) * 0.5f;		// mean of the left-edge and bottom-edge estimates
+
+		// snap all three corner colors to the 6/7/6 grid
+		m_frgbaColor1 = m_frgbaColor1.QuantizeR6G7B6();
+		m_frgbaColor2 = m_frgbaColor2.QuantizeR6G7B6();
+		m_frgbaColor3 = m_frgbaColor3.QuantizeR6G7B6();
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try different corner colors by slightly changing R, G and B independently
+	//
+	// R, G and B decoding and errors are independent, so R, G and B twiddles can be independent
+	//
+	// return true if improvement
+	//
+	bool Block4x4Encoding_RGB8::TwiddlePlanar(void)
+	{
+		bool boolAnyGain = false;
+		// red: keep calling until a pass reports no improvement
+		for (bool boolGain = TwiddlePlanarR(); boolGain; boolGain = TwiddlePlanarR())
+		{
+			boolAnyGain = true;
+		}
+		// green: same loop, run after red has settled
+		for (bool boolGain = TwiddlePlanarG(); boolGain; boolGain = TwiddlePlanarG())
+		{
+			boolAnyGain = true;
+		}
+		// blue: same loop, run last
+		for (bool boolGain = TwiddlePlanarB(); boolGain; boolGain = TwiddlePlanarB())
+		{
+			boolAnyGain = true;
+		}
+
+		return boolAnyGain;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try different corner colors by slightly changing R
+	//
+	bool Block4x4Encoding_RGB8::TwiddlePlanarR()
+	{
+		// work on a scratch copy forced into planar mode; *this is only touched when
+		// a candidate scores a strictly lower error
+		Block4x4Encoding_RGB8 encodingTry = *this;
+		encodingTry.m_mode = MODE_PLANAR;
+		encodingTry.m_boolDiff = true;
+		encodingTry.m_boolFlip = false;
+
+		// starting red of the three planar colors, as integers scaled by 63
+		const int iCenterO = encodingTry.m_frgbaColor1.IntRed(63.0f);
+		const int iCenterH = encodingTry.m_frgbaColor2.IntRed(63.0f);
+		const int iCenterV = encodingTry.m_frgbaColor3.IntRed(63.0f);
+
+		// candidates are center-1 .. center+1, restricted to the legal range 0..63
+		// (the range is empty when a center is more than one step outside 0..63)
+		const int iFirstO = (iCenterO - 1 < 0) ? 0 : iCenterO - 1;
+		const int iLastO = (iCenterO + 1 > 63) ? 63 : iCenterO + 1;
+
+		const int iFirstH = (iCenterH - 1 < 0) ? 0 : iCenterH - 1;
+		const int iLastH = (iCenterH + 1 > 63) ? 63 : iCenterH + 1;
+
+		const int iFirstV = (iCenterV - 1 < 0) ? 0 : iCenterV - 1;
+		const int iLastV = (iCenterV + 1 > 63) ? 63 : iCenterV + 1;
+
+		bool boolImprovement = false;
+
+		// walk every origin/horizontal/vertical combination inside those ranges
+		for (int iO = iFirstO; iO <= iLastO; iO++)
+		{
+			// expand 6 bits to 8 by repeating the top two bits, then scale to 0..1
+			encodingTry.m_frgbaColor1.fR = ((iO << 2) + (iO >> 4)) / 255.0f;
+
+			for (int iH = iFirstH; iH <= iLastH; iH++)
+			{
+				// same expansion for the horizontal color
+				encodingTry.m_frgbaColor2.fR = ((iH << 2) + (iH >> 4)) / 255.0f;
+
+				for (int iV = iFirstV; iV <= iLastV; iV++)
+				{
+					// the unmodified combination is skipped
+					if (iO == iCenterO && iH == iCenterH && iV == iCenterV)
+					{
+						continue;
+					}
+
+					// and for the vertical color
+					encodingTry.m_frgbaColor3.fR = ((iV << 2) + (iV >> 4)) / 255.0f;
+
+					// decode the candidate, then score it (the result is read from encodingTry.m_fError)
+					encodingTry.DecodePixels_Planar();
+					encodingTry.CalcBlockError();
+
+					// not strictly better: move on to the next candidate
+					if (!(encodingTry.m_fError < m_fError))
+					{
+						continue;
+					}
+
+					// adopt the candidate as the new best encoding
+					m_mode = MODE_PLANAR;
+					m_boolDiff = true;
+					m_boolFlip = false;
+
+					m_frgbaColor1 = encodingTry.m_frgbaColor1;
+					m_frgbaColor2 = encodingTry.m_frgbaColor2;
+					m_frgbaColor3 = encodingTry.m_frgbaColor3;
+
+					for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+					{
+						m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+					}
+
+					m_fError = encodingTry.m_fError;
+					boolImprovement = true;
+				}
+			}
+		}
+
+		return boolImprovement;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try different corner colors by slightly changing G
+	//
+	bool Block4x4Encoding_RGB8::TwiddlePlanarG()
+	{
+		// work on a scratch copy forced into planar mode; *this is only touched when
+		// a candidate scores a strictly lower error
+		Block4x4Encoding_RGB8 encodingTry = *this;
+		encodingTry.m_mode = MODE_PLANAR;
+		encodingTry.m_boolDiff = true;
+		encodingTry.m_boolFlip = false;
+
+		// starting green of the three planar colors, as integers scaled by 127
+		const int iCenterO = encodingTry.m_frgbaColor1.IntGreen(127.0f);
+		const int iCenterH = encodingTry.m_frgbaColor2.IntGreen(127.0f);
+		const int iCenterV = encodingTry.m_frgbaColor3.IntGreen(127.0f);
+
+		// candidates are center-1 .. center+1, restricted to the legal range 0..127
+		// (the range is empty when a center is more than one step outside 0..127)
+		const int iFirstO = (iCenterO - 1 < 0) ? 0 : iCenterO - 1;
+		const int iLastO = (iCenterO + 1 > 127) ? 127 : iCenterO + 1;
+
+		const int iFirstH = (iCenterH - 1 < 0) ? 0 : iCenterH - 1;
+		const int iLastH = (iCenterH + 1 > 127) ? 127 : iCenterH + 1;
+
+		const int iFirstV = (iCenterV - 1 < 0) ? 0 : iCenterV - 1;
+		const int iLastV = (iCenterV + 1 > 127) ? 127 : iCenterV + 1;
+
+		bool boolImprovement = false;
+
+		// walk every origin/horizontal/vertical combination inside those ranges
+		for (int iO = iFirstO; iO <= iLastO; iO++)
+		{
+			// expand 7 bits to 8 by repeating the top bit, then scale to 0..1
+			encodingTry.m_frgbaColor1.fG = ((iO << 1) + (iO >> 6)) / 255.0f;
+
+			for (int iH = iFirstH; iH <= iLastH; iH++)
+			{
+				// same expansion for the horizontal color
+				encodingTry.m_frgbaColor2.fG = ((iH << 1) + (iH >> 6)) / 255.0f;
+
+				for (int iV = iFirstV; iV <= iLastV; iV++)
+				{
+					// the unmodified combination is skipped
+					if (iO == iCenterO &&
+						iH == iCenterH &&
+						iV == iCenterV)
+					{
+						continue;
+					}
+
+					// and for the vertical color
+					encodingTry.m_frgbaColor3.fG = ((iV << 1) + (iV >> 6)) / 255.0f;
+
+					// decode the candidate, then score it (the result is read from encodingTry.m_fError)
+					encodingTry.DecodePixels_Planar();
+					encodingTry.CalcBlockError();
+
+					// not strictly better: move on to the next candidate
+					if (!(encodingTry.m_fError < m_fError))
+					{
+						continue;
+					}
+
+					// adopt the candidate as the new best encoding
+					m_mode = MODE_PLANAR;
+					m_boolDiff = true;
+					m_boolFlip = false;
+
+					m_frgbaColor1 = encodingTry.m_frgbaColor1;
+					m_frgbaColor2 = encodingTry.m_frgbaColor2;
+					m_frgbaColor3 = encodingTry.m_frgbaColor3;
+
+					for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+					{
+						m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+					}
+
+					m_fError = encodingTry.m_fError;
+					boolImprovement = true;
+				}
+			}
+		}
+
+		return boolImprovement;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try different corner colors by slightly changing B
+	//
+	bool Block4x4Encoding_RGB8::TwiddlePlanarB()
+	{
+		// work on a scratch copy forced into planar mode; *this is only touched when
+		// a candidate scores a strictly lower error
+		Block4x4Encoding_RGB8 encodingTry = *this;
+		encodingTry.m_mode = MODE_PLANAR;
+		encodingTry.m_boolDiff = true;
+		encodingTry.m_boolFlip = false;
+
+		// starting blue of the three planar colors, as integers scaled by 63
+		const int iCenterO = encodingTry.m_frgbaColor1.IntBlue(63.0f);
+		const int iCenterH = encodingTry.m_frgbaColor2.IntBlue(63.0f);
+		const int iCenterV = encodingTry.m_frgbaColor3.IntBlue(63.0f);
+
+		// candidates are center-1 .. center+1, restricted to the legal range 0..63
+		// (the range is empty when a center is more than one step outside 0..63)
+		const int iFirstO = (iCenterO - 1 < 0) ? 0 : iCenterO - 1;
+		const int iLastO = (iCenterO + 1 > 63) ? 63 : iCenterO + 1;
+
+		const int iFirstH = (iCenterH - 1 < 0) ? 0 : iCenterH - 1;
+		const int iLastH = (iCenterH + 1 > 63) ? 63 : iCenterH + 1;
+
+		const int iFirstV = (iCenterV - 1 < 0) ? 0 : iCenterV - 1;
+		const int iLastV = (iCenterV + 1 > 63) ? 63 : iCenterV + 1;
+
+		bool boolImprovement = false;
+
+		// walk every origin/horizontal/vertical combination inside those ranges
+		for (int iO = iFirstO; iO <= iLastO; iO++)
+		{
+			// expand 6 bits to 8 by repeating the top two bits, then scale to 0..1
+			encodingTry.m_frgbaColor1.fB = ((iO << 2) + (iO >> 4)) / 255.0f;
+
+			for (int iH = iFirstH; iH <= iLastH; iH++)
+			{
+				// same expansion for the horizontal color
+				encodingTry.m_frgbaColor2.fB = ((iH << 2) + (iH >> 4)) / 255.0f;
+
+				for (int iV = iFirstV; iV <= iLastV; iV++)
+				{
+					// the unmodified combination is skipped
+					if (iO == iCenterO && iH == iCenterH && iV == iCenterV)
+					{
+						continue;
+					}
+
+					// and for the vertical color
+					encodingTry.m_frgbaColor3.fB = ((iV << 2) + (iV >> 4)) / 255.0f;
+
+					// decode the candidate, then score it (the result is read from encodingTry.m_fError)
+					encodingTry.DecodePixels_Planar();
+					encodingTry.CalcBlockError();
+
+					// not strictly better: move on to the next candidate
+					if (!(encodingTry.m_fError < m_fError))
+					{
+						continue;
+					}
+
+					// adopt the candidate as the new best encoding
+					m_mode = MODE_PLANAR;
+					m_boolDiff = true;
+					m_boolFlip = false;
+
+					m_frgbaColor1 = encodingTry.m_frgbaColor1;
+					m_frgbaColor2 = encodingTry.m_frgbaColor2;
+					m_frgbaColor3 = encodingTry.m_frgbaColor3;
+
+					for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+					{
+						m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+					}
+
+					m_fError = encodingTry.m_fError;
+					boolImprovement = true;
+				}
+			}
+		}
+
+		return boolImprovement;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_RGB8::SetEncodingBits(void)
+	{
+		// dispatch on m_mode; each branch calls the bit writer for that mode
+		if (m_mode == MODE_ETC1)
+		{
+			// ETC1 blocks go through Block4x4Encoding_ETC1's implementation
+			Block4x4Encoding_ETC1::SetEncodingBits();
+		}
+		else if (m_mode == MODE_T)
+		{
+			SetEncodingBits_T();
+		}
+		else if (m_mode == MODE_H)
+		{
+			SetEncodingBits_H();
+		}
+		else if (m_mode == MODE_PLANAR)
+		{
+			SetEncodingBits_Planar();
+		}
+		else
+		{
+			// any other mode value is unexpected here
+			assert(false);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state for T mode
+	//
+	void Block4x4Encoding_RGB8::SetEncodingBits_T(void)
+	{
+		static const bool SANITY_CHECK = true;
+		// preconditions: this encoding must be in T mode with the diff flag set
+		assert(m_mode == MODE_T);
+		assert(m_boolDiff == true);
+		// both base colors as integer components scaled by 15
+		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
+		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
+		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
+
+		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
+		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
+		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
+		// red1 is split: red1a receives uiRed1 >> 2, red1b the unshifted value (presumably truncated by the bitfield width -- confirm)
+		m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2;
+		m_pencodingbitsRGB8->t.red1b = uiRed1;
+		m_pencodingbitsRGB8->t.green1 = uiGreen1;
+		m_pencodingbitsRGB8->t.blue1 = uiBlue1;
+		// color2 components are each stored in a single field
+		m_pencodingbitsRGB8->t.red2 = uiRed2;
+		m_pencodingbitsRGB8->t.green2 = uiGreen2;
+		m_pencodingbitsRGB8->t.blue2 = uiBlue2;
+		// the distance index m_uiCW1 is split like red1: da receives m_uiCW1 >> 1, db the unshifted value
+		m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1;
+		m_pencodingbitsRGB8->t.db = m_uiCW1;
+		// T mode is signalled by the diff bit plus an out-of-range red sum (set up below)
+		m_pencodingbitsRGB8->t.diff = 1;
+		// selector bits are written by the ETC1 helper
+		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
+
+		// create an invalid R differential to trigger T mode: clear the detect bits, read red1 + dred2 through the differential view, then set the detect bits so that sum leaves 0..31
+		m_pencodingbitsRGB8->t.detect1 = 0;
+		m_pencodingbitsRGB8->t.detect2 = 0;
+		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+		if (iRed2 >= 4)
+		{
+			m_pencodingbitsRGB8->t.detect1 = 7;
+			m_pencodingbitsRGB8->t.detect2 = 0;
+		}
+		else
+		{
+			m_pencodingbitsRGB8->t.detect1 = 0;
+			m_pencodingbitsRGB8->t.detect2 = 1;
+		}
+
+		if (SANITY_CHECK)
+		{
+			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+
+			// make sure red overflows
+			assert(iRed2 < 0 || iRed2 > 31);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state for H mode
+	//
+	// colors and selectors may need to swap in order to generate lsb of distance index
+	//
+	void Block4x4Encoding_RGB8::SetEncodingBits_H(void)
+	{
+		// pack the two base colors, the distance index and the selectors into the H layout,
+		// then set the detect bits that mark the block as H mode
+		static const bool SANITY_CHECK = true;
+
+		assert(m_mode == MODE_H);
+		assert(m_boolDiff == true);
+
+		// quantize each channel of both base colors with a scale of 15
+		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
+		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
+		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
+
+		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
+		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
+		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
+
+		// each color as a single integer so the two can be ordered
+		unsigned int uiPacked1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1;
+		unsigned int uiPacked2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2;
+
+		// the distance index is stored without its low bit (see da/db below); that bit is
+		// carried by the order of the two colors instead, so the colors trade places
+		// whenever their order disagrees with the low bit of m_uiCW1
+		bool boolOddDistance = m_uiCW1 & 1;
+		bool boolSwapColors = (uiPacked1 < uiPacked2) ^ !boolOddDistance;
+
+		// channel values in the order they are written: "First" fills the color 1 fields
+		unsigned int uiRedFirst = boolSwapColors ? uiRed2 : uiRed1;
+		unsigned int uiGreenFirst = boolSwapColors ? uiGreen2 : uiGreen1;
+		unsigned int uiBlueFirst = boolSwapColors ? uiBlue2 : uiBlue1;
+		unsigned int uiRedSecond = boolSwapColors ? uiRed1 : uiRed2;
+		unsigned int uiGreenSecond = boolSwapColors ? uiGreen1 : uiGreen2;
+		unsigned int uiBlueSecond = boolSwapColors ? uiBlue1 : uiBlue2;
+
+		// color 1: green and blue are split across several bit fields
+		m_pencodingbitsRGB8->h.red1 = uiRedFirst;
+		m_pencodingbitsRGB8->h.green1a = uiGreenFirst >> 1;
+		m_pencodingbitsRGB8->h.green1b = uiGreenFirst;
+		m_pencodingbitsRGB8->h.blue1a = uiBlueFirst >> 3;
+		m_pencodingbitsRGB8->h.blue1b = uiBlueFirst >> 1;
+		m_pencodingbitsRGB8->h.blue1c = uiBlueFirst;
+
+		// color 2: only green is split
+		m_pencodingbitsRGB8->h.red2 = uiRedSecond;
+		m_pencodingbitsRGB8->h.green2a = uiGreenSecond >> 1;
+		m_pencodingbitsRGB8->h.green2b = uiGreenSecond;
+		m_pencodingbitsRGB8->h.blue2 = uiBlueSecond;
+
+		// distance index, low bit dropped
+		m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
+		m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
+
+		// the diff bit is always set for this mode
+		m_pencodingbitsRGB8->h.diff = 1;
+
+		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
+
+		// swapped colors need the low 16 selector bits inverted to match
+		if (boolSwapColors)
+		{
+			m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF;
+		}
+
+		// create a valid R differential and an invalid G differential to trigger H mode
+		// the detect bits are cleared before the differential view is read back
+		// NOTE(review): assumes h and differential overlay the same bits - confirm
+		m_pencodingbitsRGB8->h.detect1 = 0;
+		m_pencodingbitsRGB8->h.detect2 = 0;
+		m_pencodingbitsRGB8->h.detect3 = 0;
+		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
+
+		// red has to stay inside [0, 31]
+		if (iRed2 < 0 || iRed2 > 31)
+		{
+			m_pencodingbitsRGB8->h.detect1 = 1;
+		}
+
+		// green has to land outside [0, 31]
+		bool boolLargeGreen = iGreen2 >= 4;
+		m_pencodingbitsRGB8->h.detect2 = boolLargeGreen ? 7 : 0;
+		m_pencodingbitsRGB8->h.detect3 = boolLargeGreen ? 0 : 1;
+
+		if (SANITY_CHECK)
+		{
+			// re-read the differential view now that the detect bits are final
+			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+			iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
+
+			// make sure red doesn't overflow and green does
+			assert(iRed2 >= 0 && iRed2 <= 31);
+			assert(iGreen2 < 0 || iGreen2 > 31);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state for Planar mode
+	//
+	void Block4x4Encoding_RGB8::SetEncodingBits_Planar(void)
+	{
+		static const bool SANITY_CHECK = true;
+		// packs m_frgbaColor1 (origin), m_frgbaColor2 (horizontal) and m_frgbaColor3 (vertical) into the planar layout
+		assert(m_mode == MODE_PLANAR);
+		assert(m_boolDiff == true);
+		// red and blue are quantized with a scale of 63, green with a scale of 127
+		unsigned int uiOriginRed = (unsigned int)m_frgbaColor1.IntRed(63.0f);
+		unsigned int uiOriginGreen = (unsigned int)m_frgbaColor1.IntGreen(127.0f);
+		unsigned int uiOriginBlue = (unsigned int)m_frgbaColor1.IntBlue(63.0f);
+
+		unsigned int uiHorizRed = (unsigned int)m_frgbaColor2.IntRed(63.0f);
+		unsigned int uiHorizGreen = (unsigned int)m_frgbaColor2.IntGreen(127.0f);
+		unsigned int uiHorizBlue = (unsigned int)m_frgbaColor2.IntBlue(63.0f);
+
+		unsigned int uiVertRed = (unsigned int)m_frgbaColor3.IntRed(63.0f);
+		unsigned int uiVertGreen = (unsigned int)m_frgbaColor3.IntGreen(127.0f);
+		unsigned int uiVertBlue = (unsigned int)m_frgbaColor3.IntBlue(63.0f);
+		// values split over several fields: the shifted writes carry the high-order part (field widths, declared elsewhere, presumably truncate the rest)
+		m_pencodingbitsRGB8->planar.originRed = uiOriginRed;
+		m_pencodingbitsRGB8->planar.originGreen1 = uiOriginGreen >> 6;
+		m_pencodingbitsRGB8->planar.originGreen2 = uiOriginGreen;
+		m_pencodingbitsRGB8->planar.originBlue1 = uiOriginBlue >> 5;
+		m_pencodingbitsRGB8->planar.originBlue2 = uiOriginBlue >> 3;
+		m_pencodingbitsRGB8->planar.originBlue3 = uiOriginBlue >> 1;
+		m_pencodingbitsRGB8->planar.originBlue4 = uiOriginBlue;
+
+		m_pencodingbitsRGB8->planar.horizRed1 = uiHorizRed >> 1;
+		m_pencodingbitsRGB8->planar.horizRed2 = uiHorizRed;
+		m_pencodingbitsRGB8->planar.horizGreen = uiHorizGreen;
+		m_pencodingbitsRGB8->planar.horizBlue1 = uiHorizBlue >> 5;
+		m_pencodingbitsRGB8->planar.horizBlue2 = uiHorizBlue;
+
+		m_pencodingbitsRGB8->planar.vertRed1 = uiVertRed >> 3;
+		m_pencodingbitsRGB8->planar.vertRed2 = uiVertRed;
+		m_pencodingbitsRGB8->planar.vertGreen1 = uiVertGreen >> 2;
+		m_pencodingbitsRGB8->planar.vertGreen2 = uiVertGreen;
+		m_pencodingbitsRGB8->planar.vertBlue = uiVertBlue;
+
+		m_pencodingbitsRGB8->planar.diff = 1;
+		// the detect bits are zeroed before the differential view is read (NOTE(review): assumes planar and differential overlay the same bits - confirm)
+		// create valid RG differentials and an invalid B differential to trigger planar mode
+		m_pencodingbitsRGB8->planar.detect1 = 0;
+		m_pencodingbitsRGB8->planar.detect2 = 0;
+		m_pencodingbitsRGB8->planar.detect3 = 0;
+		m_pencodingbitsRGB8->planar.detect4 = 0;
+		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
+		int iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2;
+		if (iRed2 < 0 || iRed2 > 31)
+		{
+			m_pencodingbitsRGB8->planar.detect1 = 1;
+		}
+		if (iGreen2 < 0 || iGreen2 > 31)
+		{
+			m_pencodingbitsRGB8->planar.detect2 = 1;
+		}
+		if (iBlue2 >= 4)
+		{
+			m_pencodingbitsRGB8->planar.detect3 = 7;
+			m_pencodingbitsRGB8->planar.detect4 = 0;
+		}
+		else
+		{
+			m_pencodingbitsRGB8->planar.detect3 = 0;
+			m_pencodingbitsRGB8->planar.detect4 = 1;
+		}
+
+		if (SANITY_CHECK)
+		{
+			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+			iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
+			iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2;
+
+			// make sure red and green don't overflow and blue does
+			assert(iRed2 >= 0 && iRed2 <= 31);
+			assert(iGreen2 >= 0 && iGreen2 <= 31);
+			assert(iBlue2 < 0 || iBlue2 > 31);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the decoded colors and decoded alpha based on the encoding state for T mode
+	//
+	void Block4x4Encoding_RGB8::DecodePixels_T(void)
+	{
+
+		// the distance picked by m_uiCW1, applied equally to r, g and b (alpha term is 0)
+		float fOffset = s_afTHDistanceTable[m_uiCW1];
+		ColorFloatRGBA frgbaOffset(fOffset, fOffset, fOffset, 0.0f);
+
+		// the four colors a selector can choose from, indexed by selector value:
+		//   0: color 1
+		//   1: color 2 plus the distance
+		//   2: color 2
+		//   3: color 2 minus the distance
+		ColorFloatRGBA afrgbaPaint[4];
+		afrgbaPaint[0] = m_frgbaColor1;
+		afrgbaPaint[1] = (m_frgbaColor2 + frgbaOffset).ClampRGB();
+		afrgbaPaint[2] = m_frgbaColor2;
+		afrgbaPaint[3] = (m_frgbaColor2 - frgbaOffset).ClampRGB();
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			unsigned int uiSelector = m_auiSelectors[uiPixel];
+
+			// any other selector value leaves the pixel's decoded color as it was
+			if (uiSelector < 4)
+			{
+				m_afrgbaDecodedColors[uiPixel] = afrgbaPaint[uiSelector];
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the decoded colors and decoded alpha based on the encoding state for H mode
+	//
+	void Block4x4Encoding_RGB8::DecodePixels_H(void)
+	{
+
+		// the distance picked by m_uiCW1, applied equally to r, g and b (alpha term is 0)
+		float fOffset = s_afTHDistanceTable[m_uiCW1];
+		ColorFloatRGBA frgbaOffset(fOffset, fOffset, fOffset, 0.0f);
+
+		// the four colors a selector can choose from, indexed by selector value:
+		//   0: color 1 plus the distance
+		//   1: color 1 minus the distance
+		//   2: color 2 plus the distance
+		//   3: color 2 minus the distance
+		ColorFloatRGBA afrgbaPaint[4];
+		afrgbaPaint[0] = (m_frgbaColor1 + frgbaOffset).ClampRGB();
+		afrgbaPaint[1] = (m_frgbaColor1 - frgbaOffset).ClampRGB();
+		afrgbaPaint[2] = (m_frgbaColor2 + frgbaOffset).ClampRGB();
+		afrgbaPaint[3] = (m_frgbaColor2 - frgbaOffset).ClampRGB();
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			unsigned int uiSelector = m_auiSelectors[uiPixel];
+
+			// any other selector value leaves the pixel's decoded color as it was
+			if (uiSelector < 4)
+			{
+				m_afrgbaDecodedColors[uiPixel] = afrgbaPaint[uiSelector];
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the decoded colors and decoded alpha based on the encoding state for Planar mode
+	//
+	void Block4x4Encoding_RGB8::DecodePixels_Planar(void)
+	{
+
+		// a float channel as its rounded integer value on the 0..255 scale
+		auto toInt255 = [](float a_fChannel) { return (int)roundf(a_fChannel * 255.0f); };
+
+		// one channel of the plane: a_iO at the origin, a_iH and a_iV reached four steps
+		// away along x and y.  the + 2 rounds before the divide-by-four shift
+		auto plane = [](int a_iX, int a_iY, int a_iO, int a_iH, int a_iV)
+		{
+			return (a_iX*(a_iH - a_iO) + a_iY*(a_iV - a_iO) + 4*a_iO + 2) >> 2;
+		};
+
+		// { r, g, b } of the origin, horizontal and vertical colors
+		int aiO[3] = { toInt255(m_frgbaColor1.fR), toInt255(m_frgbaColor1.fG), toInt255(m_frgbaColor1.fB) };
+		int aiH[3] = { toInt255(m_frgbaColor2.fR), toInt255(m_frgbaColor2.fG), toInt255(m_frgbaColor2.fB) };
+		int aiV[3] = { toInt255(m_frgbaColor3.fR), toInt255(m_frgbaColor3.fG), toInt255(m_frgbaColor3.fB) };
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			// x is the pixel index divided by four, y the remainder
+			int iX = (int)(uiPixel >> 2);
+			int iY = (int)(uiPixel & 3);
+
+			ColorFloatRGBA frgba;
+			frgba.fR = (float)plane(iX, iY, aiO[0], aiH[0], aiV[0]) / 255.0f;
+			frgba.fG = (float)plane(iX, iY, aiO[1], aiH[1], aiV[1]) / 255.0f;
+			frgba.fB = (float)plane(iX, iY, aiO[2], aiH[2], aiV[2]) / 255.0f;
+			frgba.fA = 1.0f;
+
+			m_afrgbaDecodedColors[uiPixel] = frgba.ClampRGB();
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a linear regression for the a_uiPixels in a_pafrgbaPixels[]
+	//
+	// output the closest color line using a_pfrgbaSlope and a_pfrgbaOffset
+	//
+	void Block4x4Encoding_RGB8::ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels,
+												ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset)
+	{
+		// runs one regression per color channel with x = pixel index and y = channel value;
+		// only fR, fG and fB of *a_pfrgbaSlope and *a_pfrgbaOffset are written.
+		// the sample buffers below are fixed size, so a larger count would overrun them
+		assert(a_uiPixels <= MAX_PLANAR_REGRESSION_SIZE);
+
+		// select channels through member pointers instead of reinterpreting the color as a
+		// float array, so nothing here depends on the memory layout of ColorFloatRGBA
+		float ColorFloatRGBA::*apfChannel[3] =
+			{ &ColorFloatRGBA::fR, &ColorFloatRGBA::fG, &ColorFloatRGBA::fB };
+
+		float afX[MAX_PLANAR_REGRESSION_SIZE];
+		float afY[MAX_PLANAR_REGRESSION_SIZE];
+
+		// handle r, g and b separately.  don't bother with a
+		for (unsigned int uiComponent = 0; uiComponent < 3; uiComponent++)
+		{
+			for (unsigned int uiPixel = 0; uiPixel < a_uiPixels; uiPixel++)
+			{
+				afX[uiPixel] = (float)uiPixel;
+				afY[uiPixel] = a_pafrgbaPixels[uiPixel].*apfChannel[uiComponent];
+			}
+			Etc::Regression(afX, afY, a_uiPixels,
+				&(a_pfrgbaSlope->*apfChannel[uiComponent]), &(a_pfrgbaOffset->*apfChannel[uiComponent]));
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+}
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.h b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.h
new file mode 100644
index 0000000..e405223
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcBlock4x4Encoding_ETC1.h"
+
+namespace Etc
+{
+
+	class Block4x4Encoding_RGB8 : public Block4x4Encoding_ETC1
+	{
+	public:
+		// extends the ETC1 encoding with the T, H and planar modes
+		Block4x4Encoding_RGB8(void);
+		virtual ~Block4x4Encoding_RGB8(void);
+		// rebuild the encoding state from previously written encoding bits
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+											unsigned char *a_paucEncodingBits,
+											ColorFloatRGBA *a_pafrgbaSource,
+
+											ErrorMetric a_errormetric);
+		// perform a single encoding iteration; a_fEffort is the caller's effort setting
+		virtual void PerformIteration(float a_fEffort);
+		// write the current encoding state into the encoding bits
+		virtual void SetEncodingBits(void);
+		// third base color; see m_frgbaColor3
+		inline ColorFloatRGBA GetColor3(void) const
+		{
+			return m_frgbaColor3;
+		}
+
+	protected:
+
+		static const unsigned int PLANAR_CORNER_COLORS = 3;
+		static const unsigned int MAX_PLANAR_REGRESSION_SIZE = 4;	// sizes the sample buffers in ColorRegression
+		static const unsigned int TH_DISTANCES = 8;
+		// distance table for T and H modes; the decoders index it with m_uiCW1
+		static float s_afTHDistanceTable[TH_DISTANCES];
+
+		void TryPlanar(unsigned int a_uiRadius);
+		void TryTAndH(unsigned int a_uiRadius);
+
+		void InitFromEncodingBits_Planar(void);
+
+		ColorFloatRGBA	m_frgbaColor3;		// used for planar
+		// per-mode writers of the encoding bits
+		void SetEncodingBits_T(void);
+		void SetEncodingBits_H(void);
+		void SetEncodingBits_Planar(void);
+
+		// state shared between iterations
+		ColorFloatRGBA	m_frgbaOriginalColor1_TAndH;
+		ColorFloatRGBA	m_frgbaOriginalColor2_TAndH;
+
+		void CalculateBaseColorsForTAndH(void);
+		void TryT(unsigned int a_uiRadius);
+		void TryT_BestSelectorCombination(void);
+		void TryH(unsigned int a_uiRadius);
+		void TryH_BestSelectorCombination(void);
+
+	private:
+		// per-mode initialization from existing encoding bits
+		void InitFromEncodingBits_T(void);
+		void InitFromEncodingBits_H(void);
+
+		void CalculatePlanarCornerColors(void);
+		// per-channel linear regression over a_uiPixels colors
+		void ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels,
+			ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset);
+
+		bool TwiddlePlanar(void);
+		bool TwiddlePlanarR();
+		bool TwiddlePlanarG();
+		bool TwiddlePlanarB();
+		// per-mode decoders: fill m_afrgbaDecodedColors from the encoding state
+		void DecodePixels_T(void);
+		void DecodePixels_H(void);
+		void DecodePixels_Planar(void);
+
+	};
+
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.cpp b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.cpp
new file mode 100644
index 0000000..fc0678d
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.cpp
@@ -0,0 +1,1819 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding_RGB8A1.cpp contains:
+	Block4x4Encoding_RGB8A1
+	Block4x4Encoding_RGB8A1_Opaque
+	Block4x4Encoding_RGB8A1_Transparent
+
+These encoders are used when targeting file format RGB8A1.
+
+Block4x4Encoding_RGB8A1_Opaque is used when all pixels in the 4x4 block are opaque
+Block4x4Encoding_RGB8A1_Transparent is used when all pixels in the 4x4 block are transparent
+Block4x4Encoding_RGB8A1 is used when there is a mixture of alphas in the 4x4 block
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding_RGB8A1.h"
+
+#include "EtcBlock4x4.h"
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcBlock4x4Encoding_RGB8.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+namespace Etc
+{
+	
+	// ####################################################################################################
+	// Block4x4Encoding_RGB8A1
+	// ####################################################################################################
+
+	float Block4x4Encoding_RGB8A1::s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS] =	// color deltas indexed [codeword][selector]; Decode_ETC1 uses this table when m_boolOpaque is false
+	{	// selectors 0 and 2 add nothing; selectors 1 and 3 add and subtract the same amount
+		{ 0.0f / 255.0f, 8.0f / 255.0f, 0.0f / 255.0f, -8.0f / 255.0f },
+		{ 0.0f / 255.0f, 17.0f / 255.0f, 0.0f / 255.0f, -17.0f / 255.0f },
+		{ 0.0f / 255.0f, 29.0f / 255.0f, 0.0f / 255.0f, -29.0f / 255.0f },
+		{ 0.0f / 255.0f, 42.0f / 255.0f, 0.0f / 255.0f, -42.0f / 255.0f },
+		{ 0.0f / 255.0f, 60.0f / 255.0f, 0.0f / 255.0f, -60.0f / 255.0f },
+		{ 0.0f / 255.0f, 80.0f / 255.0f, 0.0f / 255.0f, -80.0f / 255.0f },
+		{ 0.0f / 255.0f, 106.0f / 255.0f, 0.0f / 255.0f, -106.0f / 255.0f },
+		{ 0.0f / 255.0f, 183.0f / 255.0f, 0.0f / 255.0f, -183.0f / 255.0f }
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Block4x4Encoding_RGB8A1::Block4x4Encoding_RGB8A1(void)
+	{
+		// defaults until InitFromSource() or InitFromEncodingBits() supplies real values
+		m_boolPunchThroughPixels = true;
+		m_boolTransparent = false;
+		m_boolOpaque = false;
+		m_pencodingbitsRGB8 = nullptr;
+	}
+	Block4x4Encoding_RGB8A1::~Block4x4Encoding_RGB8A1(void) {}	// empty body
+	// ----------------------------------------------------------------------------------------------------
+	// initialization prior to encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits
+	//
+	void Block4x4Encoding_RGB8A1::InitFromSource(Block4x4 *a_pblockParent,
+													ColorFloatRGBA *a_pafrgbaSource,
+													unsigned char *a_paucEncodingBits,
+													ErrorMetric a_errormetric)
+	{
+
+		// delegate the common setup to the inherited InitFromSource; every argument is
+		// forwarded unchanged
+		Block4x4Encoding_RGB8::InitFromSource(a_pblockParent, a_pafrgbaSource,
+												a_paucEncodingBits, a_errormetric);
+
+		// record how the parent block classified its source alphas
+		// (GetSourceAlphaMix() is queried once per flag)
+		m_boolOpaque = (a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE);
+		m_boolTransparent = (a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::TRANSPARENT);
+		m_boolPunchThroughPixels = a_pblockParent->HasPunchThroughPixels();
+
+		// decoded alpha is binary: a source alpha of at least 0.5 decodes as 1.0,
+		// anything lower as 0.0
+		// the source is read through the m_pafrgbaSource member, not the argument
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			bool boolOpaquePixel = m_pafrgbaSource[uiPixel].fA >= 0.5f;
+
+			m_afDecodedAlphas[uiPixel] = boolOpaquePixel ? 1.0f : 0.0f;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits of a previous encoding
+	//
+	void Block4x4Encoding_RGB8A1::InitFromEncodingBits(Block4x4 *a_pblockParent,
+														unsigned char *a_paucEncodingBits,
+														ColorFloatRGBA *a_pafrgbaSource,
+														ErrorMetric a_errormetric)
+	{
+
+		// decode as an ETC1 (differential) block first; if one of the checks below finds a
+		// T, H or planar block, that mode's initializer replaces the state
+		InitFromEncodingBits_ETC1(a_pblockParent, a_paucEncodingBits,
+									a_pafrgbaSource, a_errormetric);
+
+		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
+
+		// true when a second-color component falls outside the 5-bit range [0, 31]
+		auto isOverflowed = [](int a_iComponent) { return a_iComponent < 0 || a_iComponent > 31; };
+
+		// second color of the differential view: first color plus its delta, per channel
+		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 +
+					(int)m_pencodingbitsRGB8->differential.dred2;
+		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 +
+						(int)m_pencodingbitsRGB8->differential.dgreen2;
+		int iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 +
+						(int)m_pencodingbitsRGB8->differential.dblue2;
+
+		// detect if there is a T, H or Planar mode present
+		// the first channel (r, then g, then b) that overflows decides the mode
+		// if none overflows, the ETC1 state set up above stands
+		if (isOverflowed(iRed2))
+		{
+			InitFromEncodingBits_T();
+		}
+		else if (isOverflowed(iGreen2))
+		{
+			InitFromEncodingBits_H();
+		}
+		else if (isOverflowed(iBlue2))
+		{
+			Block4x4Encoding_RGB8::InitFromEncodingBits_Planar();
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding assuming the encoding is an ETC1 mode.
+	// if it isn't an ETC1 mode, this will be overwritten later
+	//
+	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent,
+		unsigned char *a_paucEncodingBits,
+		ColorFloatRGBA *a_pafrgbaSource,
+		ErrorMetric a_errormetric)
+	{
+		// interprets the bits as a differential ETC1 block.  InitFromEncodingBits() calls this
+		// first and lets a T, H or planar initializer overwrite the result where needed
+		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,
+			a_errormetric);
+
+		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
+
+		m_mode = MODE_ETC1;
+		m_boolDiff = true;
+		m_boolFlip = m_pencodingbitsRGB8->differential.flip;
+
+		// the opaque flag is read from the bit that the differential view names diff
+		m_boolOpaque = m_pencodingbitsRGB8->differential.diff;
+
+		// limit a channel to the 5-bit range [0, 31]
+		auto clampTo31 = [](int a_iChannel) -> int
+		{
+			if (a_iChannel < 0)
+			{
+				return 0;
+			}
+			return a_iChannel > 31 ? 31 : a_iChannel;
+		};
+
+		// color 2 is color 1 plus a per-channel delta.  sums outside the range are clamped;
+		// InitFromEncodingBits() re-initializes such blocks as T, H or planar afterwards
+		int iR2 = clampTo31(m_pencodingbitsRGB8->differential.red1 +
+							m_pencodingbitsRGB8->differential.dred2);
+		int iG2 = clampTo31(m_pencodingbitsRGB8->differential.green1 +
+							m_pencodingbitsRGB8->differential.dgreen2);
+		int iB2 = clampTo31(m_pencodingbitsRGB8->differential.blue1 +
+							m_pencodingbitsRGB8->differential.dblue2);
+
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(m_pencodingbitsRGB8->differential.red1,
+														m_pencodingbitsRGB8->differential.green1,
+														m_pencodingbitsRGB8->differential.blue1);
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iR2,
+														(unsigned char)iG2,
+														(unsigned char)iB2);
+
+		// one codeword per half block (see Decode_ETC1)
+		m_uiCW1 = m_pencodingbitsRGB8->differential.cw1;
+		m_uiCW2 = m_pencodingbitsRGB8->differential.cw2;
+
+		// selectors are read by the ETC1 base class
+		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
+
+		// decoded pixels first, then their error
+		Decode_ETC1();
+
+		CalcBlockError();
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding if T mode is detected
+	//
+	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_T(void)
+	{
+		m_mode = MODE_T;
+
+		// color 1: red is split across two fields, red1a above the two bits of red1b
+		unsigned char ucR1 = (unsigned char)((m_pencodingbitsRGB8->t.red1a << 2) +
+								m_pencodingbitsRGB8->t.red1b);
+		unsigned char ucG1 = m_pencodingbitsRGB8->t.green1;
+		unsigned char ucB1 = m_pencodingbitsRGB8->t.blue1;
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucR1, ucG1, ucB1);
+
+		// color 2: every channel is a single field
+		unsigned char ucR2 = m_pencodingbitsRGB8->t.red2;
+		unsigned char ucG2 = m_pencodingbitsRGB8->t.green2;
+		unsigned char ucB2 = m_pencodingbitsRGB8->t.blue2;
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucR2, ucG2, ucB2);
+
+		// distance index: da sits one bit above db
+		m_uiCW1 = (m_pencodingbitsRGB8->t.da << 1) + m_pencodingbitsRGB8->t.db;
+
+		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
+		DecodePixels_T();
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding if H mode is detected
+	//
+	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_H(void)
+	{
+		m_mode = MODE_H;
+
+		// color 1: green is stored in two pieces and blue in three
+		unsigned char ucR1 = m_pencodingbitsRGB8->h.red1;
+		unsigned char ucG1 = (unsigned char)((m_pencodingbitsRGB8->h.green1a << 1) +
+								m_pencodingbitsRGB8->h.green1b);
+		unsigned char ucB1 = (unsigned char)((m_pencodingbitsRGB8->h.blue1a << 3) +
+								(m_pencodingbitsRGB8->h.blue1b << 1) +
+								m_pencodingbitsRGB8->h.blue1c);
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucR1, ucG1, ucB1);
+
+		// color 2: only green is split
+		unsigned char ucR2 = m_pencodingbitsRGB8->h.red2;
+		unsigned char ucG2 = (unsigned char)((m_pencodingbitsRGB8->h.green2a << 1) +
+								m_pencodingbitsRGB8->h.green2b);
+		unsigned char ucB2 = m_pencodingbitsRGB8->h.blue2;
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucR2, ucG2, ucB2);
+
+		// da and db hold the distance index without its lsb
+		m_uiCW1 = (m_pencodingbitsRGB8->h.da << 2) + (m_pencodingbitsRGB8->h.db << 1);
+
+		// the lsb is implied by the order of the two colors, compared as packed integers:
+		// it is 1 when color 1 is greater than or equal to color 2
+		unsigned int uiPacked1 = (unsigned int)(((int)ucR1 << 16) + ((int)ucG1 << 8) + (int)ucB1);
+		unsigned int uiPacked2 = (unsigned int)(((int)ucR2 << 16) + ((int)ucG2 << 8) + (int)ucB2);
+		m_uiCW1 += (uiPacked1 >= uiPacked2) ? 1 : 0;
+
+		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
+
+		DecodePixels_H();
+
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// for ETC1 modes, set the decoded colors and decoded alpha based on the encoding state
+	//
+	void Block4x4Encoding_RGB8A1::Decode_ETC1(void)
+	{
+
+		// order in which the pixels are visited, chosen by the flip flag
+		const unsigned int *pauiOrder = m_boolFlip ? s_auiPixelOrderFlip1 : s_auiPixelOrderFlip0;
+
+		for (unsigned int uiIndex = 0; uiIndex < PIXELS; uiIndex++)
+		{
+			// the first eight pixels in that order use color 1 and codeword 1, the rest color 2 and codeword 2
+			bool boolFirstHalf = uiIndex < 8;
+			ColorFloatRGBA &frgbaBase = boolFirstHalf ? m_frgbaColor1 : m_frgbaColor2;
+			unsigned int uiCW = boolFirstHalf ? m_uiCW1 : m_uiCW2;
+
+			unsigned int uiPixel = pauiOrder[uiIndex];
+			unsigned int uiSelector = m_auiSelectors[uiPixel];
+
+			if (!m_boolOpaque && uiSelector == TRANSPARENT_SELECTOR)
+			{
+				// transparent pixel: default-constructed color, zero alpha
+				m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
+				m_afDecodedAlphas[uiPixel] = 0.0f;
+				continue;
+			}
+
+			// opaque blocks use the ETC1 table, all others the table with zero deltas for selectors 0 and 2
+			float fDelta = m_boolOpaque ? Block4x4Encoding_ETC1::s_aafCwTable[uiCW][uiSelector] : s_aafCwOpaqueUnsetTable[uiCW][uiSelector];
+			m_afrgbaDecodedColors[uiPixel] = (frgbaBase + fDelta).ClampRGB();
+			m_afDecodedAlphas[uiPixel] = 1.0f;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// for T mode, set the decoded colors and decoded alpha based on the encoding state
+	//
+	void Block4x4Encoding_RGB8A1::DecodePixels_T(void)
+	{
+		// the distance picked by m_uiCW1, applied equally to r, g and b (alpha term is 0)
+		float fOffset = s_afTHDistanceTable[m_uiCW1];
+		ColorFloatRGBA frgbaOffset(fOffset, fOffset, fOffset, 0.0f);
+
+		// opaque colors by selector value:
+		//   0: color 1
+		//   1: color 2 plus the distance
+		//   2: color 2 (used only when the block is opaque)
+		//   3: color 2 minus the distance
+		ColorFloatRGBA afrgbaPaint[4];
+		afrgbaPaint[0] = m_frgbaColor1;
+		afrgbaPaint[1] = (m_frgbaColor2 + frgbaOffset).ClampRGB();
+		afrgbaPaint[2] = m_frgbaColor2;
+		afrgbaPaint[3] = (m_frgbaColor2 - frgbaOffset).ClampRGB();
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			unsigned int uiSelector = m_auiSelectors[uiPixel];
+
+			// any other selector value leaves the pixel as it was
+			if (uiSelector > 3)
+			{
+				continue;
+			}
+
+			// NOTE(review): Decode_ETC1 tests TRANSPARENT_SELECTOR where this uses a literal 2;
+			// presumably the same value - confirm
+			if (uiSelector == 2 && m_boolOpaque == false)
+			{
+				m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
+				m_afDecodedAlphas[uiPixel] = 0.0f;
+			}
+			else
+			{
+				m_afrgbaDecodedColors[uiPixel] = afrgbaPaint[uiSelector];
+				m_afDecodedAlphas[uiPixel] = 1.0f;
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// for H mode, set the decoded colors and decoded alpha based on the encoding state
+	//
+	void Block4x4Encoding_RGB8A1::DecodePixels_H(void)
+	{
+		// the distance picked by m_uiCW1, applied equally to r, g and b (alpha term is 0)
+		float fOffset = s_afTHDistanceTable[m_uiCW1];
+		ColorFloatRGBA frgbaOffset(fOffset, fOffset, fOffset, 0.0f);
+
+		// opaque colors by selector value:
+		//   0: color 1 plus the distance
+		//   1: color 1 minus the distance
+		//   2: color 2 plus the distance (used only when the block is opaque)
+		//   3: color 2 minus the distance
+		ColorFloatRGBA afrgbaPaint[4];
+		afrgbaPaint[0] = (m_frgbaColor1 + frgbaOffset).ClampRGB();
+		afrgbaPaint[1] = (m_frgbaColor1 - frgbaOffset).ClampRGB();
+		afrgbaPaint[2] = (m_frgbaColor2 + frgbaOffset).ClampRGB();
+		afrgbaPaint[3] = (m_frgbaColor2 - frgbaOffset).ClampRGB();
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			unsigned int uiSelector = m_auiSelectors[uiPixel];
+
+			// any other selector value leaves the pixel as it was
+			if (uiSelector > 3)
+			{
+				continue;
+			}
+
+			// NOTE(review): Decode_ETC1 tests TRANSPARENT_SELECTOR where this uses a literal 2;
+			// presumably the same value - confirm
+			if (uiSelector == 2 && m_boolOpaque == false)
+			{
+				m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
+				m_afDecodedAlphas[uiPixel] = 0.0f;
+			}
+			else
+			{
+				m_afrgbaDecodedColors[uiPixel] = afrgbaPaint[uiSelector];
+				m_afDecodedAlphas[uiPixel] = 1.0f;
+			}
+		}
+
+	}
+
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	// RGB8A1 can't use individual mode
+	// RGB8A1 with transparent pixels can't use planar mode
+	//
+	void Block4x4Encoding_RGB8A1::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolOpaque);
+		assert(!m_boolTransparent);
+		assert(!m_boolDone);
+
+		// set by a step when a_fEffort is low enough for that step to be the last one
+		bool boolLastStep = false;
+
+		// m_uiEncodingIterations counts the calls so far and selects the step to run
+		switch (m_uiEncodingIterations)
+		{
+		case 0:
+			// initial differential tries
+			PerformFirstIteration();
+			break;
+
+		case 1:
+			// differential, most likely flip, radius 1
+			TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
+			break;
+
+		case 2:
+			// differential, other flip, radius 1
+			TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
+			boolLastStep = a_fEffort <= 39.5f;
+			break;
+
+		case 3:
+			// T and H modes, radius 1
+			Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH();
+			TryT(1);
+			TryH(1);
+			boolLastStep = a_fEffort <= 49.5f;
+			break;
+
+		case 4:
+			TryDegenerates1();
+			boolLastStep = a_fEffort <= 59.5f;
+			break;
+
+		case 5:
+			TryDegenerates2();
+			boolLastStep = a_fEffort <= 69.5f;
+			break;
+
+		case 6:
+			TryDegenerates3();
+			boolLastStep = a_fEffort <= 79.5f;
+			break;
+
+		case 7:
+			// final step regardless of effort
+			TryDegenerates4();
+			boolLastStep = true;
+			break;
+
+		default:
+			assert(0);
+			break;
+		}
+
+		if (boolLastStep)
+		{
+			m_boolDone = true;
+		}
+
+		m_uiEncodingIterations++;
+
+		// a perfect encoding also ends the search
+		SetDoneIfPerfect();
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best initial encoding to ensure block has a valid encoding
+	//
+	void Block4x4Encoding_RGB8A1::PerformFirstIteration(void)
+	{
+		Block4x4Encoding_ETC1::CalculateMostLikelyFlip();
+
+		// start from the largest error so that a try with a lower error replaces it
+		m_fError = FLT_MAX;
+
+		// radius 0 differential with the most likely flip; the other flip only if not done yet
+		TryDifferential(m_boolMostLikelyFlip, 0, 0, 0);
+		SetDoneIfPerfect();
+		if (!m_boolDone)
+		{
+			TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0);
+			SetDoneIfPerfect();
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// mostly copied from ETC1
+	// differences:
+	//		Block4x4Encoding_RGB8A1 encodingTry = *this;
+	//
+	void Block4x4Encoding_RGB8A1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, 
+													int a_iGrayOffset1, int a_iGrayOffset2)
+	{
+		// try one differential encoding and keep it if it lowers m_fError
+		//
+		// a_boolFlip selects the top/bottom halves (true) or the left/right halves (false)
+		// a_uiRadius, a_iGrayOffset1 and a_iGrayOffset2 are handed unchanged to DifferentialTrys
+		//		(presumably the search radius and a gray shift per half - confirm in DifferentialTrys)
+
+		ColorFloatRGBA frgbaColor1;
+		ColorFloatRGBA frgbaColor2;
+
+		const unsigned int *pauiPixelMapping1;
+		const unsigned int *pauiPixelMapping2;
+
+		// pick the source averages and pixel mappings of the two halves for the requested orientation
+		if (a_boolFlip)
+		{
+			frgbaColor1 = m_frgbaSourceAverageTop;
+			frgbaColor2 = m_frgbaSourceAverageBottom;
+
+			pauiPixelMapping1 = s_auiTopPixelMapping;
+			pauiPixelMapping2 = s_auiBottomPixelMapping;
+		}
+		else
+		{
+			frgbaColor1 = m_frgbaSourceAverageLeft;
+			frgbaColor2 = m_frgbaSourceAverageRight;
+
+			pauiPixelMapping1 = s_auiLeftPixelMapping;
+			pauiPixelMapping2 = s_auiRightPixelMapping;
+		}
+
+		DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, 
+								a_uiRadius, a_iGrayOffset1, a_iGrayOffset2);
+
+		// work on a copy so that the half searches cannot disturb the current best encoding
+		Block4x4Encoding_RGB8A1 encodingTry = *this;
+		encodingTry.m_boolFlip = a_boolFlip;
+
+		// fill in the candidate list (and the best candidate) of each half
+		encodingTry.TryDifferentialHalf(&trys.m_half1);
+		encodingTry.TryDifferentialHalf(&trys.m_half2);
+
+		// find best halves that are within differential range
+		DifferentialTrys::Try *ptryBest1 = nullptr;
+		DifferentialTrys::Try *ptryBest2 = nullptr;
+		encodingTry.m_fError = FLT_MAX;
+
+		// see if the best of each half are in differential range
+		// (each channel of color2 must be within [-4, 3] of the same channel of color1)
+		int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed;
+		int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen;
+		int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue;
+		if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3)
+		{
+			ptryBest1 = trys.m_half1.m_ptryBest;
+			ptryBest2 = trys.m_half2.m_ptryBest;
+			encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError;
+		}
+		else
+		{
+			// else, find the next best halves that are in differential range
+			// every try of half 1 is paired with every try of half 2; the first pair with the lowest
+			// summed error wins because the comparison below is a strict "less than"
+			for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0];
+			ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys];
+				ptry1++)
+			{
+				for (DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0];
+				ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys];
+					ptry2++)
+				{
+					iDRed = ptry2->m_iRed - ptry1->m_iRed;
+					bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4;
+					iDGreen = ptry2->m_iGreen - ptry1->m_iGreen;
+					bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4;
+					iDBlue = ptry2->m_iBlue - ptry1->m_iBlue;
+					bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4;
+
+					if (boolValidRedDelta && boolValidGreenDelta && boolValidBlueDelta)
+					{
+						float fError = ptry1->m_fError + ptry2->m_fError;
+
+						if (fError < encodingTry.m_fError)
+						{
+							encodingTry.m_fError = fError;
+
+							ptryBest1 = ptry1;
+							ptryBest2 = ptry2;
+						}
+					}
+
+				}
+			}
+			// NOTE(review): with asserts compiled out and no pair in range, the best pointers stay null;
+			// the block below is then only skipped because encodingTry.m_fError is still FLT_MAX
+			assert(encodingTry.m_fError < FLT_MAX);
+			assert(ptryBest1 != nullptr);
+			assert(ptryBest2 != nullptr);
+		}
+
+		// adopt the try only if it beats the error of the current encoding
+		if (encodingTry.m_fError < m_fError)
+		{
+			m_mode = MODE_ETC1;
+			m_boolDiff = true;
+			m_boolFlip = encodingTry.m_boolFlip;
+			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue);
+			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue);
+			m_uiCW1 = ptryBest1->m_uiCW;
+			m_uiCW2 = ptryBest2->m_uiCW;
+
+			// NOTE(review): the alpha error summed into m_fError by this loop is overwritten by
+			// "m_fError = m_fError1 + m_fError2" after the loop, so only the decoded colors, decoded
+			// alphas and selectors written here survive
+			m_fError = 0.0f;
+			for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++)
+			{
+				// the try stores selectors in half order; the mappings translate to block pixel indices
+				unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder];
+				unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder];
+
+				unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder];
+				unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder];
+
+				m_auiSelectors[uiPixel1] = uiSelector1;
+				m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder];
+
+				// a transparent selector decodes to a default-constructed color with alpha 0,
+				// any other selector decodes to the base color plus a table offset with alpha 1
+				if (uiSelector1 == TRANSPARENT_SELECTOR)
+				{
+					m_afrgbaDecodedColors[uiPixel1] = ColorFloatRGBA();
+					m_afDecodedAlphas[uiPixel1] = 0.0f;
+				}
+				else
+				{
+					float fDeltaRGB1 = s_aafCwOpaqueUnsetTable[m_uiCW1][uiSelector1];
+					m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB();
+					m_afDecodedAlphas[uiPixel1] = 1.0f;
+				}
+
+				if (uiSelector2 == TRANSPARENT_SELECTOR)
+				{
+					m_afrgbaDecodedColors[uiPixel2] = ColorFloatRGBA();
+					m_afDecodedAlphas[uiPixel2] = 0.0f;
+				}
+				else
+				{
+					float fDeltaRGB2 = s_aafCwOpaqueUnsetTable[m_uiCW2][uiSelector2];
+					m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB();
+					m_afDecodedAlphas[uiPixel2] = 1.0f;
+				}
+
+				float fDeltaA1 = m_afDecodedAlphas[uiPixel1] - m_pafrgbaSource[uiPixel1].fA;
+				m_fError += fDeltaA1 * fDeltaA1;
+				float fDeltaA2 = m_afDecodedAlphas[uiPixel2] - m_pafrgbaSource[uiPixel2].fA;
+				m_fError += fDeltaA2 * fDeltaA2;
+			}
+
+			// the block error is the sum of the two half errors found by the search
+			m_fError1 = ptryBest1->m_fError;
+			m_fError2 = ptryBest2->m_fError;
+			m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors;
+			m_fError = m_fError1 + m_fError2;
+
+			// sanity check
+			// (re-quantize the stored colors to 5 bits and confirm the deltas are still in [-4, 3])
+			{
+				int iRed1 = m_frgbaColor1.IntRed(31.0f);
+				int iGreen1 = m_frgbaColor1.IntGreen(31.0f);
+				int iBlue1 = m_frgbaColor1.IntBlue(31.0f);
+
+				int iRed2 = m_frgbaColor2.IntRed(31.0f);
+				int iGreen2 = m_frgbaColor2.IntGreen(31.0f);
+				int iBlue2 = m_frgbaColor2.IntBlue(31.0f);
+
+				iDRed = iRed2 - iRed1;
+				iDGreen = iGreen2 - iGreen1;
+				iDBlue = iBlue2 - iBlue1;
+
+				assert(iDRed >= -4 && iDRed < 4);
+				assert(iDGreen >= -4 && iDGreen < 4);
+				assert(iDBlue >= -4 && iDBlue < 4);
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// mostly copied from ETC1
+	// differences:
+	//		uses s_aafCwOpaqueUnsetTable
+	//		color for selector set to 0,0,0,0
+	//
+	void Block4x4Encoding_RGB8A1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf)
+	{
+		// fill a_phalf->m_atry with one try per base color within m_uiRadius of the half's starting
+		// color (m_iRed, m_iGreen, m_iBlue), and point a_phalf->m_ptryBest at the try with the lowest error
+		//
+		// each try records the best codeword (m_uiCW), the selector of each of the half's 8 pixels
+		// and the summed pixel error
+
+		a_phalf->m_ptryBest = nullptr;
+		float fBestTryError = FLT_MAX;
+
+		a_phalf->m_uiTrys = 0;
+		for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius;
+		iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius;
+			iRed++)
+		{
+			// the range is not clamped here; the starting color and radius are expected to keep every
+			// channel inside the 5 bit range
+			assert(iRed >= 0 && iRed <= 31);
+
+			for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius;
+			iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius;
+				iGreen++)
+			{
+				assert(iGreen >= 0 && iGreen <= 31);
+
+				for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius;
+				iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius;
+					iBlue++)
+				{
+					assert(iBlue >= 0 && iBlue <= 31);
+
+					// claim the next free try slot; m_uiTrys is only advanced at the end of this iteration
+					DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys];
+					assert(ptry < &a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]);
+
+					ptry->m_iRed = iRed;
+					ptry->m_iGreen = iGreen;
+					ptry->m_iBlue = iBlue;
+					ptry->m_fError = FLT_MAX;
+					ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue);
+
+					// try each CW
+					for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
+					{
+						unsigned int auiPixelSelectors[PIXELS / 2];
+						ColorFloatRGBA	afrgbaDecodedColors[PIXELS / 2];
+						float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
+							FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
+
+						// pre-compute decoded pixels for each selector
+						// (selector 2 decodes to a default-constructed color instead of a table offset)
+						ColorFloatRGBA afrgbaSelectors[SELECTORS];
+						assert(SELECTORS == 4);
+						afrgbaSelectors[0] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][0]).ClampRGB();
+						afrgbaSelectors[1] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][1]).ClampRGB();
+						afrgbaSelectors[2] = ColorFloatRGBA();
+						afrgbaSelectors[3] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][3]).ClampRGB();
+
+						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
+						{
+							ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[a_phalf->m_pauiPixelMapping[uiPixel]];
+							ColorFloatRGBA frgbaDecodedPixel;
+
+							// source pixels with alpha below 0.5 are evaluated with TRANSPARENT_SELECTOR only
+							// (the loop variable is overwritten and the loop is left after one evaluation);
+							// all other source pixels skip TRANSPARENT_SELECTOR
+							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+							{
+								if (pfrgbaSourcePixel->fA < 0.5f)
+								{
+									uiSelector = TRANSPARENT_SELECTOR;
+								}
+								else if (uiSelector == TRANSPARENT_SELECTOR)
+								{
+									continue;
+								}
+
+								frgbaDecodedPixel = afrgbaSelectors[uiSelector];
+
+								float fPixelError;
+								
+								// NOTE(review): the alpha passed in is the one currently stored in m_afDecodedAlphas
+								// for this pixel, not one derived from uiSelector - confirm this is intended
+								fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[a_phalf->m_pauiPixelMapping[uiPixel]],
+																	*pfrgbaSourcePixel);
+
+								if (fPixelError < afPixelErrors[uiPixel])
+								{
+									auiPixelSelectors[uiPixel] = uiSelector;
+									afrgbaDecodedColors[uiPixel] = frgbaDecodedPixel;
+									afPixelErrors[uiPixel] = fPixelError;
+								}
+
+								if (uiSelector == TRANSPARENT_SELECTOR)
+								{
+									break;
+								}
+							}
+						}
+
+						// add up all pixel errors
+						float fCWError = 0.0f;
+						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
+						{
+							fCWError += afPixelErrors[uiPixel];
+						}
+
+						// if best CW so far
+						if (fCWError < ptry->m_fError)
+						{
+							ptry->m_uiCW = uiCW;
+							for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
+							{
+								ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel];
+							}
+							ptry->m_fError = fCWError;
+						}
+
+					}
+
+					// remember the best try of this half (the earliest one wins a tie)
+					if (ptry->m_fError < fBestTryError)
+					{
+						a_phalf->m_ptryBest = ptry;
+						fBestTryError = ptry->m_fError;
+					}
+
+					assert(ptry->m_fError < FLT_MAX);
+
+					a_phalf->m_uiTrys++;
+				}
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try encoding in T mode
+	// save this encoding if it improves the error
+	//
+	// since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently
+	// better encoding can be found if TWIDDLE_RADIUS is set to 2, but it will be much slower
+	//
+	void Block4x4Encoding_RGB8A1::TryT(unsigned int a_uiRadius)
+	{
+		// a_uiRadius is how far each 4 bit channel of a base color may be moved away from the
+		// original T/H base colors while searching
+		//
+		// the twiddle ranges are clamped to [0, 15]; the upper clamp has to limit the MAXIMUM of a range
+		// (it used to overwrite the minimum, which skipped most of the range and let channel values
+		// above 15 reach ConvertFromRGB4())
+
+		Block4x4Encoding_RGB8A1 encodingTry = *this;
+
+		// init "try"
+		{
+			encodingTry.m_mode = MODE_T;
+			encodingTry.m_boolDiff = true;
+			encodingTry.m_boolFlip = false;
+			encodingTry.m_fError = FLT_MAX;
+		}
+
+		const int iRadius = (int)a_uiRadius;
+
+		// clamp a twiddled channel to the 4 bit range
+		auto clampChannel = [](int a_iChannel) -> int
+		{
+			return a_iChannel < 0 ? 0 : (a_iChannel > 15 ? 15 : a_iChannel);
+		};
+
+		const int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
+		const int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
+		const int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
+
+		const int iMinRed1 = clampChannel(iColor1Red - iRadius);
+		const int iMaxRed1 = clampChannel(iColor1Red + iRadius);
+		const int iMinGreen1 = clampChannel(iColor1Green - iRadius);
+		const int iMaxGreen1 = clampChannel(iColor1Green + iRadius);
+		const int iMinBlue1 = clampChannel(iColor1Blue - iRadius);
+		const int iMaxBlue1 = clampChannel(iColor1Blue + iRadius);
+
+		const int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
+		const int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
+		const int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
+
+		const int iMinRed2 = clampChannel(iColor2Red - iRadius);
+		const int iMaxRed2 = clampChannel(iColor2Red + iRadius);
+		const int iMinGreen2 = clampChannel(iColor2Green - iRadius);
+		const int iMaxGreen2 = clampChannel(iColor2Green + iRadius);
+		const int iMinBlue2 = clampChannel(iColor2Blue - iRadius);
+		const int iMaxBlue2 = clampChannel(iColor2Blue + iRadius);
+
+		// evaluate one pair of base colors with the try's current distance and
+		// copy the try into this encoding if it has a lower error
+		//
+		// encodingTry.m_fError is deliberately not reset between calls: TryT_BestSelectorCombination()
+		// only updates the try when it beats the best error the try has seen so far
+		auto tryBaseColors = [this, &encodingTry](const ColorFloatRGBA &a_frgbaColor1, const ColorFloatRGBA &a_frgbaColor2)
+		{
+			encodingTry.m_frgbaColor1 = a_frgbaColor1;
+			encodingTry.m_frgbaColor2 = a_frgbaColor2;
+
+			encodingTry.TryT_BestSelectorCombination();
+
+			if (encodingTry.m_fError < m_fError)
+			{
+				m_mode = encodingTry.m_mode;
+				m_boolDiff = encodingTry.m_boolDiff;
+				m_boolFlip = encodingTry.m_boolFlip;
+
+				m_frgbaColor1 = encodingTry.m_frgbaColor1;
+				m_frgbaColor2 = encodingTry.m_frgbaColor2;
+				m_uiCW1 = encodingTry.m_uiCW1;
+
+				for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+				{
+					m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+					m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+				}
+
+				m_fError = encodingTry.m_fError;
+			}
+		};
+
+		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
+		{
+			encodingTry.m_uiCW1 = uiDistance;
+
+			// twiddle m_frgbaOriginalColor2_TAndH
+			// twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector
+			//
+			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
+			{
+				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
+				{
+					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
+					{
+						const ColorFloatRGBA frgbaTwiddled = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
+
+						// try the twiddled color in both base color slots, paired with original color1
+						tryBaseColors(m_frgbaOriginalColor1_TAndH, frgbaTwiddled);
+						tryBaseColors(frgbaTwiddled, m_frgbaOriginalColor1_TAndH);
+					}
+				}
+			}
+
+			// twiddle m_frgbaOriginalColor1_TAndH
+			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
+			{
+				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
+				{
+					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
+					{
+						const ColorFloatRGBA frgbaTwiddled = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
+
+						// try the twiddled color in both base color slots, paired with original color2
+						tryBaseColors(frgbaTwiddled, m_frgbaOriginalColor2_TAndH);
+						tryBaseColors(m_frgbaOriginalColor2_TAndH, frgbaTwiddled);
+					}
+				}
+			}
+
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best selector combination for TryT
+	// called on an encodingTry
+	//
+	void Block4x4Encoding_RGB8A1::TryT_BestSelectorCombination(void)
+	{
+		// for the current m_frgbaColor1, m_frgbaColor2 and m_uiCW1, pick the lowest-error selector for
+		// every pixel; the result is stored in this object only if the block error drops below m_fError
+
+		assert(SELECTORS == 4);
+
+		const float fPaintDistance = s_afTHDistanceTable[m_uiCW1];
+
+		// decoded color of each selector: color1, color2 +/- distance, and a default color for selector 2
+		ColorFloatRGBA afrgbaPaint[SELECTORS];
+		afrgbaPaint[0] = m_frgbaColor1;
+		afrgbaPaint[1] = (m_frgbaColor2 + fPaintDistance).ClampRGB();
+		afrgbaPaint[2] = ColorFloatRGBA();
+		afrgbaPaint[3] = (m_frgbaColor2 - fPaintDistance).ClampRGB();
+
+		unsigned int auiChosenSelectors[PIXELS];
+		ColorFloatRGBA afrgbaChosenColors[PIXELS];
+		float fTotalError = 0.0f;
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			// source pixels with alpha below 0.5 may only use selector 2, all others may use any selector
+			const bool boolSourceIsTransparent = m_pafrgbaSource[uiPixel].fA < 0.5f;
+			const unsigned int uiFirstSelector = boolSourceIsTransparent ? 2 : 0;
+			const unsigned int uiLastSelector = boolSourceIsTransparent ? 2 : SELECTORS - 1;
+
+			float fLowestError = FLT_MAX;
+			for (unsigned int uiSelector = uiFirstSelector; uiSelector <= uiLastSelector; uiSelector++)
+			{
+				const float fCandidateError = CalcPixelError(afrgbaPaint[uiSelector], m_afDecodedAlphas[uiPixel],
+																m_pafrgbaSource[uiPixel]);
+
+				if (fCandidateError < fLowestError)
+				{
+					fLowestError = fCandidateError;
+					auiChosenSelectors[uiPixel] = uiSelector;
+					afrgbaChosenColors[uiPixel] = afrgbaPaint[uiSelector];
+				}
+			}
+
+			// pixels are summed in index order
+			fTotalError += fLowestError;
+		}
+
+		// nothing to store unless this combination improves on the try's error
+		if (!(fTotalError < m_fError))
+		{
+			return;
+		}
+
+		m_fError = fTotalError;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			m_auiSelectors[uiPixel] = auiChosenSelectors[uiPixel];
+			m_afrgbaDecodedColors[uiPixel] = afrgbaChosenColors[uiPixel];
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try encoding in H mode
+	// save this encoding if it improves the error
+	//
+	// since all pixels use the distance table, color1 and color2 can NOT be twiddled independently
+	// TWIDDLE_RADIUS of 2 is WAY too slow
+	//
+	void Block4x4Encoding_RGB8A1::TryH(unsigned int a_uiRadius)
+	{
+		// a_uiRadius is how far each 4 bit channel of a base color may be moved away from the
+		// original T/H base colors while searching
+		//
+		// the twiddle ranges are clamped to [0, 15]; the upper clamp has to limit the MAXIMUM of a range
+		// (it used to overwrite the minimum, which skipped most of the range and let channel values
+		// above 15 reach ConvertFromRGB4())
+
+		Block4x4Encoding_RGB8A1 encodingTry = *this;
+
+		// init "try"
+		{
+			encodingTry.m_mode = MODE_H;
+			encodingTry.m_boolDiff = true;
+			encodingTry.m_boolFlip = false;
+			encodingTry.m_fError = FLT_MAX;
+		}
+
+		const int iRadius = (int)a_uiRadius;
+
+		// clamp a twiddled channel to the 4 bit range
+		auto clampChannel = [](int a_iChannel) -> int
+		{
+			return a_iChannel < 0 ? 0 : (a_iChannel > 15 ? 15 : a_iChannel);
+		};
+
+		const int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
+		const int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
+		const int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
+
+		const int iMinRed1 = clampChannel(iColor1Red - iRadius);
+		const int iMaxRed1 = clampChannel(iColor1Red + iRadius);
+		const int iMinGreen1 = clampChannel(iColor1Green - iRadius);
+		const int iMaxGreen1 = clampChannel(iColor1Green + iRadius);
+		const int iMinBlue1 = clampChannel(iColor1Blue - iRadius);
+		const int iMaxBlue1 = clampChannel(iColor1Blue + iRadius);
+
+		const int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
+		const int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
+		const int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
+
+		const int iMinRed2 = clampChannel(iColor2Red - iRadius);
+		const int iMaxRed2 = clampChannel(iColor2Red + iRadius);
+		const int iMinGreen2 = clampChannel(iColor2Green - iRadius);
+		const int iMaxGreen2 = clampChannel(iColor2Green + iRadius);
+		const int iMinBlue2 = clampChannel(iColor2Blue - iRadius);
+		const int iMaxBlue2 = clampChannel(iColor2Blue + iRadius);
+
+		// evaluate one pair of base colors with the try's current distance and
+		// copy the try into this encoding if it has a lower error
+		//
+		// encodingTry.m_fError is deliberately not reset between calls: TryH_BestSelectorCombination()
+		// only updates the try when it beats the best error the try has seen so far
+		auto tryBaseColors = [this, &encodingTry](const ColorFloatRGBA &a_frgbaColor1, const ColorFloatRGBA &a_frgbaColor2)
+		{
+			encodingTry.m_frgbaColor1 = a_frgbaColor1;
+			encodingTry.m_frgbaColor2 = a_frgbaColor2;
+
+			encodingTry.TryH_BestSelectorCombination();
+
+			if (encodingTry.m_fError < m_fError)
+			{
+				m_mode = encodingTry.m_mode;
+				m_boolDiff = encodingTry.m_boolDiff;
+				m_boolFlip = encodingTry.m_boolFlip;
+
+				m_frgbaColor1 = encodingTry.m_frgbaColor1;
+				m_frgbaColor2 = encodingTry.m_frgbaColor2;
+				m_uiCW1 = encodingTry.m_uiCW1;
+
+				for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+				{
+					m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+					m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+				}
+
+				m_fError = encodingTry.m_fError;
+			}
+		};
+
+		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
+		{
+			encodingTry.m_uiCW1 = uiDistance;
+
+			// twiddle m_frgbaOriginalColor1_TAndH
+			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
+			{
+				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
+				{
+					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
+					{
+						// if color1 == color2, H encoding issues can pop up, so skip this candidate
+						if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue)
+						{
+							continue;
+						}
+
+						tryBaseColors(ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1),
+										m_frgbaOriginalColor2_TAndH);
+					}
+				}
+			}
+
+			// twiddle m_frgbaOriginalColor2_TAndH
+			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
+			{
+				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
+				{
+					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
+					{
+						// if color1 == color2, H encoding issues can pop up, so skip this candidate
+						if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue)
+						{
+							continue;
+						}
+
+						tryBaseColors(m_frgbaOriginalColor1_TAndH,
+										ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2));
+					}
+				}
+			}
+
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best selector combination for TryH
+	// called on an encodingTry
+	//
+	void Block4x4Encoding_RGB8A1::TryH_BestSelectorCombination(void)
+	{
+		// for the current m_frgbaColor1, m_frgbaColor2 and m_uiCW1, pick the lowest-error selector for
+		// every pixel; the result is stored in this object only if the block error drops below m_fError
+
+		// abort if colors and CW will pose an encoding problem
+		{
+			// pack each base color as 0xRRGGBB using 8 bit channels
+			const unsigned int uiPackedRed1 = (unsigned int)m_frgbaColor1.IntRed(255.0f);
+			const unsigned int uiPackedGreen1 = (unsigned int)m_frgbaColor1.IntGreen(255.0f);
+			const unsigned int uiPackedBlue1 = (unsigned int)m_frgbaColor1.IntBlue(255.0f);
+			const unsigned int uiPacked1 = (uiPackedRed1 << 16) + (uiPackedGreen1 << 8) + uiPackedBlue1;
+
+			const unsigned int uiPackedRed2 = (unsigned int)m_frgbaColor2.IntRed(255.0f);
+			const unsigned int uiPackedGreen2 = (unsigned int)m_frgbaColor2.IntGreen(255.0f);
+			const unsigned int uiPackedBlue2 = (unsigned int)m_frgbaColor2.IntBlue(255.0f);
+			const unsigned int uiPacked2 = (uiPackedRed2 << 16) + (uiPackedGreen2 << 8) + uiPackedBlue2;
+
+			const unsigned int uiDistanceLsb = m_uiCW1 & 1;
+
+			// NOTE(review): the mask reduces color2 to its lowest bit (and only for odd distances), so this
+			// can only trigger when color1 packs to 0; kept exactly as the test has always behaved
+			if (uiPacked1 < (uiPacked2 & uiDistanceLsb))
+			{
+				return;
+			}
+		}
+
+		assert(SELECTORS == 4);
+
+		const float fPaintDistance = s_afTHDistanceTable[m_uiCW1];
+
+		// decoded color of each selector: color1 +/- distance, a default color for selector 2,
+		// and color2 - distance
+		ColorFloatRGBA afrgbaPaint[SELECTORS];
+		afrgbaPaint[0] = (m_frgbaColor1 + fPaintDistance).ClampRGB();
+		afrgbaPaint[1] = (m_frgbaColor1 - fPaintDistance).ClampRGB();
+		afrgbaPaint[2] = ColorFloatRGBA();
+		afrgbaPaint[3] = (m_frgbaColor2 - fPaintDistance).ClampRGB();
+
+		unsigned int auiChosenSelectors[PIXELS];
+		ColorFloatRGBA afrgbaChosenColors[PIXELS];
+		float fTotalError = 0.0f;
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			// source pixels with alpha below 0.5 may only use selector 2, all others may use any selector
+			const bool boolSourceIsTransparent = m_pafrgbaSource[uiPixel].fA < 0.5f;
+			const unsigned int uiFirstSelector = boolSourceIsTransparent ? 2 : 0;
+			const unsigned int uiLastSelector = boolSourceIsTransparent ? 2 : SELECTORS - 1;
+
+			float fLowestError = FLT_MAX;
+			for (unsigned int uiSelector = uiFirstSelector; uiSelector <= uiLastSelector; uiSelector++)
+			{
+				const float fCandidateError = CalcPixelError(afrgbaPaint[uiSelector], m_afDecodedAlphas[uiPixel],
+																m_pafrgbaSource[uiPixel]);
+
+				if (fCandidateError < fLowestError)
+				{
+					fLowestError = fCandidateError;
+					auiChosenSelectors[uiPixel] = uiSelector;
+					afrgbaChosenColors[uiPixel] = afrgbaPaint[uiSelector];
+				}
+			}
+
+			// pixels are summed in index order
+			fTotalError += fLowestError;
+		}
+
+		// nothing to store unless this combination improves on the try's error
+		if (!(fTotalError < m_fError))
+		{
+			return;
+		}
+
+		m_fError = fTotalError;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			m_auiSelectors[uiPixel] = auiChosenSelectors[uiPixel];
+			m_afrgbaDecodedColors[uiPixel] = afrgbaChosenColors[uiPixel];
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 1 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
+	//		be successful
+	//
+	void Block4x4Encoding_RGB8A1::TryDegenerates1(void)
+	{
+		// gray offsets { half 1, half 2 } of the differential tries, in the order they are run;
+		// all of them use the most likely flip and a radius of 1
+		static const int s_aaiGrayOffsets[4][2] =
+		{
+			{ -2, 0 },
+			{ 2, 0 },
+			{ 0, 2 },
+			{ 0, -2 }
+		};
+
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 2 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
+	//		be successful
+	//
+	void Block4x4Encoding_RGB8A1::TryDegenerates2(void)
+	{
+		// gray offsets { half 1, half 2 } of the differential tries, in the order they are run;
+		// all of them use the opposite of the most likely flip and a radius of 1
+		static const int s_aaiGrayOffsets[4][2] =
+		{
+			{ -2, 0 },
+			{ 2, 0 },
+			{ 0, 2 },
+			{ 0, -2 }
+		};
+
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(!m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 3 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
+	//		be successful
+	//
+	void Block4x4Encoding_RGB8A1::TryDegenerates3(void)
+	{
+		// gray offsets { half 1, half 2 } of the differential tries, in the order they are run;
+		// all of them use the most likely flip and a radius of 1
+		static const int s_aaiGrayOffsets[4][2] =
+		{
+			{ -2, -2 },
+			{ -2, 2 },
+			{ 2, -2 },
+			{ 2, 2 }
+		};
+
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 4 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
+	//		be successful
+	//
+	void Block4x4Encoding_RGB8A1::TryDegenerates4(void)
+	{
+		// gray offsets { half 1, half 2 } of the differential tries, in the order they are run;
+		// all of them use the most likely flip and a radius of 1
+		static const int s_aaiGrayOffsets[4][2] =
+		{
+			{ -4, 0 },
+			{ 4, 0 },
+			{ 0, 4 },
+			{ 0, -4 }
+		};
+
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_RGB8A1::SetEncodingBits(void)
+	{
+		// dispatch to the bit writer that matches the mode of the chosen encoding
+		if (m_mode == MODE_ETC1)
+		{
+			SetEncodingBits_ETC1();
+		}
+		else if (m_mode == MODE_T)
+		{
+			SetEncodingBits_T();
+		}
+		else if (m_mode == MODE_H)
+		{
+			SetEncodingBits_H();
+		}
+		else if (m_mode == MODE_PLANAR)
+		{
+			// planar blocks are written by the RGB8 base implementation
+			Block4x4Encoding_RGB8::SetEncodingBits_Planar();
+		}
+		else
+		{
+			// any other mode is not expected here
+			assert(false);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state if ETC1 mode
+	//
+	void Block4x4Encoding_RGB8A1::SetEncodingBits_ETC1(void)
+	{
+
+		// there is no individual mode in RGB8A1, so the encoding has to be differential
+		assert(m_boolDiff);
+
+		// both base colors scaled to the 0..31 range
+		const int iBaseRed = m_frgbaColor1.IntRed(31.0f);
+		const int iBaseGreen = m_frgbaColor1.IntGreen(31.0f);
+		const int iBaseBlue = m_frgbaColor1.IntBlue(31.0f);
+
+		const int iOtherRed = m_frgbaColor2.IntRed(31.0f);
+		const int iOtherGreen = m_frgbaColor2.IntGreen(31.0f);
+		const int iOtherBlue = m_frgbaColor2.IntBlue(31.0f);
+
+		// color2 is stored as a per channel offset from color1, which must lie in [-4, 3]
+		const int iDeltaRed = iOtherRed - iBaseRed;
+		const int iDeltaGreen = iOtherGreen - iBaseGreen;
+		const int iDeltaBlue = iOtherBlue - iBaseBlue;
+
+		assert(-4 <= iDeltaRed && iDeltaRed <= 3);
+		assert(-4 <= iDeltaGreen && iDeltaGreen <= 3);
+		assert(-4 <= iDeltaBlue && iDeltaBlue <= 3);
+
+		// base color, then deltas, then codewords (same write order as always)
+		m_pencodingbitsRGB8->differential.red1 = iBaseRed;
+		m_pencodingbitsRGB8->differential.green1 = iBaseGreen;
+		m_pencodingbitsRGB8->differential.blue1 = iBaseBlue;
+
+		m_pencodingbitsRGB8->differential.dred2 = iDeltaRed;
+		m_pencodingbitsRGB8->differential.dgreen2 = iDeltaGreen;
+		m_pencodingbitsRGB8->differential.dblue2 = iDeltaBlue;
+
+		m_pencodingbitsRGB8->individual.cw1 = m_uiCW1;
+		m_pencodingbitsRGB8->individual.cw2 = m_uiCW2;
+
+		SetEncodingBits_Selectors();
+
+		// in RGB8A1 encoding bits, the differential bit carries "opaque" instead:
+		// it is set when the block has no punch through pixels
+		m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels;
+
+		m_pencodingbitsRGB8->individual.flip = m_boolFlip;
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state if T mode
+	//
+	void Block4x4Encoding_RGB8A1::SetEncodingBits_T(void)
+	{
+		// write the T mode encoding bits for the current colors, distance and selectors
+		//
+		// NOTE(review): the "t" and "differential" members of *m_pencodingbitsRGB8 are read and written
+		// in turn below; presumably they are overlapping views of the same bits, so the statement
+		// order matters - confirm against the encoding bits declaration before reordering anything
+
+		static const bool SANITY_CHECK = true;
+
+		assert(m_mode == MODE_T);
+		assert(m_boolDiff == true);
+
+		// both base colors scaled to the 0..15 range
+		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
+		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
+		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
+
+		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
+		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
+		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
+
+		// red1 is split over two fields: red1a receives the value shifted right by 2 and red1b the
+		// unshifted value (presumably each field keeps only its own low bits - confirm field widths)
+		m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2;
+		m_pencodingbitsRGB8->t.red1b = uiRed1;
+		m_pencodingbitsRGB8->t.green1 = uiGreen1;
+		m_pencodingbitsRGB8->t.blue1 = uiBlue1;
+
+		m_pencodingbitsRGB8->t.red2 = uiRed2;
+		m_pencodingbitsRGB8->t.green2 = uiGreen2;
+		m_pencodingbitsRGB8->t.blue2 = uiBlue2;
+
+		// the distance index is split the same way: da receives it shifted right by 1, db unshifted
+		m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1;
+		m_pencodingbitsRGB8->t.db = m_uiCW1;
+
+		// in RGB8A1 encoding bits, opaque replaces differential
+		m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels;
+
+		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
+
+		// create an invalid R differential to trigger T mode
+		// (the detect bits are cleared first, then chosen from the red sum read back with them cleared)
+		m_pencodingbitsRGB8->t.detect1 = 0;
+		m_pencodingbitsRGB8->t.detect2 = 0;
+		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+		if (iRed2 >= 4)
+		{
+			m_pencodingbitsRGB8->t.detect1 = 7;
+			m_pencodingbitsRGB8->t.detect2 = 0;
+		}
+		else
+		{
+			m_pencodingbitsRGB8->t.detect1 = 0;
+			m_pencodingbitsRGB8->t.detect2 = 1;
+		}
+
+		if (SANITY_CHECK)
+		{
+			// re-read the red sum now that the detect bits are in place
+			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+
+			// make sure red overflows
+			assert(iRed2 < 0 || iRed2 > 31);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state if H mode
+	//
+	// colors and selectors may need to swap in order to generate lsb of distance index
+	//
+	void Block4x4Encoding_RGB8A1::SetEncodingBits_H(void)
+	{
+		// Writes the H-mode fields of m_pencodingbitsRGB8 (two colors, distance bits,
+		// selectors, detect bits) from the current encoding state.
+
+		static const bool SANITY_CHECK = true;
+
+		assert(m_mode == MODE_H);
+		assert(m_boolDiff == true);
+
+		// integer channel values of both colors, converted with a scale of 15.0f
+		const unsigned int uiR1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
+		const unsigned int uiG1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
+		const unsigned int uiB1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
+
+		const unsigned int uiR2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
+		const unsigned int uiG2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
+		const unsigned int uiB2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
+
+		// pack each color into a single integer so the two colors can be ordered
+		const unsigned int uiPacked1 = (uiR1 << 16) + (uiG1 << 8) + uiB1;
+		const unsigned int uiPacked2 = (uiR2 << 16) + (uiG2 << 8) + uiB2;
+
+		// only the upper bits of m_uiCW1 are stored (da/db below); its low bit is
+		// expressed by the order of the two colors, so swap them when the order disagrees
+		const bool boolOddDistance = m_uiCW1 & 1;
+		const bool boolSwapColors = (uiPacked1 < uiPacked2) ^ !boolOddDistance;
+
+		// decide which color goes into the first slot and which into the second
+		const unsigned int uiFirstRed = boolSwapColors ? uiR2 : uiR1;
+		const unsigned int uiFirstGreen = boolSwapColors ? uiG2 : uiG1;
+		const unsigned int uiFirstBlue = boolSwapColors ? uiB2 : uiB1;
+
+		const unsigned int uiSecondRed = boolSwapColors ? uiR1 : uiR2;
+		const unsigned int uiSecondGreen = boolSwapColors ? uiG1 : uiG2;
+		const unsigned int uiSecondBlue = boolSwapColors ? uiB1 : uiB2;
+
+		// first color: green and blue are spread over several bit fields
+		m_pencodingbitsRGB8->h.red1 = uiFirstRed;
+		m_pencodingbitsRGB8->h.green1a = uiFirstGreen >> 1;
+		m_pencodingbitsRGB8->h.green1b = uiFirstGreen;
+		m_pencodingbitsRGB8->h.blue1a = uiFirstBlue >> 3;
+		m_pencodingbitsRGB8->h.blue1b = uiFirstBlue >> 1;
+		m_pencodingbitsRGB8->h.blue1c = uiFirstBlue;
+
+		// second color
+		m_pencodingbitsRGB8->h.red2 = uiSecondRed;
+		m_pencodingbitsRGB8->h.green2a = uiSecondGreen >> 1;
+		m_pencodingbitsRGB8->h.green2b = uiSecondGreen;
+		m_pencodingbitsRGB8->h.blue2 = uiSecondBlue;
+
+		// distance bits, identical for both color orders
+		m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
+		m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
+
+		// in RGB8A1 encoding bits, opaque replaces differential
+		m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels;
+
+		// selector bits are written by the ETC1 implementation
+		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
+
+		// swapped colors also need the low 16 selector bits inverted
+		if (boolSwapColors)
+		{
+			m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF;
+		}
+
+		// clear the detect bits, then set them so that the block, as read through the
+		// differential fields, has an in-range red and an out-of-range green (checked below)
+		m_pencodingbitsRGB8->h.detect1 = 0;
+		m_pencodingbitsRGB8->h.detect2 = 0;
+		m_pencodingbitsRGB8->h.detect3 = 0;
+
+		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
+
+		const bool boolRedOutOfRange = (iRed2 < 0 || iRed2 > 31);
+		if (boolRedOutOfRange)
+		{
+			m_pencodingbitsRGB8->h.detect1 = 1;
+		}
+
+		const bool boolGreenHigh = (iGreen2 >= 4);
+		m_pencodingbitsRGB8->h.detect2 = boolGreenHigh ? 7 : 0;
+		m_pencodingbitsRGB8->h.detect3 = boolGreenHigh ? 0 : 1;
+
+		if (SANITY_CHECK)
+		{
+			// read the differential fields again now that the detect bits are final
+			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+			iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
+
+			// make sure red doesn't overflow and green does
+			assert(iRed2 >= 0 && iRed2 <= 31);
+			assert(iGreen2 < 0 || iGreen2 > 31);
+		}
+
+	}
+
+	// ####################################################################################################
+	// Block4x4Encoding_RGB8A1_Opaque
+	// ####################################################################################################
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_RGB8A1_Opaque::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolPunchThroughPixels);
+		assert(!m_boolTransparent);
+		assert(!m_boolDone);
+
+		// set by the steps that may end the search, depending on a_fEffort
+		bool boolEffortReached = false;
+
+		// each call runs exactly one step, selected by the iteration counter
+		switch (m_uiEncodingIterations)
+		{
+		case 0:
+			PerformFirstIteration();
+			break;
+
+		case 1:
+			Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
+			break;
+
+		case 2:
+			Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
+			break;
+
+		case 3:
+			Block4x4Encoding_RGB8::TryPlanar(1);
+			break;
+
+		case 4:
+			Block4x4Encoding_RGB8::TryTAndH(1);
+			boolEffortReached = (a_fEffort <= 49.5f);
+			break;
+
+		case 5:
+			Block4x4Encoding_ETC1::TryDegenerates1();
+			boolEffortReached = (a_fEffort <= 59.5f);
+			break;
+
+		case 6:
+			Block4x4Encoding_ETC1::TryDegenerates2();
+			boolEffortReached = (a_fEffort <= 69.5f);
+			break;
+
+		case 7:
+			Block4x4Encoding_ETC1::TryDegenerates3();
+			boolEffortReached = (a_fEffort <= 79.5f);
+			break;
+
+		case 8:
+			// last step: the search always ends here, whatever the effort
+			Block4x4Encoding_ETC1::TryDegenerates4();
+			boolEffortReached = true;
+			break;
+
+		default:
+			assert(0);
+			break;
+		}
+
+		// only ever set m_boolDone here, never clear it: step 0 may already have set it
+		if (boolEffortReached)
+		{
+			m_boolDone = true;
+		}
+
+		// move on to the next step for the following call
+		m_uiEncodingIterations++;
+		SetDoneIfPerfect();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best initial encoding to ensure block has a valid encoding
+	//
+	void Block4x4Encoding_RGB8A1_Opaque::PerformFirstIteration(void)
+	{
+		// every decoded alpha is 1.0; the squared alpha error is summed into m_fError
+		// NOTE(review): this sum looks unused, m_fError is reset to FLT_MAX below - confirm
+		m_fError = 0.0f;
+		for (unsigned int uiIndex = 0; uiIndex < PIXELS; uiIndex++)
+		{
+			m_afDecodedAlphas[uiIndex] = 1.0f;
+			const float fAlphaDelta = 1.0f - m_pafrgbaSource[uiIndex].fA;
+			m_fError += fAlphaDelta * fAlphaDelta;
+		}
+
+		CalculateMostLikelyFlip();
+
+		// start from the largest float; presumably any candidate tried below then replaces it
+		m_fError = FLT_MAX;
+
+		// try the candidate encodings in order, skipping the rest once m_boolDone is set
+		Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 0, 0, 0);
+		SetDoneIfPerfect();
+
+		if (!m_boolDone)
+		{
+			Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0);
+			SetDoneIfPerfect();
+		}
+
+		if (!m_boolDone)
+		{
+			Block4x4Encoding_RGB8::TryPlanar(0);
+			SetDoneIfPerfect();
+		}
+
+		if (!m_boolDone)
+		{
+			Block4x4Encoding_RGB8::TryTAndH(0);
+			SetDoneIfPerfect();
+		}
+	}
+
+	// ####################################################################################################
+	// Block4x4Encoding_RGB8A1_Transparent
+	// ####################################################################################################
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_RGB8A1_Transparent::PerformIteration(float )
+	{
+		assert(!m_boolOpaque);
+		assert(m_boolTransparent);
+		assert(!m_boolDone);
+		assert(m_uiEncodingIterations == 0);
+
+		// fixed state: ETC1 mode, diff set, no flip, m_uiCW1/2 zero, default-constructed colors
+		m_boolFlip = false;
+		m_boolDiff = true;
+		m_mode = MODE_ETC1;
+
+		m_uiCW2 = 0;
+		m_uiCW1 = 0;
+		m_frgbaColor1 = ColorFloatRGBA();
+		m_frgbaColor2 = ColorFloatRGBA();
+
+		// every pixel gets the transparent selector, a default-constructed color and zero alpha
+		for (unsigned int uiIndex = 0; uiIndex < PIXELS; ++uiIndex)
+		{
+			m_afDecodedAlphas[uiIndex] = 0.0f;
+			m_afrgbaDecodedColors[uiIndex] = ColorFloatRGBA();
+			m_auiSelectors[uiIndex] = TRANSPARENT_SELECTOR;
+		}
+
+		CalcBlockError();
+
+		// this encoder finishes in its first and only iteration
+		++m_uiEncodingIterations;
+		m_boolDone = true;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+}
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.h b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.h
new file mode 100644
index 0000000..91b9355
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcBlock4x4Encoding_RGB8.h"
+#include "EtcErrorMetric.h"
+#include "EtcBlock4x4EncodingBits.h"
+
+namespace Etc
+{
+
+	// ################################################################################
+	// Block4x4Encoding_RGB8A1
+	// RGB8A1 if not completely opaque or transparent
+	// ################################################################################
+
+	class Block4x4Encoding_RGB8A1 : public Block4x4Encoding_RGB8
+	{
+	public:
+		// selector written for every pixel by Block4x4Encoding_RGB8A1_Transparent::PerformIteration()
+		static const unsigned int TRANSPARENT_SELECTOR = 2;
+
+		Block4x4Encoding_RGB8A1(void);
+		virtual ~Block4x4Encoding_RGB8A1(void);
+
+		virtual void InitFromSource(Block4x4 *a_pblockParent,
+									ColorFloatRGBA *a_pafrgbaSource,
+									unsigned char *a_paucEncodingBits,
+									ErrorMetric a_errormetric);
+
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+											unsigned char *a_paucEncodingBits,
+											ColorFloatRGBA *a_pafrgbaSource,
+											ErrorMetric a_errormetric);
+		// one encoding iteration; overridden by the _Opaque and _Transparent subclasses
+		virtual void PerformIteration(float a_fEffort);
+		// NOTE(review): presumably dispatches to SetEncodingBits_ETC1/_T/_H by mode - confirm in the .cpp
+		virtual void SetEncodingBits(void);
+
+		void InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent,
+										unsigned char *a_paucEncodingBits,
+										ColorFloatRGBA *a_pafrgbaSource,
+										ErrorMetric a_errormetric);
+
+		void InitFromEncodingBits_T(void);
+		void InitFromEncodingBits_H(void);
+		// non-virtual; Block4x4Encoding_RGB8A1_Opaque declares its own PerformFirstIteration()
+		void PerformFirstIteration(void);
+		// helpers named per mode (ETC1, T, H); SetEncodingBits_H() asserts m_mode == MODE_H
+		void Decode_ETC1(void);
+		void DecodePixels_T(void);
+		void DecodePixels_H(void);
+		void SetEncodingBits_ETC1(void);
+		void SetEncodingBits_T(void);
+		void SetEncodingBits_H(void);
+
+	protected:
+
+		bool m_boolOpaque;				// all source pixels have alpha >= 0.5
+		bool m_boolTransparent;			// all source pixels have alpha < 0.5
+		bool m_boolPunchThroughPixels;	// some source pixels have alpha < 0.5
+		// table with one row per CW range and one column per selector
+		static float s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS];
+
+	private:
+		// private search steps; the _Opaque subclass calls the Block4x4Encoding_ETC1 / _RGB8 versions by qualified name
+		void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius,
+								int a_iGrayOffset1, int a_iGrayOffset2);
+		void TryDifferentialHalf(DifferentialTrys::Half *a_phalf);
+
+		void TryT(unsigned int a_uiRadius);
+		void TryT_BestSelectorCombination(void);
+		void TryH(unsigned int a_uiRadius);
+		void TryH_BestSelectorCombination(void);
+
+		void TryDegenerates1(void);
+		void TryDegenerates2(void);
+		void TryDegenerates3(void);
+		void TryDegenerates4(void);
+
+	};
+
+	// ################################################################################
+	// Block4x4Encoding_RGB8A1_Opaque
+	// RGB8A1 if all pixels have alpha==1
+	// ################################################################################
+
+	class Block4x4Encoding_RGB8A1_Opaque : public Block4x4Encoding_RGB8A1
+	{
+	public:
+		// runs one search step per call, chosen by m_uiEncodingIterations; step 0 is PerformFirstIteration()
+		virtual void PerformIteration(float a_fEffort);
+		// non-virtual; hides Block4x4Encoding_RGB8A1::PerformFirstIteration()
+		void PerformFirstIteration(void);
+
+	private:
+
+	};
+
+	// ################################################################################
+	// Block4x4Encoding_RGB8A1_Transparent
+	// RGB8A1 if all pixels have alpha==0
+	// ################################################################################
+
+	class Block4x4Encoding_RGB8A1_Transparent : public Block4x4Encoding_RGB8A1
+	{
+	public:
+		// finishes in one call: sets every selector to TRANSPARENT_SELECTOR and sets m_boolDone
+		virtual void PerformIteration(float a_fEffort);
+
+	private:
+
+	};
+
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.cpp b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.cpp
new file mode 100644
index 0000000..0ca531d
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.cpp
@@ -0,0 +1,474 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding_RGBA8.cpp contains:
+	Block4x4Encoding_RGBA8
+	Block4x4Encoding_RGBA8_Opaque
+	Block4x4Encoding_RGBA8_Transparent
+
+These encoders are used when targeting file format RGBA8.
+
+Block4x4Encoding_RGBA8_Opaque is used when all pixels in the 4x4 block are opaque
+Block4x4Encoding_RGBA8_Transparent is used when all pixels in the 4x4 block are transparent
+Block4x4Encoding_RGBA8 is used when there is a mixture of alphas in the 4x4 block
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding_RGBA8.h"
+
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcBlock4x4.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <float.h>
+#include <limits>
+
+namespace Etc
+{
+
+	// ####################################################################################################
+	// Block4x4Encoding_RGBA8
+	// ####################################################################################################
+
+	float Block4x4Encoding_RGBA8::s_aafModifierTable[MODIFIER_TABLE_ENTRYS][ALPHA_SELECTORS]
+	{	// one row per modifier table entry, one column per alpha selector; entries are n / 255
+		{ -3.0f / 255.0f, -6.0f / 255.0f,  -9.0f / 255.0f, -15.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 8.0f / 255.0f, 14.0f / 255.0f },
+		{ -3.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, -13.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f, 12.0f / 255.0f },
+		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 12.0f / 255.0f },
+		{ -2.0f / 255.0f, -4.0f / 255.0f,  -6.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 12.0f / 255.0f },
+		// in every row, selectors 0-3 are negative modifiers and selectors 4-7 are non-negative
+		{ -3.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -12.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 11.0f / 255.0f },
+		{ -3.0f / 255.0f, -7.0f / 255.0f,  -9.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 8.0f / 255.0f, 10.0f / 255.0f },
+		{ -4.0f / 255.0f, -7.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
+		{ -3.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
+		// CalculateA8() reads selector 3 as the row minimum and selector 7 as the row maximum
+		{ -2.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
+		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
+		{ -2.0f / 255.0f, -4.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
+		{ -2.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
+		// the row index chosen by CalculateA8() is stored in m_uiModifierTableIndex
+		{ -3.0f / 255.0f, -4.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 2.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
+		{ -1.0f / 255.0f, -2.0f / 255.0f,  -3.0f / 255.0f, -10.0f / 255.0f, 0.0f / 255.0f, 1.0f / 255.0f, 2.0f / 255.0f,  9.0f / 255.0f },
+		{ -4.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f,  -9.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  8.0f / 255.0f },
+		{ -3.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f,  -9.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  8.0f / 255.0f }
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Block4x4Encoding_RGBA8::Block4x4Encoding_RGBA8(void)
+		: m_pencodingbitsA8(nullptr)
+	{
+		// no A8 encoding bits are attached yet;
+		// InitFromSource() / InitFromEncodingBits() set the pointer
+	}
+	Block4x4Encoding_RGBA8::~Block4x4Encoding_RGBA8(void) {}	// empty body: nothing is released here
+	// ----------------------------------------------------------------------------------------------------
+	// initialization prior to encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits
+	//
+	void Block4x4Encoding_RGBA8::InitFromSource(Block4x4 *a_pblockParent,
+												ColorFloatRGBA *a_pafrgbaSource,
+												unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
+	{
+		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric);
+		// the A8 bits come first in the buffer, the RGB8 bits directly after them
+		unsigned char *paucColorBits = a_paucEncodingBits + sizeof(Block4x4EncodingBits_A8);
+		m_pencodingbitsA8 = reinterpret_cast<Block4x4EncodingBits_A8 *>(a_paucEncodingBits);
+		m_pencodingbitsRGB8 = reinterpret_cast<Block4x4EncodingBits_RGB8 *>(paucColorBits);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits of a previous encoding
+	//
+	void Block4x4Encoding_RGBA8::InitFromEncodingBits(Block4x4 *a_pblockParent,
+														unsigned char *a_paucEncodingBits,
+														ColorFloatRGBA *a_pafrgbaSource,
+														ErrorMetric a_errormetric)
+	{
+		// the A8 bits come first in the buffer, the RGB8 bits directly after them
+		unsigned char *paucColorBits = a_paucEncodingBits + sizeof(Block4x4EncodingBits_A8);
+		m_pencodingbitsA8 = reinterpret_cast<Block4x4EncodingBits_A8 *>(a_paucEncodingBits);
+		m_pencodingbitsRGB8 = reinterpret_cast<Block4x4EncodingBits_RGB8 *>(paucColorBits);
+
+		// init RGB portion
+		Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent,
+													paucColorBits,
+													a_pafrgbaSource,
+													a_errormetric);
+
+		// init A8 portion; has to be done after the RGB8 InitFromEncodingBits()
+		m_fBase = m_pencodingbitsA8->data.base / 255.0f;
+		m_fMultiplier = (float)m_pencodingbitsA8->data.multiplier;
+		m_uiModifierTableIndex = m_pencodingbitsA8->data.table;
+
+		// join the six selector fields into one value, selectors0 in the highest position
+		typedef unsigned long long int SelectorWord;
+		const SelectorWord ullPacked =
+			((SelectorWord)m_pencodingbitsA8->data.selectors0 << 40) |
+			((SelectorWord)m_pencodingbitsA8->data.selectors1 << 32) |
+			((SelectorWord)m_pencodingbitsA8->data.selectors2 << 24) |
+			((SelectorWord)m_pencodingbitsA8->data.selectors3 << 16) |
+			((SelectorWord)m_pencodingbitsA8->data.selectors4 << 8) |
+			(SelectorWord)m_pencodingbitsA8->data.selectors5;
+
+		// each pixel owns 3 bits; pixel 0 starts at bit 45
+		for (unsigned int uiIndex = 0; uiIndex < PIXELS; uiIndex++)
+		{
+			const unsigned int uiShift = 45 - (3 * uiIndex);
+			m_auiAlphaSelectors[uiIndex] = (ullPacked >> uiShift) & (ALPHA_SELECTORS - 1);
+		}
+
+		// decode the alphas and sum the squared alpha error
+		m_fError = 0.0f;
+		for (unsigned int uiIndex = 0; uiIndex < PIXELS; uiIndex++)
+		{
+			m_afDecodedAlphas[uiIndex] = DecodePixelAlpha(m_fBase, m_fMultiplier,
+				m_uiModifierTableIndex,
+				m_auiAlphaSelectors[uiIndex]);
+
+			const float fDelta = m_afDecodedAlphas[uiIndex] - m_pafrgbaSource[uiIndex].fA;
+			m_fError += fDelta * fDelta;
+		}
+
+		// redo error calc to include alpha
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	// similar to Block4x4Encoding_RGB8_Base::Encode_RGB8(), but with alpha added
+	//
+	void Block4x4Encoding_RGBA8::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolDone);
+
+		// alpha is solved once, on the first iteration; higher effort selects a larger search radius
+		if (m_uiEncodingIterations == 0)
+		{
+			float fAlphaRadius = 2.0f;
+			if (a_fEffort < 24.9f)
+			{
+				fAlphaRadius = 0.0f;
+			}
+			else if (a_fEffort < 49.9f)
+			{
+				fAlphaRadius = 1.0f;
+			}
+
+			CalculateA8(fAlphaRadius);
+		}
+
+		// the color portion is iterated by the RGB8 implementation
+		Block4x4Encoding_RGB8::PerformIteration(a_fEffort);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find the best combination of base alpha, multiplier and selectors
+	//
+	// a_fRadius limits the range of base alpha to try
+	//
+	void Block4x4Encoding_RGBA8::CalculateA8(float a_fRadius)
+	{
+		// results: m_fBase, m_fMultiplier, m_uiModifierTableIndex, m_auiAlphaSelectors, m_afDecodedAlphas, m_fError
+		// find min/max alpha
+		float fMinAlpha = 1.0f;
+		float fMaxAlpha = 0.0f;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			float fAlpha = m_pafrgbaSource[uiPixel].fA;
+
+			// ignore border pixels
+			if (isnan(fAlpha))
+			{
+				continue;
+			}
+
+			if (fAlpha < fMinAlpha)
+			{
+				fMinAlpha = fAlpha;
+			}
+			if (fAlpha > fMaxAlpha)
+			{
+				fMaxAlpha = fAlpha;
+			}
+		}
+		assert(fMinAlpha <= fMaxAlpha);
+
+		float fAlphaRange = fMaxAlpha - fMinAlpha;
+
+		// try each modifier table entry
+		m_fError = FLT_MAX;		// artificially high value
+		for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++)
+		{
+			static const unsigned int MIN_VALUE_SELECTOR = 3;
+			static const unsigned int MAX_VALUE_SELECTOR = 7;
+
+			float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][MIN_VALUE_SELECTOR];
+
+			float fTableEntryRange = s_aafModifierTable[uiTableEntry][MAX_VALUE_SELECTOR] -
+				s_aafModifierTable[uiTableEntry][MIN_VALUE_SELECTOR];
+
+			float fCenterRatio = fTableEntryCenter / fTableEntryRange;
+			// map the table row's zero point into the source alpha range, then snap it to n/255
+			float fCenter = fMinAlpha + fCenterRatio*fAlphaRange;
+			fCenter = roundf(255.0f * fCenter) / 255.0f;
+
+			float fMinBase = fCenter - (a_fRadius / 255.0f);
+			if (fMinBase < 0.0f)
+			{
+				fMinBase = 0.0f;
+			}
+
+			float fMaxBase = fCenter + (a_fRadius / 255.0f);
+			if (fMaxBase > 1.0f)
+			{
+				fMaxBase = 1.0f;
+			}
+
+			// the multiplier range does not depend on the base alpha, so it is computed once per table entry
+			float fRangeMultiplier = roundf(fAlphaRange / fTableEntryRange);
+
+			float fMinMultiplier = fRangeMultiplier - a_fRadius;
+			if (fMinMultiplier < 1.0f)
+			{
+				fMinMultiplier = 1.0f;
+			}
+			else if (fMinMultiplier > 15.0f)
+			{
+				fMinMultiplier = 15.0f;
+			}
+
+			float fMaxMultiplier = fRangeMultiplier + a_fRadius;
+			if (fMaxMultiplier < 1.0f)
+			{
+				fMaxMultiplier = 1.0f;
+			}
+			else if (fMaxMultiplier > 15.0f)
+			{
+				fMaxMultiplier = 15.0f;
+			}
+			// step the base alpha in increments of just under 1/255
+			for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f))
+			{
+				for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f)
+				{
+					// find best selector for each pixel
+					unsigned int auiBestSelectors[PIXELS];
+					float afBestAlphaError[PIXELS];
+					float afBestDecodedAlphas[PIXELS];
+					for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+					{
+						float fBestPixelAlphaError = FLT_MAX;
+						for (unsigned int uiSelector = 0; uiSelector < ALPHA_SELECTORS; uiSelector++)
+						{
+							float fDecodedAlpha = DecodePixelAlpha(fBase, fMultiplier, uiTableEntry, uiSelector);
+
+							// border pixels (NAN) should have zero error
+							float fPixelDeltaAlpha = isnan(m_pafrgbaSource[uiPixel].fA) ?
+															0.0f :
+															fDecodedAlpha - m_pafrgbaSource[uiPixel].fA;
+
+							float fPixelAlphaError = fPixelDeltaAlpha * fPixelDeltaAlpha;
+
+							if (fPixelAlphaError < fBestPixelAlphaError)
+							{
+								fBestPixelAlphaError = fPixelAlphaError;
+								auiBestSelectors[uiPixel] = uiSelector;
+								afBestAlphaError[uiPixel] = fBestPixelAlphaError;
+								afBestDecodedAlphas[uiPixel] = fDecodedAlpha;
+							}
+						}
+					}
+
+					float fBlockError = 0.0f;
+					for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+					{
+						fBlockError += afBestAlphaError[uiPixel];
+					}
+					// keep this combination only if it is strictly better than the best so far
+					if (fBlockError < m_fError)
+					{
+						m_fError = fBlockError;
+
+						m_fBase = fBase;
+						m_fMultiplier = fMultiplier;
+						m_uiModifierTableIndex = uiTableEntry;
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+						{
+							m_auiAlphaSelectors[uiPixel] = auiBestSelectors[uiPixel];
+							m_afDecodedAlphas[uiPixel] = afBestDecodedAlphas[uiPixel];
+						}
+					}
+				}
+			}
+
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_RGBA8::SetEncodingBits(void)
+	{
+
+		// set the RGB8 portion
+		Block4x4Encoding_RGB8::SetEncodingBits();
+
+		// A8 header: multiplier, table index, and the base alpha scaled by 255 and rounded
+		m_pencodingbitsA8->data.multiplier = (unsigned char)roundf(m_fMultiplier);
+		m_pencodingbitsA8->data.table = m_uiModifierTableIndex;
+		m_pencodingbitsA8->data.base = (unsigned char)roundf(255.0f * m_fBase);
+
+		// pack the selectors 3 bits apart, pixel 0 starting at bit 45
+		unsigned long long int ullPacked = 0;
+		unsigned int uiShift = 45;
+		for (unsigned int uiIndex = 0; uiIndex < PIXELS; uiIndex++, uiShift -= 3)
+		{
+			ullPacked |= ((unsigned long long int)m_auiAlphaSelectors[uiIndex]) << uiShift;
+		}
+
+		// store the packed value in six pieces, selectors0 taking the highest bits
+		// NOTE(review): assumes each selectorsN field is 8 bits wide - confirm in Block4x4EncodingBits_A8
+		m_pencodingbitsA8->data.selectors0 = ullPacked >> 40;
+		m_pencodingbitsA8->data.selectors1 = ullPacked >> 32;
+		m_pencodingbitsA8->data.selectors2 = ullPacked >> 24;
+		m_pencodingbitsA8->data.selectors3 = ullPacked >> 16;
+		m_pencodingbitsA8->data.selectors4 = ullPacked >> 8;
+		m_pencodingbitsA8->data.selectors5 = ullPacked;
+	}
+
+	// ####################################################################################################
+	// Block4x4Encoding_RGBA8_Opaque
+	// ####################################################################################################
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_RGBA8_Opaque::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolDone);
+
+		// first iteration only: every decoded alpha becomes 1.0 and the error starts at zero
+		if (0 == m_uiEncodingIterations)
+		{
+			for (unsigned int uiIndex = 0; uiIndex < PIXELS; ++uiIndex)
+			{
+				m_afDecodedAlphas[uiIndex] = 1.0f;
+			}
+			m_fError = 0.0f;
+		}
+
+		// the color portion is iterated by the RGB8 implementation
+		Block4x4Encoding_RGB8::PerformIteration(a_fEffort);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_RGBA8_Opaque::SetEncodingBits(void)
+	{
+		// set the RGB8 portion
+		Block4x4Encoding_RGB8::SetEncodingBits();
+
+		// set the A8 portion: the same constant values for every opaque block
+		Block4x4EncodingBits_A8 *pbitsAlpha = m_pencodingbitsA8;
+		pbitsAlpha->data.base = 255;
+		pbitsAlpha->data.table = 15;
+		pbitsAlpha->data.multiplier = 15;
+		// all selector fields are filled with 0xFF
+		pbitsAlpha->data.selectors0 = 0xFF;
+		pbitsAlpha->data.selectors1 = 0xFF;
+		pbitsAlpha->data.selectors2 = 0xFF;
+		pbitsAlpha->data.selectors3 = 0xFF;
+		pbitsAlpha->data.selectors4 = 0xFF;
+		pbitsAlpha->data.selectors5 = 0xFF;
+	}
+
+	// ####################################################################################################
+	// Block4x4Encoding_RGBA8_Transparent
+	// ####################################################################################################
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_RGBA8_Transparent::PerformIteration(float )
+	{
+		assert(!m_boolDone);
+		assert(m_uiEncodingIterations == 0);
+
+		// fixed mode settings: ETC1 mode, diff set, no flip
+		m_boolFlip = false;
+		m_boolDiff = true;
+		m_mode = MODE_ETC1;
+
+		// every pixel decodes to a default-constructed color with zero alpha
+		for (unsigned int uiIndex = 0; uiIndex < PIXELS; ++uiIndex)
+		{
+			m_afDecodedAlphas[uiIndex] = 0.0f;
+			m_afrgbaDecodedColors[uiIndex] = ColorFloatRGBA();
+		}
+		// the error is set to zero and the block is finished after this single iteration
+		m_fError = 0.0f;
+		++m_uiEncodingIterations;
+		m_boolDone = true;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_RGBA8_Transparent::SetEncodingBits(void)
+	{
+		// the RGB8 portion is written by the RGB8 implementation
+		Block4x4Encoding_RGB8::SetEncodingBits();
+
+		// set the A8 portion: the same constant values for every transparent block
+		Block4x4EncodingBits_A8 *pbitsAlpha = m_pencodingbitsA8;
+		pbitsAlpha->data.base = 0;
+		pbitsAlpha->data.table = 0;
+		pbitsAlpha->data.multiplier = 1;
+		pbitsAlpha->data.selectors0 = 0;
+		pbitsAlpha->data.selectors1 = 0;
+		pbitsAlpha->data.selectors2 = 0;
+		pbitsAlpha->data.selectors3 = 0;
+		pbitsAlpha->data.selectors4 = 0;
+		pbitsAlpha->data.selectors5 = 0;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+}
diff --git a/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.h b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.h
new file mode 100644
index 0000000..9d21e90
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcBlock4x4Encoding_RGB8.h"
+
+namespace Etc
+{
+	class Block4x4EncodingBits_A8;
+
+	// ################################################################################
+	// Block4x4Encoding_RGBA8
+	// RGBA8 if not completely opaque or transparent
+	// ################################################################################
+
+	class Block4x4Encoding_RGBA8 : public Block4x4Encoding_RGB8
+	{
+	public:
+
+		Block4x4Encoding_RGBA8(void);
+		virtual ~Block4x4Encoding_RGBA8(void);
+
+		// initialize from source pixels (the implementation is not part of this header)
+		virtual void InitFromSource(Block4x4 *a_pblockParent,
+									ColorFloatRGBA *a_pafrgbaSource,
+									unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric);
+
+		// initialize from existing encoding bits (the implementation is not part of this header)
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+											unsigned char *a_paucEncodingBits,
+											ColorFloatRGBA *a_pafrgbaSource,
+											ErrorMetric a_errormetric);
+
+		virtual void PerformIteration(float a_fEffort);
+		virtual void SetEncodingBits(void);
+
+	protected:
+
+		// dimensions of the alpha modifier table: 16 rows, 8 selector values per row
+		static const unsigned int MODIFIER_TABLE_ENTRYS = 16;
+		static const unsigned int ALPHA_SELECTOR_BITS = 3;
+		static const unsigned int ALPHA_SELECTORS = 1 << ALPHA_SELECTOR_BITS;
+
+		static float s_aafModifierTable[MODIFIER_TABLE_ENTRYS][ALPHA_SELECTORS];
+
+		void CalculateA8(float a_fRadius);
+
+		Block4x4EncodingBits_A8 *m_pencodingbitsA8;	// A8 portion of Block4x4EncodingBits_RGBA8
+
+		// alpha encoding state
+		float m_fBase;
+		float m_fMultiplier;
+		unsigned int m_uiModifierTableIndex;
+		unsigned int m_auiAlphaSelectors[PIXELS];
+
+	private:
+
+		// decode one alpha value: base + multiplier * table[index][selector], clamped to [0, 1]
+		// (a NaN result is returned unchanged because neither comparison below is true for NaN)
+		inline float DecodePixelAlpha(float a_fBase, float a_fMultiplier,
+										unsigned int a_uiTableIndex, unsigned int a_uiSelector)
+		{
+			const float fModifier = s_aafModifierTable[a_uiTableIndex][a_uiSelector];
+			const float fAlpha = a_fBase + a_fMultiplier*fModifier;
+			if (fAlpha < 0.0f)
+			{
+				return 0.0f;
+			}
+			return (fAlpha > 1.0f) ? 1.0f : fAlpha;
+		}
+
+	};
+
+	// ################################################################################
+	// Block4x4Encoding_RGBA8_Opaque
+	// RGBA8 if all pixels have alpha==1
+	// ################################################################################
+
+	class Block4x4Encoding_RGBA8_Opaque : public Block4x4Encoding_RGBA8
+	{
+	public:
+		// both methods re-declare virtuals of Block4x4Encoding_RGBA8; they are defined outside this header
+		virtual void PerformIteration(float a_fEffort);
+
+		virtual void SetEncodingBits(void);
+
+	};
+
+	// ################################################################################
+	// Block4x4Encoding_RGBA8_Transparent
+	// RGBA8 if all pixels have alpha==0
+	// ################################################################################
+
+	class Block4x4Encoding_RGBA8_Transparent : public Block4x4Encoding_RGBA8
+	{
+	public:
+		// both methods re-declare virtuals of Block4x4Encoding_RGBA8; they are defined outside this header
+		virtual void PerformIteration(float a_fEffort);
+
+		virtual void SetEncodingBits(void);
+
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcDifferentialTrys.cpp b/extern/EtcLib/EtcCodec/EtcDifferentialTrys.cpp
new file mode 100644
index 0000000..0fcc550
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcDifferentialTrys.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcDifferentialTrys.cpp
+
+Gathers the results of the various encoding trys for both halves of a 4x4 block for Differential mode
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcDifferentialTrys.h"
+
+#include <assert.h>
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// construct a list of trys (encoding attempts)
+	//
+	// a_frgbaColor1 is the basecolor for the first half
+	// a_frgbaColor2 is the basecolor for the second half
+	// a_pauiPixelMapping1 is the pixel order for the first half
+	// a_pauiPixelMapping2 is the pixel order for the second half
+	// a_uiRadius is the amount to vary the base colors
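+	// a_iGrayOffset1 and a_iGrayOffset2 are added to every quantized component of the first and second basecolor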
+	DifferentialTrys::DifferentialTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2,
+										const unsigned int *a_pauiPixelMapping1,
+										const unsigned int *a_pauiPixelMapping2,
+										unsigned int a_uiRadius,
+										int a_iGrayOffset1, int a_iGrayOffset2)
+	{
+		assert(a_uiRadius <= MAX_RADIUS);
+
+		m_boolSeverelyBentColors = false;
+
+		// quantize both base colors with QuantizeR5G5B5()
+		ColorFloatRGBA frgbaBase1 = a_frgbaColor1.QuantizeR5G5B5();
+		ColorFloatRGBA frgbaBase2 = a_frgbaColor2.QuantizeR5G5B5();
+
+		// take integer components (scale 31), add the gray offset of the respective half,
+		// and keep each one a_uiRadius away from 0 and 31 so that trys around it don't overflow
+		int iRed1 = MoveAwayFromEdge(frgbaBase1.IntRed(31.0f) + a_iGrayOffset1, a_uiRadius);
+		int iGreen1 = MoveAwayFromEdge(frgbaBase1.IntGreen(31.0f) + a_iGrayOffset1, a_uiRadius);
+		int iBlue1 = MoveAwayFromEdge(frgbaBase1.IntBlue(31.0f) + a_iGrayOffset1, a_uiRadius);
+		int iRed2 = MoveAwayFromEdge(frgbaBase2.IntRed(31.0f) + a_iGrayOffset2, a_uiRadius);
+		int iGreen2 = MoveAwayFromEdge(frgbaBase2.IntGreen(31.0f) + a_iGrayOffset2, a_uiRadius);
+		int iBlue2 = MoveAwayFromEdge(frgbaBase2.IntBlue(31.0f) + a_iGrayOffset2, a_uiRadius);
+
+		int iDeltaRed = iRed2 - iRed1;
+		int iDeltaGreen = iGreen2 - iGreen1;
+		int iDeltaBlue = iBlue2 - iBlue1;
+
+		// each component of color 2 is forced to lie within [-4, +3] of color 1 (see the asserts)
+		// a pair that is further apart is pulled together: component 1 moves half of the excess
+		// toward component 2, then component 2 is placed exactly 4 below or 3 above component 1
+		// an original delta outside [-8, +7] additionally sets m_boolSeverelyBentColors
+		{
+			// ---- red ----
+			if (iDeltaRed < -4)
+			{
+				// color 2 is too far below color 1
+				m_boolSeverelyBentColors = m_boolSeverelyBentColors || (iDeltaRed < -8);
+
+				iRed1 += (iDeltaRed + 4) / 2;
+				iRed2 = iRed1 - 4;
+				iDeltaRed = -4;
+			}
+			else if (iDeltaRed > 3)
+			{
+				// color 2 is too far above color 1
+				m_boolSeverelyBentColors = m_boolSeverelyBentColors || (iDeltaRed > 7);
+
+				iRed1 += (iDeltaRed - 3) / 2;
+				iRed2 = iRed1 + 3;
+				iDeltaRed = 3;
+			}
+
+			// both components must still be a_uiRadius away from 0 and 31
+			assert(iRed1 >= (signed)(0 + a_uiRadius) && iRed1 <= (signed)(31 - a_uiRadius));
+			assert(iRed2 >= (signed)(0 + a_uiRadius) && iRed2 <= (signed)(31 - a_uiRadius));
+			assert(iDeltaRed >= -4 && iDeltaRed <= 3);
+
+			// ---- green ----
+			if (iDeltaGreen < -4)
+			{
+				// color 2 is too far below color 1
+				m_boolSeverelyBentColors = m_boolSeverelyBentColors || (iDeltaGreen < -8);
+
+				iGreen1 += (iDeltaGreen + 4) / 2;
+				iGreen2 = iGreen1 - 4;
+				iDeltaGreen = -4;
+			}
+			else if (iDeltaGreen > 3)
+			{
+				// color 2 is too far above color 1
+				m_boolSeverelyBentColors = m_boolSeverelyBentColors || (iDeltaGreen > 7);
+
+				iGreen1 += (iDeltaGreen - 3) / 2;
+				iGreen2 = iGreen1 + 3;
+				iDeltaGreen = 3;
+			}
+
+			// both components must still be a_uiRadius away from 0 and 31
+			assert(iGreen1 >= (signed)(0 + a_uiRadius) && iGreen1 <= (signed)(31 - a_uiRadius));
+			assert(iGreen2 >= (signed)(0 + a_uiRadius) && iGreen2 <= (signed)(31 - a_uiRadius));
+			assert(iDeltaGreen >= -4 && iDeltaGreen <= 3);
+
+			// ---- blue ----
+			if (iDeltaBlue < -4)
+			{
+				// color 2 is too far below color 1
+				m_boolSeverelyBentColors = m_boolSeverelyBentColors || (iDeltaBlue < -8);
+
+				iBlue1 += (iDeltaBlue + 4) / 2;
+				iBlue2 = iBlue1 - 4;
+				iDeltaBlue = -4;
+			}
+			else if (iDeltaBlue > 3)
+			{
+				// color 2 is too far above color 1
+				m_boolSeverelyBentColors = m_boolSeverelyBentColors || (iDeltaBlue > 7);
+
+				iBlue1 += (iDeltaBlue - 3) / 2;
+				iBlue2 = iBlue1 + 3;
+				iDeltaBlue = 3;
+			}
+
+			// both components must still be a_uiRadius away from 0 and 31
+			assert(iBlue1 >= (signed)(0+a_uiRadius) && iBlue1 <= (signed)(31 - a_uiRadius));
+			assert(iBlue2 >= (signed)(0 + a_uiRadius) && iBlue2 <= (signed)(31 - a_uiRadius));
+			assert(iDeltaBlue >= -4 && iDeltaBlue <= 3);
+		}
+
+		m_half1.Init(iRed1, iGreen1, iBlue1, a_pauiPixelMapping1, a_uiRadius);
+		m_half2.Init(iRed2, iGreen2, iBlue2, a_pauiPixelMapping2, a_uiRadius);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// reset one half: store the center color, the pixel mapping and the radius, and empty the try list
+	// m_ptryBest is cleared as well so that it never holds an indeterminate pointer
+	void DifferentialTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue,
+										const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius)
+	{
+		m_iRed = a_iRed;
+		m_iGreen = a_iGreen;
+		m_iBlue = a_iBlue;
+
+		m_pauiPixelMapping = a_pauiPixelMapping;
+		m_uiRadius = a_uiRadius;
+		// no trys have been recorded yet, hence there is no best try either
+		m_uiTrys = 0;
+		m_ptryBest = nullptr;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcDifferentialTrys.h b/extern/EtcLib/EtcCodec/EtcDifferentialTrys.h
new file mode 100644
index 0000000..6b1cd9c
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcDifferentialTrys.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcColorFloatRGBA.h"
+
+namespace Etc
+{
+
+	class DifferentialTrys
+	{
+	public:
+		// largest a_uiRadius the constructor accepts
+		static const unsigned int MAX_RADIUS = 2;
+
+		DifferentialTrys(ColorFloatRGBA a_frgbaColor1,
+							ColorFloatRGBA a_frgbaColor2,
+							const unsigned int *a_pauiPixelMapping1,
+							const unsigned int *a_pauiPixelMapping2,
+							unsigned int a_uiRadius,
+							int a_iGrayOffset1, int a_iGrayOffset2);
+
+		// clamp a_i into [a_iDistance, 31 - a_iDistance]
+		// the lower bound is tested first, so it wins if the two bounds cross
+		inline static int MoveAwayFromEdge(int a_i, int a_iDistance)
+		{
+			const int iLowest = 0 + a_iDistance;
+			const int iHighest = 31 - a_iDistance;
+			if (a_i < iLowest)
+			{
+				return iLowest;
+			}
+			return (a_i > iHighest) ? iHighest : a_i;
+		}
+
+		// one encoding attempt for one half of the block
+		class Try
+		{
+		public:
+			static const unsigned int SELECTORS = 8;	// per half
+
+			int m_iRed;
+			int m_iGreen;
+			int m_iBlue;
+			unsigned int m_uiCW;
+			unsigned int m_auiSelectors[SELECTORS];
+			float m_fError;
+		};
+
+		// the trys gathered for one half of the block
+		class Half
+		{
+		public:
+			// capacity of m_atry; presumably (2*MAX_RADIUS+1)^3 -- confirm against the code that fills it
+			static const unsigned int MAX_TRYS = 125;
+
+			void Init(int a_iRed, int a_iGreen, int a_iBlue,
+						const unsigned int *a_pauiPixelMapping,
+						unsigned int a_uiRadius);
+
+			// center of trys
+			int m_iRed;
+			int m_iGreen;
+			int m_iBlue;
+
+			const unsigned int *m_pauiPixelMapping;
+			unsigned int m_uiRadius;
+
+			unsigned int m_uiTrys;
+			Try m_atry[MAX_TRYS];
+
+			Try *m_ptryBest;
+		};
+
+		Half m_half1;
+		Half m_half2;
+		bool m_boolSeverelyBentColors;
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcErrorMetric.h b/extern/EtcLib/EtcCodec/EtcErrorMetric.h
new file mode 100644
index 0000000..29bb33b
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcErrorMetric.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+namespace Etc
+{
+
+	enum ErrorMetric
+	{
+		RGBA,
+		REC709,
+		NUMERIC,
+		NORMALXYZ,
+		// follows the last real metric, so its value equals the number of metrics above
+		ERROR_METRICS,
+		// alias: BT709 has the same value as REC709
+		BT709 = REC709
+	};
+
+	inline const char *ErrorMetricToString(ErrorMetric errorMetric)
+	{
+		// map an ErrorMetric to its enumerator name; the result is a string literal
+		// ERROR_METRICS and any other value yield "UNKNOWN"
+		// BT709 shares its value with REC709, so it is reported as "REC709"
+		const char *pszName = "UNKNOWN";
+
+		switch (errorMetric)
+		{
+		case RGBA:		pszName = "RGBA";		break;
+		case REC709:	pszName = "REC709";		break;
+		case NUMERIC:	pszName = "NUMERIC";	break;
+		case NORMALXYZ:	pszName = "NORMALXYZ";	break;
+		default:		break;
+		}
+		return pszName;
+	}
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcIndividualTrys.cpp b/extern/EtcLib/EtcCodec/EtcIndividualTrys.cpp
new file mode 100644
index 0000000..951edec
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcIndividualTrys.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcIndividualTrys.cpp
+
+Gathers the results of the various encoding trys for both halves of a 4x4 block for Individual mode
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcIndividualTrys.h"
+
+#include <assert.h>
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// construct a list of trys (encoding attempts)
+	//
+	// a_frgbaColor1 is the basecolor for the first half
+	// a_frgbaColor2 is the basecolor for the second half
+	// a_pauiPixelMapping1 is the pixel order for the first half
+	// a_pauiPixelMapping2 is the pixel order for the second half
+	// a_uiRadius is the amount to vary the base colors
+	//
+	IndividualTrys::IndividualTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2,
+									const unsigned int *a_pauiPixelMapping1,
+									const unsigned int *a_pauiPixelMapping2,
+									unsigned int a_uiRadius)
+	{
+		assert(a_uiRadius <= MAX_RADIUS);
+
+		// first half: quantize with QuantizeR4G4B4(), take integer components (scale 15),
+		// and keep each one a_uiRadius away from 0 and 15 so that trys around it don't overflow
+		ColorFloatRGBA frgbaBase1 = a_frgbaColor1.QuantizeR4G4B4();
+		int iRed1 = MoveAwayFromEdge(frgbaBase1.IntRed(15.0f), a_uiRadius);
+		int iGreen1 = MoveAwayFromEdge(frgbaBase1.IntGreen(15.0f), a_uiRadius);
+		int iBlue1 = MoveAwayFromEdge(frgbaBase1.IntBlue(15.0f), a_uiRadius);
+		m_half1.Init(iRed1, iGreen1, iBlue1, a_pauiPixelMapping1, a_uiRadius);
+
+		// second half: same steps, applied to the second base color
+		// the two halves are independent of each other in this mode
+		ColorFloatRGBA frgbaBase2 = a_frgbaColor2.QuantizeR4G4B4();
+		int iRed2 = MoveAwayFromEdge(frgbaBase2.IntRed(15.0f), a_uiRadius);
+		int iGreen2 = MoveAwayFromEdge(frgbaBase2.IntGreen(15.0f), a_uiRadius);
+		int iBlue2 = MoveAwayFromEdge(frgbaBase2.IntBlue(15.0f), a_uiRadius);
+		m_half2.Init(iRed2, iGreen2, iBlue2, a_pauiPixelMapping2, a_uiRadius);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// reset one half: store the center color, the pixel mapping and the radius, and empty the try list
+	// m_ptryBest is cleared as well so that it never holds an indeterminate pointer
+	void IndividualTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue,
+									const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius)
+	{
+		m_iRed = a_iRed;
+		m_iGreen = a_iGreen;
+		m_iBlue = a_iBlue;
+
+		m_pauiPixelMapping = a_pauiPixelMapping;
+		m_uiRadius = a_uiRadius;
+		// no trys have been recorded yet, hence there is no best try either
+		m_uiTrys = 0;
+		m_ptryBest = nullptr;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcIndividualTrys.h b/extern/EtcLib/EtcCodec/EtcIndividualTrys.h
new file mode 100644
index 0000000..49170d4
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcIndividualTrys.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcColorFloatRGBA.h"
+
+namespace Etc
+{
+
+	class IndividualTrys
+	{
+	public:
+		// largest a_uiRadius the constructor accepts
+		static const unsigned int MAX_RADIUS = 1;
+
+		IndividualTrys(ColorFloatRGBA a_frgbaColor1,
+						ColorFloatRGBA a_frgbaColor2,
+						const unsigned int *a_pauiPixelMapping1,
+						const unsigned int *a_pauiPixelMapping2,
+						unsigned int a_uiRadius);
+
+		// clamp a_i into [a_iDistance, 15 - a_iDistance]
+		// the lower bound is tested first, so it wins if the two bounds cross
+		inline static int MoveAwayFromEdge(int a_i, int a_iDistance)
+		{
+			const int iLowest = 0 + a_iDistance;
+			const int iHighest = 15 - a_iDistance;
+			if (a_i < iLowest)
+			{
+				return iLowest;
+			}
+			return (a_i > iHighest) ? iHighest : a_i;
+		}
+
+		// one encoding attempt for one half of the block
+		class Try
+		{
+		public:
+			static const unsigned int SELECTORS = 8;	// per half
+
+			int m_iRed;
+			int m_iGreen;
+			int m_iBlue;
+			unsigned int m_uiCW;
+			unsigned int m_auiSelectors[SELECTORS];
+			float m_fError;
+		};
+
+		// the trys gathered for one half of the block
+		class Half
+		{
+		public:
+			// capacity of m_atry; presumably (2*MAX_RADIUS+1)^3 -- confirm against the code that fills it
+			static const unsigned int MAX_TRYS = 27;
+
+			void Init(int a_iRed, int a_iGreen, int a_iBlue,
+						const unsigned int *a_pauiPixelMapping,
+						unsigned int a_uiRadius);
+
+			// center of trys
+			int m_iRed;
+			int m_iGreen;
+			int m_iBlue;
+
+			const unsigned int *m_pauiPixelMapping;
+			unsigned int m_uiRadius;
+
+			unsigned int m_uiTrys;
+			Try m_atry[MAX_TRYS];
+
+			Try *m_ptryBest;
+		};
+
+		Half m_half1;
+		Half m_half2;
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcSortedBlockList.cpp b/extern/EtcLib/EtcCodec/EtcSortedBlockList.cpp
new file mode 100644
index 0000000..7f4f56e
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcSortedBlockList.cpp
@@ -0,0 +1,228 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcSortedBlockList.cpp
+
+SortedBlockList is a list of 4x4 blocks that can be used by the "effort" system to prioritize
+the encoding of the 4x4 blocks.
+
+The sorting is done with buckets, where each bucket is an indication of how much error each 4x4 block has
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcSortedBlockList.h"
+
+#include "EtcBlock4x4.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// construct an empty list
+	//
+	// allocate enough memory to add all of the image's 4x4 blocks later
+	// allocate enough buckets to sort the blocks
+	// the sorted-list head and tail start out null, so GetLinkToFirstBlock() is well defined before Sort()
+	SortedBlockList::SortedBlockList(unsigned int a_uiImageBlocks, unsigned int a_uiBuckets)
+	{
+		m_uiImageBlocks = a_uiImageBlocks;
+		m_iBuckets = (int)a_uiBuckets;
+		m_uiAddedBlocks = 0;
+		m_uiSortedBlocks = 0;
+		m_palinkPool = new Link[m_uiImageBlocks];
+		m_pabucket = new Bucket[m_iBuckets];
+		m_fMaxError = 0.0f;
+		m_plinkFirst = nullptr;
+		m_plinkLast = nullptr;
+
+		InitBuckets();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// release the bucket array and the link pool that the constructor allocated
+	SortedBlockList::~SortedBlockList(void)
+	{
+		delete[] m_pabucket;
+		delete[] m_palinkPool;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// append a 4x4 block to the list, using the next unused link of the pool
+	// the block is not placed in sorted order until Sort() is called
+	//
+	void SortedBlockList::AddBlock(Block4x4 *a_pblock)
+	{
+		assert(m_uiAddedBlocks < m_uiImageBlocks);
+		m_palinkPool[m_uiAddedBlocks].Init(a_pblock);
+		m_uiAddedBlocks++;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// sort all of the 4x4 blocks that have been added to the list
+	//
+	// first, determine the maximum error, then assign an error range to each bucket
+	// next, determine which bucket each 4x4 block belongs to based on the 4x4 block's error
+	// add the 4x4 block to the appropriate bucket
+	// lastly, walk thru the buckets and add each bucket to a sorted linked list
+	//
+	// the resultant sorting is an approximate sorting from most to least error
+	//
+    void SortedBlockList::Sort(void)
+    {
+		// every block of the image must have been added before sorting
+		assert(m_uiAddedBlocks == m_uiImageBlocks);
+		InitBuckets();
+
+		// pass 1: find the largest block error; the buckets divide the range from 0 to that error
+		m_fMaxError = -1.0f;
+
+		for (unsigned int uiLink = 0; uiLink < m_uiAddedBlocks; uiLink++)
+		{
+			const float fBlockError = m_palinkPool[uiLink].GetBlock()->GetError();
+			if (fBlockError > m_fMaxError)
+			{
+				m_fMaxError = fBlockError;
+			}
+		}
+
+		// prevent divide by zero or divide by negative
+		if (m_fMaxError <= 0.0f)
+		{
+			m_fMaxError = 1.0f;
+		}
+
+		// pass 2: put every block whose encoding is not finished into the bucket for its error
+		// m_uiSortedBlocks counts the blocks that end up in the sorted list
+		m_uiSortedBlocks = 0;
+		for (unsigned int uiLink = 0; uiLink < m_uiAddedBlocks; uiLink++)
+		{
+			Link *plinkBlock = &m_palinkPool[uiLink];
+			Block4x4 *pblock = plinkBlock->GetBlock();
+
+			// if the encoding is done, don't add it to the list
+			if (pblock->GetEncoding()->IsDone())
+			{
+				continue;
+			}
+
+			// scale the block error into bucket units
+			// the clamp is applied to the float value, before the conversion to int, because
+			// converting a NaN or an out-of-range float to int is undefined behavior
+			// a NaN fails the "> 0" test below and therefore lands in bucket 0
+			const float fBucket = floorf(m_iBuckets * pblock->GetError() / m_fMaxError);
+			int iBucket = 0;
+			if (fBucket > 0.0f)
+			{
+				iBucket = (fBucket >= (float)m_iBuckets) ? m_iBuckets - 1 : (int)fBucket;
+			}
+
+			// append the block to the tail of its bucket
+			// the block becomes the new tail, so it has no successor
+			Bucket *pbucket = &m_pabucket[iBucket];
+			plinkBlock->SetNext(nullptr);
+			if (pbucket->plinkLast)
+			{
+				pbucket->plinkLast->SetNext(plinkBlock);
+			}
+			else
+			{
+				pbucket->plinkFirst = plinkBlock;
+			}
+			pbucket->plinkLast = plinkBlock;
+
+			m_uiSortedBlocks++;
+		}
+
+		// pass 3: link the blocks together across buckets, highest error bucket first
+		m_plinkFirst = nullptr;
+		m_plinkLast = nullptr;
+		for (int iBucket = m_iBuckets - 1; iBucket >= 0; iBucket--)
+		{
+			Bucket *pbucket = &m_pabucket[iBucket];
+
+			// nothing to chain for an empty bucket
+			if (pbucket->plinkFirst == nullptr)
+			{
+				continue;
+			}
+
+			// the first non-empty bucket provides the head; later ones are hooked onto the current tail
+			if (m_plinkFirst == nullptr)
+			{
+				m_plinkFirst = pbucket->plinkFirst;
+			}
+			else
+			{
+				// the bucket's own tail must be terminated before it becomes the list tail
+				assert(pbucket->plinkLast->GetNext() == nullptr);
+				m_plinkLast->SetNext(pbucket->plinkFirst);
+			}
+
+			m_plinkLast = pbucket->plinkLast;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// clear all of the buckets.  normally done in preparation for a sort
+	// every bucket's first and last link pointers are reset to null
+	void SortedBlockList::InitBuckets(void)
+	{
+		for (int iBucket = 0; iBucket < m_iBuckets; iBucket++)
+		{
+			// an empty bucket has neither a first nor a last link
+			Bucket &bucket = m_pabucket[iBucket];
+			bucket.plinkFirst = nullptr;
+			bucket.plinkLast = nullptr;
+		}
+	}
+
+    // ----------------------------------------------------------------------------------------------------
+    // print one line per bucket, highest bucket first: index, lower error bound, rms and block count
+    // normally used for debugging
+    void SortedBlockList::Print(void)
+    {
+		for (int iBucket = m_iBuckets-1; iBucket >= 0; iBucket--)
+		{
+			Bucket *pbucket = &m_pabucket[iBucket];
+			// count the links of this bucket; the walk stops at the bucket's last link because,
+			// after Sort(), that link's next pointer leads into the following non-empty bucket
+			unsigned int uiBlocks = 0;
+			Link *plink = pbucket->plinkFirst;
+			while (plink != nullptr)
+			{
+				uiBlocks++;
+				if (plink == pbucket->plinkLast)
+				{
+					break;
+				}
+				plink = plink->GetNext();
+			}
+			float fBucketError = m_fMaxError * iBucket / m_iBuckets;
+			float fBucketRMS = sqrtf(fBucketError / (4.0f*16.0f) );
+			printf("%3d: e=%.3f rms=%.6f %u\n", iBucket, fBucketError, fBucketRMS, uiBlocks);
+		}
+    }
+
+    // ----------------------------------------------------------------------------------------------------
+    //
+
+}   // namespace Etc
diff --git a/extern/EtcLib/EtcCodec/EtcSortedBlockList.h b/extern/EtcLib/EtcCodec/EtcSortedBlockList.h
new file mode 100644
index 0000000..8ebb978
--- /dev/null
+++ b/extern/EtcLib/EtcCodec/EtcSortedBlockList.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+namespace Etc
+{
+	class Block4x4;
+
+    class SortedBlockList
+    {
+    public:
+
+		// one pool entry: a block plus a pointer to the link that follows it
+		class Link
+		{
+		public:
+			inline void Init(Block4x4 *a_pblock)
+			{
+				m_pblock = a_pblock;
+				m_plinkNext = nullptr;
+			}
+
+			inline Block4x4 * GetBlock(void)
+			{
+				return m_pblock;
+			}
+
+			inline void SetNext(Link *a_plinkNext)
+			{
+				m_plinkNext = a_plinkNext;
+			}
+
+			inline Link * GetNext(void)
+			{
+				return m_plinkNext;
+			}
+
+			// follow up to a_uiSteps next pointers; returns nullptr if the chain ends first
+			inline Link * Advance(unsigned int a_uiSteps = 1)
+			{
+				Link *plink = this;
+				unsigned int uiRemaining = a_uiSteps;
+				while (uiRemaining > 0 && plink != nullptr)
+				{
+					plink = plink->m_plinkNext;
+					uiRemaining--;
+				}
+				return plink;
+			}
+
+		private:
+			Block4x4 *m_pblock;
+			Link *m_plinkNext;
+		};
+
+		SortedBlockList(unsigned int a_uiImageBlocks, unsigned int a_uiBuckets);
+		~SortedBlockList(void);
+
+		// the list owns m_palinkPool and m_pabucket through raw pointers and frees them in its
+		// destructor, so a member-wise copy would free both arrays twice; copying is disabled
+		SortedBlockList(const SortedBlockList &) = delete;
+		SortedBlockList & operator=(const SortedBlockList &) = delete;
+
+		void AddBlock(Block4x4 *a_pblock);
+
+		void Sort(void);
+
+		// head of the sorted list built by Sort()
+		inline Link * GetLinkToFirstBlock(void)
+		{
+			return m_plinkFirst;
+		}
+
+		inline unsigned int GetNumberOfAddedBlocks(void)
+		{
+			return m_uiAddedBlocks;
+		}
+
+		inline unsigned int GetNumberOfSortedBlocks(void)
+		{
+			return m_uiSortedBlocks;
+		}
+
+		void Print(void);
+
+	private:
+		void InitBuckets(void);
+
+		// first and last link of one error bucket
+		class Bucket
+		{
+		public:
+			Link *plinkFirst;
+			Link *plinkLast;
+		};
+
+		unsigned int m_uiImageBlocks;
+		int m_iBuckets;
+
+		unsigned int m_uiAddedBlocks;
+		unsigned int m_uiSortedBlocks;
+		Link *m_palinkPool;
+		Bucket *m_pabucket;
+		float m_fMaxError;
+
+		Link *m_plinkFirst;
+		Link *m_plinkLast;
+    };
+
+} // namespace Etc
diff --git a/extern/butteraugli/CMakeLists.txt b/extern/butteraugli/CMakeLists.txt
new file mode 100644
index 0000000..da0d7da
--- /dev/null
+++ b/extern/butteraugli/CMakeLists.txt
@@ -0,0 +1,7 @@
+# build butteraugli as a static library from the two files listed in BUTTERAUGLI_SRCS
+SET(BUTTERAUGLI_SRCS
+	butteraugli.cc
+	butteraugli.h)
+
+ADD_LIBRARY(butteraugli STATIC ${BUTTERAUGLI_SRCS})
+
diff --git a/extern/butteraugli/butteraugli.cc b/extern/butteraugli/butteraugli.cc
new file mode 100755
index 0000000..ebf7cc1
--- /dev/null
+++ b/extern/butteraugli/butteraugli.cc
@@ -0,0 +1,1588 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com)
+//
+// The physical architecture of butteraugli is based on the following naming
+// convention:
+//   * Opsin - dynamics of the photosensitive chemicals in the retina
+//             with their immediate electrical processing
+//   * Xyb - hybrid opponent/trichromatic color space
+//     x is roughly red-subtract-green.
+//     y is yellow.
+//     b is blue.
+//     Xyb values are computed from Opsin mixing, not directly from rgb.
+//   * Mask - for visual masking
+//   * Hf - color modeling for spatially high-frequency features
+//   * Lf - color modeling for spatially low-frequency features
+//   * Diffmap - to cluster and build an image of error between the images
+//   * Blur - to hold the smoothing code
+
+#include "butteraugli.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+
+// Restricted pointers speed up Convolution(); MSVC uses a different keyword.
+#ifdef _MSC_VER
+#define __restrict__ __restrict
+#endif
+
+namespace butteraugli {
+
+// Returns a kCacheLineSize-aligned buffer of at least `bytes` bytes (nullptr if
+// malloc fails). One extra cache line is allocated to leave room for aligning.
+void *CacheAligned::Allocate(const size_t bytes) {
+  char *const base = static_cast<char *>(malloc(bytes + kCacheLineSize));
+  if (base == nullptr) return nullptr;
+  const uintptr_t past_line =
+      reinterpret_cast<uintptr_t>(base) & (kCacheLineSize - 1);
+  // With the offset a multiple of kPointerSize (asserted), the slot just below
+  // the aligned address can hold `base`, which Free reads back.
+  assert(past_line % kPointerSize == 0);
+  char *const aligned = base + (kCacheLineSize - past_line);
+  memcpy(aligned - kPointerSize, &base, kPointerSize);
+  return aligned;
+}
+
+// Frees a buffer from CacheAligned::Allocate (nullptr is a no-op) by reading
+// back the malloc pointer stored in the kPointerSize bytes below the buffer.
+void CacheAligned::Free(void *aligned_pointer) {
+  if (aligned_pointer == nullptr) return;
+  char *const user_ptr = static_cast<char *>(aligned_pointer);
+  assert(reinterpret_cast<uintptr_t>(user_ptr) % kCacheLineSize == 0);
+  char *malloc_ptr;
+  memcpy(&malloc_ptr, user_ptr - kPointerSize, kPointerSize);
+  assert(malloc_ptr <= user_ptr - kPointerSize);  // stash fits below buffer
+  assert(malloc_ptr >= user_ptr - kCacheLineSize);  // within one cache line
+  free(malloc_ptr);
+}
+
+// True iff the bits of x have every exponent bit set and a non-zero mantissa.
+static inline bool IsNan(const float x) {
+  uint32_t raw;
+  memcpy(&raw, &x, sizeof(raw));
+  return (raw & 0x7F800000u) == 0x7F800000u && (raw & 0x007FFFFFu) != 0;
+}
+
+// True iff, with the sign bit cleared, x's bits exceed 0x7ff0000000000000 (+inf).
+static inline bool IsNan(const double x) {
+  uint64_t raw;
+  memcpy(&raw, &x, sizeof(raw));
+  return (raw & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL;
+}
+
+// Exits with status 1 at the first NaN pixel of `image`, printing its position.
+static inline void CheckImage(const ImageF &image, const char *name) {
+  const size_t xs = image.xsize(), ys = image.ysize();
+  for (size_t y = 0; y < ys; ++y) {
+    ConstRestrict<const float *> row = image.Row(y);
+    for (size_t x = 0; x < xs; ++x) {
+      if (!IsNan(row[x])) continue;
+      printf("Image %s @ %zu,%zu (of %zu,%zu)\n", name, x, y, xs, ys);  // size_t
+      exit(1);
+    }
+  }
+}
+
+#if BUTTERAUGLI_ENABLE_CHECKS
+// CHECK_NAN(x, str): if x is NaN, print "<line>: <str>" and abort().
+#define CHECK_NAN(x, str)                \
+  do {                                   \
+    if (IsNan(x)) {                      \
+      printf("%d: %s\n", __LINE__, str); \
+      abort();                           \
+    }                                    \
+  } while (0)
+// CHECK_IMAGE(image, name): run CheckImage over the whole image.
+#define CHECK_IMAGE(image, name) CheckImage(image, name)
+
+#else
+// Checks disabled: both macros expand to nothing.
+#define CHECK_NAN(x, str)
+#define CHECK_IMAGE(image, name)
+
+#endif  // BUTTERAUGLI_ENABLE_CHECKS
+
+
+static const double kInternalGoodQualityThreshold = 14.921561160295326;
+static const double kGlobalScale = 1.0 / kInternalGoodQualityThreshold;  // reciprocal of the threshold
+
+inline double DotProduct(const double u[3], const double v[3]) {  // 3-element dot product
+  return u[0] * v[0] + u[1] * v[1] + u[2] * v[2];
+}
+
+inline double DotProduct(const float u[3], const double v[3]) {  // float x double overload
+  return u[0] * v[0] + u[1] * v[1] + u[2] * v[2];  // each product is evaluated in double
+}
+
+// Computes a horizontal convolution and transposes the result.
+static void Convolution(size_t xsize, size_t ysize,  // input width and height
+                        size_t xstep,  // distance between sampled input columns
+                        size_t len, size_t offset,  // tap count; index of center tap
+                        const float* __restrict__ multipliers,  // kernel taps
+                        const float* __restrict__ inp,  // read as inp[y * xsize + x]
+                        double border_ratio,  // 0: normalize by in-image taps only
+                        float* __restrict__ result) {  // written as [ox * ysize + y]
+  PROFILER_FUNC;
+  double weight_no_border = 0;  // sum of taps 0..2*offset (full kernel weight)
+  for (int j = 0; j <= 2 * offset; ++j) {
+    weight_no_border += multipliers[j];
+  }
+  for (size_t x = 0, ox = 0; x < xsize; x += xstep, ox++) {
+    int minx = x < offset ? 0 : x - offset;  // first input column under the kernel
+    int maxx = std::min(xsize, x + len - offset) - 1;  // last one, clipped to image
+    double weight = 0.0;  // sum of the taps that actually overlap the image
+    for (int j = minx; j <= maxx; ++j) {
+      weight += multipliers[j - x + offset];
+    }
+    // Interpolate linearly between the no-border scaling and border scaling.
+    weight = (1.0 - border_ratio) * weight + border_ratio * weight_no_border;
+    double scale = 1.0 / weight;
+    for (size_t y = 0; y < ysize; ++y) {
+      double sum = 0.0;
+      for (int j = minx; j <= maxx; ++j) {
+        sum += inp[y * xsize + j] * multipliers[j - x + offset];
+      }
+      result[ox * ysize + y] = sum * scale;
+    }
+  }
+}
+
+void Blur(size_t xsize, size_t ysize, float* channel, double sigma,
+          double border_ratio) {  // blurs the xsize * ysize `channel` in place
+  PROFILER_FUNC;
+  double m = 2.25;  // Accuracy increases when m is increased.
+  const double scaler = -1.0 / (2 * sigma * sigma);
+  // For m = 9.0: exp(-scaler * diff * diff) < 2^ {-52}
+  const int diff = std::max<int>(1, m * fabs(sigma));  // kernel radius, at least 1
+  const int expn_size = 2 * diff + 1;
+  std::vector<float> expn(expn_size);  // Gaussian taps exp(-i^2 / (2 sigma^2))
+  for (int i = -diff; i <= diff; ++i) {
+    expn[i + diff] = exp(scaler * i * i);
+  }
+  const int xstep = std::max(1, int(sigma / 3));  // output is subsampled by this
+  const int ystep = xstep;
+  int dxsize = (xsize + xstep - 1) / xstep;  // subsampled width (rounded up)
+  int dysize = (ysize + ystep - 1) / ystep;  // subsampled height (rounded up)
+  std::vector<float> tmp(dxsize * ysize);
+  std::vector<float> downsampled_output(dxsize * dysize);
+  Convolution(xsize, ysize, xstep, expn_size, diff, expn.data(), channel,
+              border_ratio,
+              tmp.data());  // first pass; Convolution transposes its output
+  Convolution(ysize, dxsize, ystep, expn_size, diff, expn.data(), tmp.data(),
+              border_ratio,
+              downsampled_output.data());  // second pass transposes back
+  for (int y = 0; y < ysize; y++) {  // expand by repeating each subsampled value
+    for (int x = 0; x < xsize; x++) {
+      // TODO: Use correct rounding.
+      channel[y * xsize + x] =
+          downsampled_output[(y / ystep) * dxsize + (x / xstep)];
+    }
+  }
+}
+
+// To change this to n, add the relevant FFTn function and kFFTnMapIndexTable.
+constexpr size_t kBlockEdge = 8;  // side length of a square block
+constexpr size_t kBlockSize = kBlockEdge * kBlockEdge;  // 64 values per block
+constexpr size_t kBlockEdgeHalf = kBlockEdge / 2;  // 4
+constexpr size_t kBlockHalf = kBlockEdge * kBlockEdgeHalf;  // 32: index where row 4 starts
+
+// Contrast sensitivity related weights.
+static const double *GetContrastSensitivityMatrix() {
+  static double csf8x8[kBlockHalf + kBlockEdgeHalf + 1] = {  // 37 entries
+    5.28270670524,  // [0]: used by ButteraugliBlockDiff for the DC/edge terms
+    0.0,
+    0.0,
+    0.0,
+    0.3831134973,  // [kBlockEdgeHalf]: first entry read by the AC loop
+    0.676303603859,
+    3.58927792424,
+    18.6104367002,
+    18.6104367002,
+    3.09093131948,
+    1.0,
+    0.498250875965,
+    0.36198671102,
+    0.308982169883,
+    0.1312701920435,
+    2.37370549629,
+    3.58927792424,
+    1.0,
+    2.37370549629,
+    0.991205724152,
+    1.05178802919,
+    0.627264168628,
+    0.4,
+    0.1312701920435,
+    0.676303603859,
+    0.498250875965,
+    0.991205724152,
+    0.5,
+    0.3831134973,
+    0.349686450518,
+    0.627264168628,
+    0.308982169883,
+    0.3831134973,
+    0.36198671102,
+    1.05178802919,
+    0.3831134973,
+    0.12,  // [kBlockHalf + kBlockEdgeHalf]: last entry read by the AC loop
+  };
+  return &csf8x8[0];
+}
+
+// Builds the 21-entry high-frequency X table: lut[0] = 0, lut[1] = off, and
+// each later entry adds inc to its predecessor.
+std::array<double, 21> MakeHighFreqColorDiffDx() {
+  static const double off = 11.38708334481672;
+  static const double inc = 14.550189611520716;
+  std::array<double, 21> lut = {};  // zero-fill: lut[0] was left indeterminate
+  lut[1] = off;
+  for (size_t i = 2; i < lut.size(); ++i) lut[i] = lut[i - 1] + inc;
+  return lut;
+}
+
+const double *GetHighFreqColorDiffDx() {  // pointer to the 21-entry table
+  static const std::array<double, 21> kLut = MakeHighFreqColorDiffDx();  // built on first call, then reused
+  return kLut.data();
+}
+
+// Builds the 21-entry high-frequency Y table: lut[0] = 0, lut[1] = off, and
+// each later entry adds inc to its predecessor.
+std::array<double, 21> MakeHighFreqColorDiffDy() {
+  static const double off = 1.4103373714040413;
+  static const double inc = 0.7084088867024;
+  std::array<double, 21> lut = {};  // zero-fill: lut[0] was left indeterminate
+  lut[1] = off;
+  for (size_t i = 2; i < lut.size(); ++i) lut[i] = lut[i - 1] + inc;
+  return lut;
+}
+
+const double *GetHighFreqColorDiffDy() {  // pointer to the 21-entry table
+  static const std::array<double, 21> kLut = MakeHighFreqColorDiffDy();  // built on first call, then reused
+  return kLut.data();
+}
+
+// Builds the 21-entry low-frequency Y table as a running sum: lut[i] is
+// lut[i - 1] + inc, starting from lut[0] = 0.
+std::array<double, 21> MakeLowFreqColorDiffDy() {
+  static const double inc = 5.2511644570349185;
+  std::array<double, 21> lut = {};  // zero-fill: the sum used to start from garbage
+  for (size_t i = 1; i < lut.size(); ++i) lut[i] = lut[i - 1] + inc;
+  return lut;
+}
+
+const double *GetLowFreqColorDiffDy() {  // pointer to the 21-entry table
+  static const std::array<double, 21> kLut = MakeLowFreqColorDiffDy();  // built on first call, then reused
+  return kLut.data();
+}
+
+// Odd-symmetric piecewise-linear lookup: evaluates the table `array` (`size`
+// entries, one per integer position) at |sx| and returns the value with the
+// sign of sx. Positions at or beyond the last entry return that last entry.
+// Debug builds assert |sx| < 10000.
+inline double Interpolate(const double *array, int size, double sx) {
+  const double pos = fabs(sx);
+  assert(pos < 10000);
+  const int lo = static_cast<int>(pos);
+  const int last = size - 1;
+  // Blend between the two neighbouring entries unless we are past the table.
+  const double magnitude =
+      lo >= last ? array[last]
+                 : array[lo] + (pos - lo) * (array[lo + 1] - array[lo]);
+  return sx < 0 ? -magnitude : magnitude;
+}
+
+// Piecewise-linear lookup in the table `array` (`size` entries, one per
+// integer position). Negative sx is treated as 0, so the result is never
+// sign-flipped. Positions at or beyond the last entry return that last entry.
+inline double InterpolateClampNegative(const double *array,
+                                       int size, double sx) {
+  // After clamping only non-negative values (or NaN) reach fabs.
+  const double pos = fabs(sx < 0 ? 0 : sx);
+  const int lo = static_cast<int>(pos);
+  const int last = size - 1;
+  if (lo >= last) {
+    return array[last];
+  }
+  const double frac = pos - lo;
+  const double left = array[lo];
+  const double right = array[lo + 1];
+  return left + frac * (right - left);
+}
+
+// Mixes r and g into an opponent pair (*valx is a weighted difference, *valy a
+// weighted sum) and passes b through unchanged as *valz.
+void RgbToXyb(double r, double g, double b,
+              double *valx, double *valy, double *valz) {
+  static const double kXFromR = 1.01611726948, kXFromG = 0.982482243696;
+  static const double kYFromR = 1.43571362627, kYFromG = 0.896039849412;
+  *valx = kXFromR * r - kXFromG * g;
+  *valy = kYFromR * r + kYFromG * g;
+  *valz = b;
+}
+
+static inline void XybToVals(double x, double y, double z,  // in: one xyb sample
+                             double *valx, double *valy, double *valz) {  // out
+  static const double xmul = 0.758304045695;
+  static const double ymul = 2.28148649801;
+  static const double zmul = 1.87816926918;
+  *valx = Interpolate(GetHighFreqColorDiffDx(), 21, x * xmul);  // table lookup
+  *valy = Interpolate(GetHighFreqColorDiffDy(), 21, y * ymul);  // table lookup
+  *valz = zmul * z;  // z is only scaled, no table
+}
+
+// Rough psychovisual distance to gray for low frequency colors.
+static void XybLowFreqToVals(double x, double y, double z,  // in: one xyb sample
+                             double *valx, double *valy, double *valz) {  // out
+  static const double xmul = 6.64482198135;
+  static const double ymul = 0.837846224276;
+  static const double zmul = 7.34905756986;
+  static const double y_to_z_mul = 0.0812519812628;
+  z += y_to_z_mul * y;  // a fraction of y is mixed into z before scaling
+  *valz = z * zmul;
+  *valx = x * xmul;  // x is only scaled
+  *valy = Interpolate(GetLowFreqColorDiffDy(), 21, y * ymul);  // table lookup
+}
+
+// Applies a dead zone around zero:
+//   * -range <= v < range  -> 0
+//   * otherwise, v < 0     -> v + range
+//   * otherwise            -> v - range
+double RemoveRangeAroundZero(double v, double range) {
+  if (v >= -range && v < range) return 0;
+  // Outside the dead zone: pull the value toward zero by `range`.
+  const double shift = (v < 0) ? range : -range;
+  return v + shift;
+}
+
+// Accumulates into res[0..2] the value `factor` times the squared per-channel
+// difference between the low-frequency values of colors 0 and 1.
+void XybDiffLowFreqSquaredAccumulate(double r0, double g0, double b0,
+                                     double r1, double g1, double b1,
+                                     double factor, double res[3]) {
+  double vals0[3];
+  double vals1[3];
+  XybLowFreqToVals(r0, g0, b0, &vals0[0], &vals0[1], &vals0[2]);
+  if (r1 == 0.0 && g1 == 0.0 && b1 == 0.0) {
+    // Shortcut for an all-zero second color: its values are not computed and
+    // the values of color 0 are squared directly.
+    PROFILER_ZONE("XybDiff r1=g1=b1=0");
+    for (int c = 0; c < 3; ++c) res[c] += factor * vals0[c] * vals0[c];
+    return;
+  }
+  XybLowFreqToVals(r1, g1, b1, &vals1[0], &vals1[1], &vals1[2]);
+  // The color distance is approximated by the difference of each color's
+  // own distance to gray.
+  for (int c = 0; c < 3; ++c) {
+    const double delta = vals0[c] - vals1[c];
+    res[c] += factor * delta * delta;
+  }
+}
+
+struct Complex {  // plain complex number used by the FFT routines below
+ public:
+  double real;  // real part
+  double imag;  // imaginary part
+};
+
+inline double abssq(const Complex& c) {  // squared magnitude: real^2 + imag^2
+  return c.real * c.real + c.imag * c.imag;
+}
+
+// In-place transpose of a row-major kBlockEdge x kBlockEdge block: swaps each
+// element below the diagonal with its mirror image above it.
+static void TransposeBlock(Complex data[kBlockSize]) {
+  for (size_t row = 1; row < kBlockEdge; ++row)
+    for (size_t col = 0; col < row; ++col)
+      std::swap(data[row * kBlockEdge + col], data[col * kBlockEdge + row]);
+}
+
+//  D. J. Bernstein's Fast Fourier Transform algorithm on 4 elements.
+inline void FFT4(Complex* a) {  // in place: reads and overwrites a[0..3]
+  double t1, t2, t3, t4, t5, t6, t7, t8;
+  t5 = a[2].real;
+  t1 = a[0].real - t5;
+  t7 = a[3].real;
+  t5 += a[0].real;
+  t3 = a[1].real - t7;
+  t7 += a[1].real;
+  t8 = t5 + t7;
+  a[0].real = t8;  // sum of all four real parts
+  t5 -= t7;
+  a[1].real = t5;
+  t6 = a[2].imag;
+  t2 = a[0].imag - t6;
+  t6 += a[0].imag;
+  t5 = a[3].imag;
+  a[2].imag = t2 + t3;
+  t2 -= t3;
+  a[3].imag = t2;
+  t4 = a[1].imag - t5;
+  a[3].real = t1 + t4;
+  t1 -= t4;
+  a[2].real = t1;
+  t5 += a[1].imag;
+  a[0].imag = t6 + t5;  // sum of all four imaginary parts
+  t6 -= t5;
+  a[1].imag = t6;
+}
+
+static const double kSqrtHalf = 0.70710678118654752440084436210484903;  // sqrt(1/2)
+
+//  D. J. Bernstein's Fast Fourier Transform algorithm on 8 elements.
+void FFT8(Complex* a) {  // in place: reads and overwrites a[0..7]
+  double t1, t2, t3, t4, t5, t6, t7, t8;
+
+  t7 = a[4].imag;
+  t4 = a[0].imag - t7;
+  t7 += a[0].imag;
+  a[0].imag = t7;
+
+  t8 = a[6].real;
+  t5 = a[2].real - t8;
+  t8 += a[2].real;
+  a[2].real = t8;
+
+  t7 = a[6].imag;
+  a[6].imag = t4 - t5;
+  t4 += t5;
+  a[4].imag = t4;
+
+  t6 = a[2].imag - t7;
+  t7 += a[2].imag;
+  a[2].imag = t7;
+
+  t8 = a[4].real;
+  t3 = a[0].real - t8;
+  t8 += a[0].real;
+  a[0].real = t8;
+
+  a[4].real = t3 - t6;
+  t3 += t6;
+  a[6].real = t3;
+
+  t7 = a[5].real;
+  t3 = a[1].real - t7;
+  t7 += a[1].real;
+  a[1].real = t7;
+
+  t8 = a[7].imag;
+  t6 = a[3].imag - t8;
+  t8 += a[3].imag;
+  a[3].imag = t8;
+  t1 = t3 - t6;
+  t3 += t6;
+
+  t7 = a[5].imag;
+  t4 = a[1].imag - t7;
+  t7 += a[1].imag;
+  a[1].imag = t7;
+
+  t8 = a[7].real;
+  t5 = a[3].real - t8;
+  t8 += a[3].real;
+  a[3].real = t8;
+
+  t2 = t4 - t5;
+  t4 += t5;
+
+  t6 = t1 - t4;
+  t8 = kSqrtHalf;  // from here on t8 holds the constant, not a sample
+  t6 *= t8;
+  a[5].real = a[4].real - t6;
+  t1 += t4;
+  t1 *= t8;
+  a[5].imag = a[4].imag - t1;
+  t6 += a[4].real;
+  a[4].real = t6;
+  t1 += a[4].imag;
+  a[4].imag = t1;
+
+  t5 = t2 - t3;
+  t5 *= t8;
+  a[7].imag = a[6].imag - t5;
+  t2 += t3;
+  t2 *= t8;
+  a[7].real = a[6].real - t2;
+  t2 += a[6].real;
+  a[6].real = t2;
+  t5 += a[6].imag;
+  a[6].imag = t5;
+
+  FFT4(a);  // runs the 4-point routine on a[0..3]
+
+  // Reorder to the correct output order.
+  // TODO: Modify the above computation so that this is not needed.
+  Complex tmp = a[2];
+  a[2] = a[3];
+  a[3] = a[5];
+  a[5] = a[7];
+  a[7] = a[4];
+  a[4] = a[1];
+  a[1] = a[6];
+  a[6] = tmp;
+}
+
+// Same as FFT8, but all inputs are real.
+// TODO: Since this does not need to be in-place, maybe there is a
+// faster FFT than this one, which is derived from DJB's in-place complex FFT.
+void RealFFT8(const double* in, Complex* out) {  // reads in[0..7], writes out[0..7]
+  double t1, t2, t3, t5, t6, t7, t8;
+  t8 = in[6];
+  t5 = in[2] - t8;
+  t8 += in[2];
+  out[2].real = t8;
+  out[6].imag = -t5;
+  out[4].imag = t5;
+  t8 = in[4];
+  t3 = in[0] - t8;
+  t8 += in[0];
+  out[0].real = t8;
+  out[4].real = t3;
+  out[6].real = t3;
+  t7 = in[5];
+  t3 = in[1] - t7;
+  t7 += in[1];
+  out[1].real = t7;
+  t8 = in[7];
+  t5 = in[3] - t8;
+  t8 += in[3];
+  out[3].real = t8;
+  t2 = -t5;
+  t6 = t3 - t5;
+  t8 = kSqrtHalf;  // from here on t8 holds the constant, not a sample
+  t6 *= t8;
+  out[5].real = out[4].real - t6;
+  t1 = t3 + t5;
+  t1 *= t8;
+  out[5].imag = out[4].imag - t1;
+  t6 += out[4].real;
+  out[4].real = t6;
+  t1 += out[4].imag;
+  out[4].imag = t1;
+  t5 = t2 - t3;
+  t5 *= t8;
+  out[7].imag = out[6].imag - t5;
+  t2 += t3;
+  t2 *= t8;
+  out[7].real = out[6].real - t2;
+  t2 += out[6].real;
+  out[6].real = t2;
+  t5 += out[6].imag;
+  out[6].imag = t5;
+  t5 = out[2].real;
+  t1 = out[0].real - t5;
+  t7 = out[3].real;
+  t5 += out[0].real;
+  t3 = out[1].real - t7;
+  t7 += out[1].real;
+  t8 = t5 + t7;
+  out[0].real = t8;  // sum of all eight inputs
+  t5 -= t7;
+  out[1].real = t5;
+  out[2].imag = t3;
+  out[3].imag = -t3;
+  out[3].real = t1;
+  out[2].real = t1;
+  out[0].imag = 0;
+  out[1].imag = 0;
+
+  // Reorder to the correct output order.
+  // TODO: Modify the above computation so that this is not needed.
+  Complex tmp = out[2];
+  out[2] = out[3];
+  out[3] = out[5];
+  out[5] = out[7];
+  out[7] = out[4];
+  out[4] = out[1];
+  out[1] = out[6];
+  out[6] = tmp;
+}
+
+// Fills in block[kBlockEdgeHalf..(kBlockHalf+kBlockEdgeHalf)], and leaves the
+// rest unmodified.
+void ButteraugliFFTSquared(double block[kBlockSize]) {
+  double global_mul = 0.000064;  // scale applied to every squared magnitude
+  Complex block_c[kBlockSize];
+  assert(kBlockEdge == 8);  // RealFFT8/FFT8 below are hard-wired to 8 points
+  for (int y = 0; y < kBlockEdge; ++y) {  // transform each row of real input
+    RealFFT8(block + y * kBlockEdge, block_c + y * kBlockEdge);
+  }
+  TransposeBlock(block_c);  // second pass then runs along the other axis
+  double r0[kBlockEdge];
+  double r1[kBlockEdge];
+  for (int x = 0; x < kBlockEdge; ++x) {  // only real parts of rows 0 and 4 are kept
+    r0[x] = block_c[x].real;
+    r1[x] = block_c[kBlockHalf + x].real;
+  }
+  RealFFT8(r0, block_c);
+  RealFFT8(r1, block_c + kBlockHalf);
+  for (int y = 1; y < kBlockEdgeHalf; ++y) {  // rows 1..3 use the complex FFT
+    FFT8(block_c + y * kBlockEdge);
+  }
+  for (int i = kBlockEdgeHalf; i < kBlockHalf + kBlockEdgeHalf + 1; ++i) {
+    block[i] = abssq(block_c[i]);
+    block[i] *= global_mul;
+  }
+}
+
+// Computes 8x8 FFT of each channel of xyb0 and xyb1 and adds the total squared
+// 3-dimensional xybdiff of the two blocks to diff_xyb_{dc,ac} and the average
+// diff on the edges to diff_xyb_edge_dc.
+void ButteraugliBlockDiff(double xyb0[3 * kBlockSize],  // in; overwritten (see below)
+                          double xyb1[3 * kBlockSize],  // in; overwritten (see below)
+                          double diff_xyb_dc[3],  // accumulated into
+                          double diff_xyb_ac[3],  // accumulated into
+                          double diff_xyb_edge_dc[3]) {  // accumulated into
+  PROFILER_FUNC;
+  const double *csf8x8 = GetContrastSensitivityMatrix();
+
+  double avgdiff_xyb[3] = {0.0};  // per-channel mean of xyb0 - xyb1
+  double avgdiff_edge[3][4] = { {0.0} };  // per-channel mean diff on each block edge
+  for (int i = 0; i < 3 * kBlockSize; ++i) {
+    const double diff_xyb = xyb0[i] - xyb1[i];
+    const int c = i / kBlockSize;  // channel index
+    avgdiff_xyb[c] += diff_xyb / kBlockSize;
+    const int k = i % kBlockSize;  // position inside the channel's block
+    const int kx = k % kBlockEdge;
+    const int ky = k / kBlockEdge;
+    const int h_edge_idx = ky == 0 ? 1 : ky == 7 ? 3 : -1;  // first/last row, else none
+    const int v_edge_idx = kx == 0 ? 0 : kx == 7 ? 2 : -1;  // first/last column, else none
+    if (h_edge_idx >= 0) {
+      avgdiff_edge[c][h_edge_idx] += diff_xyb / kBlockEdge;
+    }
+    if (v_edge_idx >= 0) {  // corner samples count toward both of their edges
+      avgdiff_edge[c][v_edge_idx] += diff_xyb / kBlockEdge;
+    }
+  }
+  XybDiffLowFreqSquaredAccumulate(avgdiff_xyb[0],
+                                  avgdiff_xyb[1],
+                                  avgdiff_xyb[2],
+                                  0, 0, 0, csf8x8[0],
+                                  diff_xyb_dc);
+  for (int i = 0; i < 4; ++i) {  // one accumulation per block edge
+    XybDiffLowFreqSquaredAccumulate(avgdiff_edge[0][i],
+                                    avgdiff_edge[1][i],
+                                    avgdiff_edge[2][i],
+                                    0, 0, 0, csf8x8[0],
+                                    diff_xyb_edge_dc);
+  }
+
+  double* xyb_avg = xyb0;  // xyb0's storage is reused for the averages
+  double* xyb_halfdiff = xyb1;  // xyb1's storage is reused for the half differences
+  for(int i = 0; i < 3 * kBlockSize; ++i) {
+    double avg = (xyb0[i] + xyb1[i])/2;
+    double halfdiff = (xyb0[i] - xyb1[i])/2;
+    xyb_avg[i] = avg;
+    xyb_halfdiff[i] = halfdiff;
+  }
+  double *y_avg = &xyb_avg[kBlockSize];
+  double *x_halfdiff_squared = &xyb_halfdiff[0];
+  double *y_halfdiff = &xyb_halfdiff[kBlockSize];
+  double *z_halfdiff_squared = &xyb_halfdiff[2 * kBlockSize];
+  ButteraugliFFTSquared(y_avg);  // each call replaces part of the plane in place
+  ButteraugliFFTSquared(x_halfdiff_squared);
+  ButteraugliFFTSquared(y_halfdiff);
+  ButteraugliFFTSquared(z_halfdiff_squared);
+
+  static const double xmul = 64.8;
+  static const double ymul = 1.753123908348329;
+  static const double ymul2 = 1.51983458269;
+  static const double zmul = 2.4;
+
+  for (size_t i = kBlockEdgeHalf; i < kBlockHalf + kBlockEdgeHalf + 1; ++i) {
+    double d = csf8x8[i];  // contrast sensitivity weight for this coefficient
+    diff_xyb_ac[0] += d * xmul * x_halfdiff_squared[i];
+    diff_xyb_ac[2] += d * zmul * z_halfdiff_squared[i];
+
+    y_avg[i] = sqrt(y_avg[i]);
+    y_halfdiff[i] = sqrt(y_halfdiff[i]);
+    double y0 = y_avg[i] - y_halfdiff[i];
+    double y1 = y_avg[i] + y_halfdiff[i];
+    // Remove the impact of small absolute values.
+    // This improves the behavior with flat noise.
+    static const double ylimit = 0.04;
+    y0 = RemoveRangeAroundZero(y0, ylimit);
+    y1 = RemoveRangeAroundZero(y1, ylimit);
+    if (y0 != y1) {  // equal values would contribute exactly zero
+      double valy0 = Interpolate(GetHighFreqColorDiffDy(), 21, y0 * ymul2);
+      double valy1 = Interpolate(GetHighFreqColorDiffDy(), 21, y1 * ymul2);
+      double valy = ymul * (valy0 - valy1);
+      diff_xyb_ac[1] += d * valy * valy;
+    }
+  }
+}
+
+// Low frequency edge detectors.
+// Two edge detectors (one horizontal, one vertical, each spanning 2 * 3 pixels)
+// are applied in each corner of the 8x8 square whose top-left pixel is
+// (pos_x, pos_y); detectors that would leave the image are skipped.
+// The squared 3-dimensional error vector is added to diff_xyb, normalized by
+// the number of detectors that ran. diff_xyb is left untouched if none ran.
+void Butteraugli8x8CornerEdgeDetectorDiff(
+    const size_t pos_x,
+    const size_t pos_y,
+    const size_t xsize,
+    const size_t ysize,
+    const std::vector<std::vector<float> > &blurred0,
+    const std::vector<std::vector<float> > &blurred1,
+    double diff_xyb[3]) {
+  PROFILER_FUNC;
+  static const double w = 0.711100840192;
+  static const size_t step = 3;
+  static const size_t offset[4][2] = { { 0, 0 }, { 0, 7 }, { 7, 0 }, { 7, 7 } };
+  int local_count = 0;
+  double local_xyb[3] = { 0 };
+  // Accumulates the weighted difference between pixels ix and ix2 of both images.
+  const auto accumulate = [&](const size_t ix, const size_t ix2) {
+    XybDiffLowFreqSquaredAccumulate(
+        w * (blurred0[0][ix] - blurred0[0][ix2]),
+        w * (blurred0[1][ix] - blurred0[1][ix2]),
+        w * (blurred0[2][ix] - blurred0[2][ix2]),
+        w * (blurred1[0][ix] - blurred1[0][ix2]),
+        w * (blurred1[1][ix] - blurred1[1][ix2]),
+        w * (blurred1[2][ix] - blurred1[2][ix2]),
+        1.0, local_xyb);
+    ++local_count;
+  };
+  for (int k = 0; k < 4; ++k) {
+    const size_t x = pos_x + offset[k][0];
+    const size_t y = pos_y + offset[k][1];
+    if (x >= step && x + step < xsize) {
+      const size_t ix = y * xsize + (x - step);
+      accumulate(ix, ix + 2 * step);
+    }
+    if (y >= step && y + step < ysize) {
+      const size_t ix = (y - step) * xsize + x;
+      accumulate(ix, ix + 2 * step * xsize);
+    }
+  }
+  // With pos at the origin and both sizes <= 10 no detector fits; dividing by a
+  // zero count would add NaN (inf * 0) to diff_xyb, so contribute nothing.
+  if (local_count == 0) return;
+  static const double weight = 0.01617112696;
+  const double mul = weight * 8.0 / local_count;
+  for (int i = 0; i < 3; ++i) {
+    diff_xyb[i] += mul * local_xyb[i];
+  }
+}
+
+// https://en.wikipedia.org/wiki/Photopsin absorbance modeling.
+const double *GetOpsinAbsorbance() {  // 3 rows of {3 mixing weights, 1 bias}
+  static const double kMix[12] = {
+    0.348036746003,  // row 0 weights, [3] is its bias
+    0.577814843137,
+    0.0544556093735,
+    0.774145581713,
+    0.26922717275,  // row 1 weights, [7] is its bias
+    0.767247733938,
+    0.0366922708552,
+    0.920130265014,
+    0.0882062883536,  // row 2 weights, [11] is its bias
+    0.158581714673,
+    0.712857943858,
+    10.6524069248,
+  };
+  return &kMix[0];
+}
+
+// out[c] = dot(weights of row c, in) + bias of row c, rows from GetOpsinAbsorbance.
+void OpsinAbsorbance(const double in[3], double out[3]) {
+  const double *row = GetOpsinAbsorbance();
+  for (int c = 0; c < 3; ++c, row += 4)
+    out[c] = row[0] * in[0] + row[1] * in[1] + row[2] * in[2] + row[3];
+}
+
+double GammaMinArg() {  // smallest OpsinAbsorbance output for input (0, 0, 0)
+  const double zeros[3] = { 0.0, 0.0, 0.0 };
+  double mixed[3];
+  OpsinAbsorbance(zeros, mixed);
+  return *std::min_element(mixed, mixed + 3);
+}
+
+double GammaMaxArg() {  // largest OpsinAbsorbance output for input (255, 255, 255)
+  const double full[3] = { 255.0, 255.0, 255.0 };
+  double mixed[3];
+  OpsinAbsorbance(full, mixed);
+  return *std::max_element(mixed, mixed + 3);
+}
+
+ButteraugliComparator::ButteraugliComparator(
+    size_t xsize, size_t ysize, int step)  // image size and result subsampling step
+    : xsize_(xsize),
+      ysize_(ysize),
+      num_pixels_(xsize * ysize),
+      step_(step),
+      res_xsize_((xsize + step - 1) / step),  // xsize / step, rounded up
+      res_ysize_((ysize + step - 1) / step) {  // ysize / step, rounded up
+  assert(step <= 4);  // only checked in debug builds
+}
+
+void MaskHighIntensityChange(
+    size_t xsize, size_t ysize,
+    const std::vector<std::vector<float> > &c0,  // in: image 0, 3 planes of xsize * ysize
+    const std::vector<std::vector<float> > &c1,  // in: image 1, same layout
+    std::vector<std::vector<float> > &xyb0,  // out: masked image 0 (must be pre-sized)
+    std::vector<std::vector<float> > &xyb1) {  // out: masked image 1 (must be pre-sized)
+  PROFILER_FUNC;
+  for (int y = 0; y < ysize; ++y) {
+    for (int x = 0; x < xsize; ++x) {
+      int ix = y * xsize + x;
+      const double ave[3] = {  // per-channel mean of the two images at this pixel
+        (c0[0][ix] + c1[0][ix]) * 0.5,
+        (c0[1][ix] + c1[1][ix]) * 0.5,
+        (c0[2][ix] + c1[2][ix]) * 0.5,
+      };
+      double sqr_max_diff = -1;  // stays -1 only if every neighbor is skipped
+      {
+        int offset[4] =  // index deltas to the left, right, upper, lower neighbor
+            { -1, 1, -static_cast<int>(xsize), static_cast<int>(xsize) };
+        int border[4] =  // non-zero where that neighbor would be outside the image
+            { x == 0, x + 1 == xsize, y == 0, y + 1 == ysize };
+        for (int dir = 0; dir < 4; ++dir) {
+          if (border[dir]) {
+            continue;
+          }
+          const int ix2 = ix + offset[dir];
+          double diff = 0.5 * (c0[1][ix2] + c1[1][ix2]) - ave[1];  // channel 1 only
+          diff *= diff;
+          if (sqr_max_diff < diff) {
+            sqr_max_diff = diff;
+          }
+        }
+      }
+      static const double kReductionX = 275.19165240059317;
+      static const double kReductionY = 18599.41286306991;
+      static const double kReductionZ = 410.8995306951065;
+      static const double kChromaBalance = 106.95800948271017;
+      double chroma_scale = kChromaBalance / (ave[1] + kChromaBalance);
+
+      const double mix[3] = {  // weight of the actual color; shrinks as sqr_max_diff grows
+        chroma_scale * kReductionX / (sqr_max_diff + kReductionX),
+        kReductionY / (sqr_max_diff + kReductionY),
+        chroma_scale * kReductionZ / (sqr_max_diff + kReductionZ),
+      };
+      // Interpolate linearly between the average color and the actual
+      // color -- to reduce the importance of this pixel.
+      for (int i = 0; i < 3; ++i) {
+        xyb0[i][ix] = mix[i] * c0[i][ix] + (1 - mix[i]) * ave[i];
+        xyb1[i][ix] = mix[i] * c1[i][ix] + (1 - mix[i]) * ave[i];
+      }
+    }
+  }
+}
+
+// Power-law response with two knees: the part of v above kKnee1 is reduced by
+// the factor kSlope1, then the part of the result above kKnee2 by kSlope2,
+// before applying kScale * (kOffset + v^kExponent).
+double SimpleGamma(double v) {
+  static const double kKnee1 = 43.01745241042018;
+  static const double kSlope1 = 0.0383723643799;
+  static const double kKnee2 = 94.68634353321337;
+  static const double kSlope2 = 0.22885405968;
+  static const double kExponent = 0.387494322593;
+  static const double kOffset = 0.156775786057;
+  static const double kScale = 8.898059160493739;
+  // First knee.
+  const double over1 = v - kKnee1;
+  if (over1 >= 0) v -= over1 * kSlope1;
+  // Second knee, measured on the already-compressed value.
+  const double over2 = v - kKnee2;
+  if (over2 >= 0) v -= over2 * kSlope2;
+  return kScale * (kOffset + pow(v, kExponent));
+}
+
+static inline double Gamma(double v) {  // forwards to GammaPolynomial (not defined in this file)
+  // return SimpleGamma(v);
+  return GammaPolynomial(v);
+}
+
+void OpsinDynamicsImage(size_t xsize, size_t ysize,
+                        std::vector<std::vector<float> > &rgb) {  // 3 planes, converted in place
+  PROFILER_FUNC;
+  std::vector<std::vector<float> > blurred = rgb;  // copy that gets smoothed below
+  static const double kSigma = 1.1;
+  for (int i = 0; i < 3; ++i) {
+    Blur(xsize, ysize, blurred[i].data(), kSigma, 0.0);
+  }
+  for (int i = 0; i < rgb[0].size(); ++i) {  // one iteration per pixel
+    double sensitivity[3];
+    {
+      // Calculate sensitivity[3] based on the smoothed image gamma derivative.
+      double pre_rgb[3] = { blurred[0][i], blurred[1][i], blurred[2][i] };
+      double pre_mixed[3];
+      OpsinAbsorbance(pre_rgb, pre_mixed);
+      sensitivity[0] = Gamma(pre_mixed[0]) / pre_mixed[0];
+      sensitivity[1] = Gamma(pre_mixed[1]) / pre_mixed[1];
+      sensitivity[2] = Gamma(pre_mixed[2]) / pre_mixed[2];
+    }
+    double cur_rgb[3] = { rgb[0][i],  rgb[1][i],  rgb[2][i] };
+    double cur_mixed[3];
+    OpsinAbsorbance(cur_rgb, cur_mixed);  // the unblurred pixel goes through the same mix
+    cur_mixed[0] *= sensitivity[0];
+    cur_mixed[1] *= sensitivity[1];
+    cur_mixed[2] *= sensitivity[2];
+    double x, y, z;
+    RgbToXyb(cur_mixed[0], cur_mixed[1], cur_mixed[2], &x, &y, &z);
+    rgb[0][i] = x;  // the planes now hold x, y and z instead of the input channels
+    rgb[1][i] = y;
+    rgb[2][i] = z;
+  }
+}
+
+// Multiplies every element of *result by `scale`, in place. The product is
+// formed in double and stored back as float.
+static void ScaleImage(double scale, std::vector<float> *result) {
+  PROFILER_FUNC;
+  for (float &value : *result) value *= scale;
+}
+
+// Making a cluster of local errors to be more impactful than
+// just a single error.
+void CalculateDiffmap(const size_t xsize, const size_t ysize,
+                      const int step,
+                      std::vector<float>* diffmap) {  // in: subsampled map; out: xsize * ysize map
+  PROFILER_FUNC;
+  // Shift the diffmap more correctly above the pixels, from 2.5 pixels to 0.5
+  // pixels distance over the original image. The border of 2 pixels on top and
+  // left side and 3 pixels on right and bottom side are zeroed, but these
+  // values have no meaning, they only exist to keep the result map the same
+  // size as the input images.
+  int s2 = (8 - step) / 2;  // shift applied in both directions
+  // Upsample and take square root.
+  std::vector<float> diffmap_out(xsize * ysize);
+  const size_t res_xsize = (xsize + step - 1) / step;
+  for (size_t res_y = 0; res_y + 8 - step < ysize; res_y += step) {
+    for (size_t res_x = 0; res_x + 8 - step < xsize; res_x += step) {
+      size_t res_ix = (res_y * res_xsize + res_x) / step;
+      float orig_val = (*diffmap)[res_ix];
+      constexpr float kInitialSlope = 100;
+      // TODO(b/29974893): Until that is fixed do not call sqrt on very small
+      // numbers.
+      double val = orig_val < (1.0 / (kInitialSlope * kInitialSlope))
+                       ? kInitialSlope * orig_val
+                       : std::sqrt(orig_val);
+      for (size_t off_y = 0; off_y < step; ++off_y) {  // fill a step x step square
+        for (size_t off_x = 0; off_x < step; ++off_x) {
+          diffmap_out[(res_y + off_y + s2) * xsize + res_x + off_x + s2] = val;
+        }
+      }
+    }
+  }
+  *diffmap = diffmap_out;
+  {
+    static const double kSigma = 8.8510880283;
+    static const double mul1 = 24.8235314874;
+    static const double scale = 1.0 / (1.0 + mul1);
+    const int s = 8 - step;
+    std::vector<float> blurred((xsize - s) * (ysize - s));  // copy of the shifted interior
+    for (int y = 0; y < ysize - s; ++y) {
+      for (int x = 0; x < xsize - s; ++x) {
+        blurred[y * (xsize - s) + x] = (*diffmap)[(y + s2) * xsize + x + s2];
+      }
+    }
+    static const double border_ratio = 0.03027655136;
+    Blur(xsize - s, ysize - s, blurred.data(), kSigma, border_ratio);
+    for (int y = 0; y < ysize - s; ++y) {  // add the weighted blur back onto the map
+      for (int x = 0; x < xsize - s; ++x) {
+        (*diffmap)[(y + s2) * xsize + x + s2]
+            += mul1 * blurred[y * (xsize - s) + x];
+      }
+    }
+    ScaleImage(scale, diffmap);  // scale = 1 / (1 + mul1)
+  }
+}
+
+void ButteraugliComparator::Diffmap(const std::vector<ImageF> &rgb0_arg,
+                                    const std::vector<ImageF> &rgb1_arg,
+                                    ImageF &result) {
+  result = ImageF(xsize_, ysize_);  // result is always replaced by a new image
+  if (xsize_ < 8 || ysize_ < 8) return;  // smaller than one 8x8 block: nothing is computed
+  std::vector<std::vector<float>> rgb0_c = PackedFromPlanes(rgb0_arg);
+  std::vector<std::vector<float>> rgb1_c = PackedFromPlanes(rgb1_arg);
+  OpsinDynamicsImage(xsize_, ysize_, rgb0_c);  // converts the copy in place
+  OpsinDynamicsImage(xsize_, ysize_, rgb1_c);
+  std::vector<ImageF> pg0 = PlanesFromPacked(xsize_, ysize_, rgb0_c);
+  std::vector<ImageF> pg1 = PlanesFromPacked(xsize_, ysize_, rgb1_c);
+  DiffmapOpsinDynamicsImage(pg0, pg1, result);  // assigns result again
+}
+
+// Produces the diffmap for two images whose planes are used as given (no
+// OpsinDynamicsImage call is made here). Below 8x8 nothing is computed.
+void ButteraugliComparator::DiffmapOpsinDynamicsImage(
+    const std::vector<ImageF> &xyb0_arg, const std::vector<ImageF> &xyb1_arg,
+    ImageF &result) {
+  using Planes = std::vector<std::vector<float> >;
+  result = ImageF(xsize_, ysize_);
+  if (!(xsize_ >= 8 && ysize_ >= 8)) return;
+  Planes xyb0 = PackedFromPlanes(xyb0_arg);
+  Planes xyb1 = PackedFromPlanes(xyb1_arg);
+  Planes input0 = xyb0, input1 = xyb1;  // untouched copies fed to the masker
+  MaskHighIntensityChange(xsize_, ysize_, input0, input1, xyb0, xyb1);
+  assert(8 <= xsize_);
+  for (int c = 0; c < 3; ++c) {
+    assert(xyb0[c].size() == num_pixels_);
+    assert(xyb1[c].size() == num_pixels_);
+  }
+  const size_t res_values = 3 * res_xsize_ * res_ysize_;
+  std::vector<float> block_diff_dc(res_values), block_diff_ac(res_values);
+  std::vector<float> edge_detector_map(res_values);
+  BlockDiffMap(xyb0, xyb1, &block_diff_dc, &block_diff_ac);
+  EdgeDetectorMap(xyb0, xyb1, &edge_detector_map);
+  EdgeDetectorLowFreq(xyb0, xyb1, &block_diff_ac);  // adds to the AC values
+  Planes mask_xyb(3), mask_xyb_dc(3);
+  Mask(xyb0, xyb1, xsize_, ysize_, &mask_xyb, &mask_xyb_dc);
+  std::vector<float> packed_result;
+  CombineChannels(mask_xyb, mask_xyb_dc, block_diff_dc, block_diff_ac,
+                  edge_detector_map, &packed_result);
+  CalculateDiffmap(xsize_, ysize_, step_, &packed_result);
+  CopyFromPacked(packed_result, &result);
+}
+
+// Stores ButteraugliBlockDiff's DC and AC results (3 values each) per block.
+void ButteraugliComparator::BlockDiffMap(
+    const std::vector<std::vector<float> > &xyb0,
+    const std::vector<std::vector<float> > &xyb1,
+    std::vector<float>* block_diff_dc,
+    std::vector<float>* block_diff_ac) {
+  PROFILER_FUNC;
+  const size_t margin = kBlockEdge - step_ - 1;
+  for (size_t by = 0; by + margin < ysize_; by += step_) {
+    const size_t row_base = std::min(by, ysize_ - 8) * xsize_;  // clamp inside
+    for (size_t bx = 0; bx + margin < xsize_; bx += step_) {
+      const size_t offset = row_base + std::min(bx, xsize_ - 8);
+      double block0[3 * kBlockEdge * kBlockEdge];
+      double block1[3 * kBlockEdge * kBlockEdge];
+      size_t out = 0;  // running index: channel-major, then row, then column
+      for (int c = 0; c < 3; ++c) {
+        for (size_t y = 0; y < kBlockEdge; ++y) {
+          const size_t src = offset + y * xsize_;
+          for (size_t x = 0; x < kBlockEdge; ++x, ++out) {
+            block0[out] = xyb0[c][src + x];
+            block1[out] = xyb1[c][src + x];
+          }
+        }
+      }
+      double diff_xyb_dc[3] = { 0.0 };
+      double diff_xyb_ac[3] = { 0.0 };
+      double diff_xyb_edge_dc[3] = { 0.0 };
+      ButteraugliBlockDiff(block0, block1,
+                           diff_xyb_dc, diff_xyb_ac, diff_xyb_edge_dc);
+      const size_t res_ix = 3 * ((by * res_xsize_ + bx) / step_);
+      for (int c = 0; c < 3; ++c) {
+        (*block_diff_dc)[res_ix + c] = diff_xyb_dc[c];
+        (*block_diff_ac)[res_ix + c] = diff_xyb_ac[c];
+      }
+    }
+  }
+}
+
+// Blurs every channel of both images (one sigma per channel) and records the
+// Butteraugli8x8CornerEdgeDetectorDiff result for each block visited.
+void ButteraugliComparator::EdgeDetectorMap(
+    const std::vector<std::vector<float> > &xyb0,
+    const std::vector<std::vector<float> > &xyb1,
+    std::vector<float>* edge_detector_map) {
+  PROFILER_FUNC;
+  // Blur strength for channels 0, 1 and 2 respectively.
+  static const double kSigma[3] = { 1.5, 0.586, 0.4 };
+  // Work on blurred copies; the inputs stay untouched.
+  std::vector<std::vector<float> > blurred0(xyb0);
+  std::vector<std::vector<float> > blurred1(xyb1);
+  for (int c = 0; c < 3; ++c) {
+    Blur(xsize_, ysize_, blurred0[c].data(), kSigma[c], 0.0);
+    Blur(xsize_, ysize_, blurred1[c].data(), kSigma[c], 0.0);
+  }
+  // Corners are clamped to xsize_ - 8 / ysize_ - 8 so the window fits.
+  const size_t margin = 8 - step_;
+  for (size_t by = 0; by + margin < ysize_; by += step_) {
+    const size_t corner_y = std::min(by, ysize_ - 8);
+    for (size_t bx = 0; bx + margin < xsize_; bx += step_) {
+      double diff_xyb[3] = { 0.0 };
+      Butteraugli8x8CornerEdgeDetectorDiff(std::min(bx, xsize_ - 8), corner_y,
+                                           xsize_, ysize_,
+                                           blurred0, blurred1, diff_xyb);
+      // Each block owns three consecutive floats in the output.
+      float *out = &(*edge_detector_map)[3 * ((by * res_xsize_ + bx) / step_)];
+      for (int c = 0; c < 3; ++c) out[c] = diff_xyb[c];
+    }
+  }
+}
+
+void ButteraugliComparator::EdgeDetectorLowFreq(  // Adds a low-frequency edge term to *block_diff_ac (see the += at the end).
+    const std::vector<std::vector<float> > &xyb0,
+    const std::vector<std::vector<float> > &xyb1,
+    std::vector<float>* block_diff_ac) {
+  PROFILER_FUNC;
+  static const double kSigma = 14;  // sigma passed to Blur for every channel
+  static const double kMul = 10;  // weight of this term when added to block_diff_ac
+  std::vector<std::vector<float> > blurred0(xyb0);  // copies are blurred in place; xyb0/xyb1 stay untouched
+  std::vector<std::vector<float> > blurred1(xyb1);
+  for (int i = 0; i < 3; i++) {
+    Blur(xsize_, ysize_, blurred0[i].data(), kSigma, 0.0);
+    Blur(xsize_, ysize_, blurred1[i].data(), kSigma, 0.0);
+  }
+  const int step = 8;  // loop bound margin in pixels; distinct from the member step_ used as stride
+  for (int y = 0; y + step < ysize_; y += step_) {  // bound guarantees row y + 8 exists
+    int resy = y / step_;
+    int resx = step / step_;  // NOTE(review): resx starts at step / step_ while resy starts at y / step_ - confirm the asymmetry is intended
+    for (int x = 0; x + step < xsize_; x += step_, resx++) {  // bound guarantees column x + 8 exists
+      const int ix = y * xsize_ + x;
+      const int res_ix = resy * res_xsize_ + resx;
+      double diff[4][3];  // [direction][channel]
+      for (int i = 0; i < 3; ++i) {
+        int ix2 = ix + 8;  // far sample: 8 pixels to the right
+        diff[0][i] =  // (image1 - image0) at ix minus (image1 - image0) at ix2
+            ((blurred1[i][ix] - blurred0[i][ix]) +
+             (blurred0[i][ix2] - blurred1[i][ix2]));
+        ix2 = ix + 8 * xsize_;  // far sample: 8 rows down
+        diff[1][i] =
+            ((blurred1[i][ix] - blurred0[i][ix]) +
+             (blurred0[i][ix2] - blurred1[i][ix2]));
+        ix2 = ix + 6 * xsize_ + 6;  // far sample: 6 rows down, 6 pixels right
+        diff[2][i] =
+            ((blurred1[i][ix] - blurred0[i][ix]) +
+             (blurred0[i][ix2] - blurred1[i][ix2]));
+        ix2 = ix + 6 * xsize_ - 6;  // far sample: 6 rows down, 6 pixels left
+        diff[3][i] = x < step ? 0 :  // direction is zeroed when x < step (8)
+            ((blurred1[i][ix] - blurred0[i][ix]) +
+             (blurred0[i][ix2] - blurred1[i][ix2]));
+      }
+      double max_diff_xyb[3] = { 0 };  // per-channel maximum over the four directions
+      for (int k = 0; k < 4; ++k) {
+        double diff_xyb[3] = { 0 };  // reset for each direction before accumulation
+        XybDiffLowFreqSquaredAccumulate(diff[k][0], diff[k][1], diff[k][2],
+                                        0, 0, 0, 1.0,
+                                        diff_xyb);
+        for (int i = 0; i < 3; ++i) {
+          max_diff_xyb[i] = std::max<double>(max_diff_xyb[i], diff_xyb[i]);
+        }
+      }
+      for (int i = 0; i < 3; ++i) {
+        (*block_diff_ac)[3 * res_ix + i] += kMul * max_diff_xyb[i];  // accumulates onto values already present
+      }
+    }
+  }
+}
+
+// Per block: sum of the three maps' 3-vectors dotted with the sampled masks.
+void ButteraugliComparator::CombineChannels(
+    const std::vector<std::vector<float> >& mask_xyb,
+    const std::vector<std::vector<float> >& mask_xyb_dc,
+    const std::vector<float>& block_diff_dc,
+    const std::vector<float>& block_diff_ac,
+    const std::vector<float>& edge_detector_map,
+    std::vector<float>* result) {
+  PROFILER_FUNC;
+  result->resize(res_xsize_ * res_ysize_);
+  const size_t margin = 8 - step_;
+  for (size_t by = 0; by + margin < ysize_; by += step_) {
+    for (size_t bx = 0; bx + margin < xsize_; bx += step_) {
+      const size_t mask_ix = (by + 3) * xsize_ + (bx + 3);  // sample at (3, 3)
+      double mask[3] = { mask_xyb[0][mask_ix], mask_xyb[1][mask_ix],
+                         mask_xyb[2][mask_ix] };
+      double dc_mask[3] = { mask_xyb_dc[0][mask_ix], mask_xyb_dc[1][mask_ix],
+                            mask_xyb_dc[2][mask_ix] };
+      const size_t res_ix = (by * res_xsize_ + bx) / step_;
+      (*result)[res_ix] = (DotProduct(&block_diff_dc[3 * res_ix], dc_mask) +
+                           DotProduct(&block_diff_ac[3 * res_ix], mask) +
+                           DotProduct(&edge_detector_map[3 * res_ix], mask));
+    }
+  }
+}
+
+// Returns the largest diffmap value, or 0 if none is positive.
+double ButteraugliScoreFromDiffmap(const ImageF& diffmap) {
+  PROFILER_FUNC;
+  float largest = 0.0f;
+  for (size_t y = 0; y != diffmap.ysize(); ++y) {
+    ConstRestrict<const float *> row = diffmap.Row(y);
+    for (size_t x = 0; x != diffmap.xsize(); ++x)
+      if (largest < row[x]) largest = row[x];
+  }
+  return largest;
+}
+
+// lut[i] = (1 + extmul * (mul / (0.01 * scaler * i + offset) + extoff))^2.
+static std::array<double, 512> MakeMask(double extmul, double extoff,
+                                        double mul, double offset,
+                                        double scaler) {
+  std::array<double, 512> lut;
+  for (size_t i = 0; i != lut.size(); ++i) {
+    const double c = mul / ((0.01 * scaler * i) + offset);
+    const double root = 1.0 + extmul * (c + extoff);
+    assert(root >= 0.0);  // the value before squaring must not be negative
+    lut[i] = root * root;
+  }
+  return lut;
+}
+
+// Maps delta through a 512-entry MakeMask table using
+// InterpolateClampNegative.
+double MaskX(double delta) {
+  PROFILER_FUNC;
+  // Function-local static: the table is built once, on the first call.
+  static const std::array<double, 512> kTable = MakeMask(
+      /*extmul=*/0.975741017749, /*extoff=*/-4.25328244168,
+      /*mul=*/20.8029176447, /*offset=*/0.454909521427,
+      /*scaler=*/0.0738288224836);
+  return InterpolateClampNegative(kTable.data(), kTable.size(), delta);
+}
+
+// Maps delta through a 512-entry MakeMask table using
+// InterpolateClampNegative.
+double MaskY(double delta) {
+  PROFILER_FUNC;
+  // Function-local static: the table is built once, on the first call.
+  static const std::array<double, 512> kTable = MakeMask(
+      /*extmul=*/0.373995618954, /*extoff=*/1.5307267433,
+      /*mul=*/16.2447033988, /*offset=*/0.911952641929,
+      /*scaler=*/1.1731667845);
+  return InterpolateClampNegative(kTable.data(), kTable.size(), delta);
+}
+
+// Maps delta through a 512-entry MakeMask table using
+// InterpolateClampNegative.
+double MaskB(double delta) {
+  PROFILER_FUNC;
+  // Function-local static: the table is built once, on the first call.
+  static const std::array<double, 512> kTable = MakeMask(
+      /*extmul=*/0.61582234137, /*extoff=*/-4.25376118646,
+      /*mul=*/31.1444967089, /*offset=*/1.05105070921,
+      /*scaler=*/0.47434643535);
+  return InterpolateClampNegative(kTable.data(), kTable.size(), delta);
+}
+
+// Maps delta through a 512-entry MakeMask table using
+// InterpolateClampNegative.
+double MaskDcX(double delta) {
+  PROFILER_FUNC;
+  // Function-local static: the table is built once, on the first call.
+  static const std::array<double, 512> kTable = MakeMask(
+      /*extmul=*/1.79116943438, /*extoff=*/-3.86797479189,
+      /*mul=*/20.4563479139, /*offset=*/0.670960225853,
+      /*scaler=*/0.486575865525);
+  return InterpolateClampNegative(kTable.data(), kTable.size(), delta);
+}
+
+// Maps delta through a 512-entry MakeMask table using
+// InterpolateClampNegative.
+double MaskDcY(double delta) {
+  PROFILER_FUNC;
+  // Function-local static: the table is built once, on the first call.
+  static const std::array<double, 512> kTable = MakeMask(
+      /*extmul=*/0.212223514236, /*extoff=*/-3.65647120524,
+      /*mul=*/21.6566724788, /*offset=*/1.73396799447,
+      /*scaler=*/0.170392660501);
+  return InterpolateClampNegative(kTable.data(), kTable.size(), delta);
+}
+
+// Maps delta through a 512-entry MakeMask table using
+// InterpolateClampNegative.
+double MaskDcB(double delta) {
+  PROFILER_FUNC;
+  // Function-local static: the table is built once, on the first call.
+  static const std::array<double, 512> kTable = MakeMask(
+      /*extmul=*/0.349376011816, /*extoff=*/-0.894711072781,
+      /*mul=*/18.0373825149, /*offset=*/0.901647926679,
+      /*scaler=*/0.380086095024);
+  return InterpolateClampNegative(kTable.data(), kTable.size(), delta);
+}
+
+// Sliding-window minimum, done as a vertical then a horizontal pass. On
+// return values[x + y * xsize] is the minimum of the input over the window
+//   x - offset .. x + square_size - offset - 1,
+//   y - offset .. y + square_size - offset - 1, clipped to the image.
+void MinSquareVal(size_t square_size, size_t offset,
+                  size_t xsize, size_t ysize,
+                  float *values) {
+  PROFILER_FUNC;
+  assert(offset < square_size);  // keeps every window non-empty
+  std::vector<float> column_min(xsize * ysize);
+  // Pass 1: minimum over the rows of each window, written to column_min.
+  for (size_t y = 0; y < ysize; ++y) {
+    const size_t first = y < offset ? 0 : y - offset;
+    const size_t end = std::min<size_t>(ysize, y + square_size - offset);
+    for (size_t x = 0; x < xsize; ++x) {
+      double lowest = values[x + first * xsize];
+      for (size_t row = first + 1; row < end; ++row) {
+        lowest = fmin(lowest, values[x + row * xsize]);
+      }
+      column_min[x + y * xsize] = lowest;
+    }
+  }
+  for (size_t x = 0; x < xsize; ++x) {  // Pass 2: minimum over the columns.
+    const size_t first = x < offset ? 0 : x - offset;
+    const size_t end = std::min<size_t>(xsize, x + square_size - offset);
+    for (size_t y = 0; y < ysize; ++y) {
+      double lowest = column_min[first + y * xsize];
+      for (size_t col = first + 1; col < end; ++col) {
+        lowest = fmin(lowest, column_min[col + y * xsize]);
+      }
+      values[x + y * xsize] = lowest;
+    }
+  }
+}
+
+// ===== Functions used by Mask only =====
+void Average5x5(int xsize, int ysize, std::vector<float>* diffs) {  // In-place weighted sum over each pixel's 3x3 neighbourhood (despite the name), then a global scale.
+  PROFILER_FUNC;
+  if (xsize < 4 || ysize < 4) {  // the explicit edge handling below needs at least 4 columns; *diffs is left unchanged
+    // TODO: Make this work for small dimensions as well.
+    return;
+  }
+  static const float w = 0.679144890667;  // weight of the four diagonal neighbours
+  static const float scale = 1.0 / (5.0 + 4 * w);  // centre + 4 edge neighbours at weight 1, 4 diagonals at weight w
+  std::vector<float> result = *diffs;  // starts as the centre contribution
+  std::vector<float> tmp0 = *diffs;  // unscaled source values
+  std::vector<float> tmp1 = *diffs;
+  ScaleImage(w, &tmp1);  // presumably multiplies every element by w, giving the diagonal source - confirm in ScaleImage
+  for (int y = 0; y < ysize; y++) {  // each source row scatters into its own row and the rows above/below
+    const int row0 = y * xsize;
+    result[row0 + 1] += tmp0[row0];  // x = 0 has only a right neighbour
+    result[row0 + 0] += tmp0[row0 + 1];  // x = 1 handled explicitly
+    result[row0 + 2] += tmp0[row0 + 1];
+    for (int x = 2; x < xsize - 2; ++x) {  // interior columns: left and right neighbours
+      result[row0 + x - 1] += tmp0[row0 + x];
+      result[row0 + x + 1] += tmp0[row0 + x];
+    }
+    result[row0 + xsize - 3] += tmp0[row0 + xsize - 2];  // x = xsize - 2 handled explicitly
+    result[row0 + xsize - 1] += tmp0[row0 + xsize - 2];
+    result[row0 + xsize - 2] += tmp0[row0 + xsize - 1];  // x = xsize - 1 has only a left neighbour
+    if (y > 0) {  // contribute to the row above
+      const int rowd1 = row0 - xsize;
+      result[rowd1 + 1] += tmp1[row0];  // diagonal from x = 0
+      result[rowd1 + 0] += tmp0[row0];  // straight up from x = 0
+      for (int x = 1; x < xsize - 1; ++x) {
+        result[rowd1 + x + 1] += tmp1[row0 + x];  // diagonal (weight w)
+        result[rowd1 + x + 0] += tmp0[row0 + x];  // vertical (weight 1)
+        result[rowd1 + x - 1] += tmp1[row0 + x];  // diagonal (weight w)
+      }
+      result[rowd1 + xsize - 1] += tmp0[row0 + xsize - 1];
+      result[rowd1 + xsize - 2] += tmp1[row0 + xsize - 1];
+    }
+    if (y + 1 < ysize) {  // contribute to the row below, same pattern
+      const int rowu1 = row0 + xsize;
+      result[rowu1 + 1] += tmp1[row0];
+      result[rowu1 + 0] += tmp0[row0];
+      for (int x = 1; x < xsize - 1; ++x) {
+        result[rowu1 + x + 1] += tmp1[row0 + x];
+        result[rowu1 + x + 0] += tmp0[row0 + x];
+        result[rowu1 + x - 1] += tmp1[row0 + x];
+      }
+      result[rowu1 + xsize - 1] += tmp0[row0 + xsize - 1];
+      result[rowu1 + xsize - 2] += tmp1[row0 + xsize - 1];
+    }
+  }
+  *diffs = result;
+  ScaleImage(scale, diffs);  // same factor for border pixels, although they received fewer contributions
+}
+
+// Fills (*mask)[c][ix], c = 0..2, with a local-activity estimate: for each
+// image the absolute horizontal and vertical neighbour differences (as
+// converted by XybToVals) are summed, and the smaller of the two images'
+// sums is stored.
+//
+// The neighbour is the next pixel, or the previous one on the last
+// column/row. When a dimension is 1 there is no neighbour at all, so the
+// pixel is compared with itself rather than read out of bounds. Indices are
+// size_t so that large images cannot overflow an int.
+void DiffPrecompute(
+    const std::vector<std::vector<float> > &xyb0,
+    const std::vector<std::vector<float> > &xyb1,
+    size_t xsize, size_t ysize,
+    std::vector<std::vector<float> > *mask) {
+  PROFILER_FUNC;
+  mask->resize(3, std::vector<float>(xyb0[0].size()));
+
+  // Feeds the per-channel difference of pixels a and b of img to XybToVals.
+  const auto gradient = [](const std::vector<std::vector<float> > &img,
+                           size_t a, size_t b, double *vals) {
+    // The subtraction happens in float, exactly as the pixels are stored.
+    const double dx = img[0][a] - img[0][b];
+    const double dy = img[1][a] - img[1][b];
+    const double dz = img[2][a] - img[2][b];
+    XybToVals(dx, dy, dz, &vals[0], &vals[1], &vals[2]);
+  };
+
+  // XybToVals outputs: h = horizontal, v = vertical; 0 / 1 = which image.
+  double valsh0[3] = { 0.0 };
+  double valsv0[3] = { 0.0 };
+  double valsh1[3] = { 0.0 };
+  double valsv1[3] = { 0.0 };
+  for (size_t y = 0; y < ysize; ++y) {
+    for (size_t x = 0; x < xsize; ++x) {
+      const size_t ix = x + xsize * y;
+      // Horizontal neighbour: right, else left, else the pixel itself.
+      const size_t ix_h =
+          x + 1 < xsize ? ix + 1 : (x > 0 ? ix - 1 : ix);
+      // Vertical neighbour: below, else above, else the pixel itself.
+      const size_t ix_v =
+          y + 1 < ysize ? ix + xsize : (y > 0 ? ix - xsize : ix);
+      gradient(xyb0, ix, ix_h, valsh0);
+      gradient(xyb1, ix, ix_h, valsh1);
+      gradient(xyb0, ix, ix_v, valsv0);
+      gradient(xyb1, ix, ix_v, valsv1);
+      // Keep the less active of the two images for each channel.
+      for (int i = 0; i < 3; ++i) {
+        const double sup0 = fabs(valsh0[i]) + fabs(valsv0[i]);
+        const double sup1 = fabs(valsh1[i]) + fabs(valsv1[i]);
+        (*mask)[i][ix] = std::min(sup0, sup1);
+      }
+    }
+  }
+}
+
+// Derives the masking maps used to weight differences: DiffPrecompute's
+// activity is smoothed per channel, then mapped through the Mask{X,Y,B}
+// curves into *mask and the MaskDc{X,Y,B} curves into *mask_dc.
+void Mask(const std::vector<std::vector<float> > &xyb0,
+          const std::vector<std::vector<float> > &xyb1,
+          size_t xsize, size_t ysize,
+          std::vector<std::vector<float> > *mask,
+          std::vector<std::vector<float> > *mask_dc) {
+  PROFILER_FUNC;
+  const size_t num_pixels = xsize * ysize;
+  mask->resize(3);
+  mask_dc->resize(3);
+  for (std::vector<float> &plane : *mask) plane.resize(num_pixels);
+  for (std::vector<float> &plane : *mask_dc) plane.resize(num_pixels);
+  DiffPrecompute(xyb0, xyb1, xsize, ysize, mask);
+
+  // Per-channel smoothing: Average5x5, a 4-wide MinSquareVal, then Blur.
+  static const double kSigma[3] = {
+    9.65781083553, 14.2644604355, 4.53358927369,
+  };
+  for (int c = 0; c < 3; ++c) {
+    std::vector<float> &plane = (*mask)[c];
+    Average5x5(xsize, ysize, &plane);
+    MinSquareVal(4, 0, xsize, ysize, plane.data());
+    Blur(xsize, ysize, plane.data(), kSigma[c], 0.0);
+  }
+
+  // Channel weights applied before the curve lookup.
+  static const double kWeight[3] = {
+    232.206464018, 22.9455222245, 503.962310606,
+  };
+  for (size_t idx = 0; idx < num_pixels; ++idx) {
+    // Read all three inputs first: the *mask outputs overwrite them in place.
+    const double p0 = kWeight[0] * (*mask)[0][idx];
+    const double p1 = kWeight[1] * (*mask)[1][idx];
+    const double p2 = kWeight[2] * (*mask)[2][idx];
+    (*mask)[0][idx] = MaskX(p0);
+    (*mask_dc)[0][idx] = MaskDcX(p0);
+    (*mask)[1][idx] = MaskY(p1);
+    (*mask_dc)[1][idx] = MaskDcY(p1);
+    (*mask)[2][idx] = MaskB(p2);
+    (*mask_dc)[2][idx] = MaskDcB(p2);
+  }
+
+  const auto global_scale = kGlobalScale * kGlobalScale;
+  for (int c = 0; c < 3; ++c) {
+    ScaleImage(global_scale, &(*mask)[c]);
+    ScaleImage(global_scale, &(*mask_dc)[c]);
+  }
+}
+
+// Builds a comparator sized after rgb0_image[0] and runs Diffmap on the pair.
+void ButteraugliDiffmap(const std::vector<ImageF> &rgb0_image,
+                        const std::vector<ImageF> &rgb1_image,
+                        ImageF &result_image) {
+  const ImageF &plane0 = rgb0_image[0];
+  ButteraugliComparator comparator(plane0.xsize(), plane0.ysize(), 3);
+  comparator.Diffmap(rgb0_image, rgb1_image, result_image);
+}
+
+// Compares rgb0 with rgb1: fills diffmap and sets diffvalue to its maximum.
+// Returns false if an input has fewer than three planes, the image is
+// empty, or any plane (of either image) differs in size from rgb0[0].
+bool ButteraugliInterface(const std::vector<ImageF> &rgb0,
+                          const std::vector<ImageF> &rgb1,
+                          ImageF &diffmap,
+                          double &diffvalue) {
+  if (rgb0.size() < 3 || rgb1.size() < 3) return false;  // Need 3 planes.
+  const size_t xsize = rgb0[0].xsize();
+  const size_t ysize = rgb0[0].ysize();
+  if (xsize < 1 || ysize < 1) return false;  // No image.
+  for (size_t c = 0; c < 3; ++c) {  // Starts at 0 so rgb1[0] is checked too.
+    if (rgb0[c].xsize() != xsize || rgb0[c].ysize() != ysize ||
+        rgb1[c].xsize() != xsize || rgb1[c].ysize() != ysize) {
+      return false;  // Image planes must have same dimensions.
+    }
+  }
+  if (xsize < 8 || ysize < 8) {
+    // Values for images this small are non-sensical; report no difference.
+    diffmap = ImageF(xsize, ysize);  // The caller's diffmap may be unsized.
+    for (size_t y = 0; y < ysize; ++y) {
+      ConstRestrict<float *> row = diffmap.Row(y);
+      for (size_t x = 0; x < xsize; ++x) row[x] = 0;
+    }
+    diffvalue = 0;
+    return true;
+  }
+  ButteraugliDiffmap(rgb0, rgb1, diffmap);
+  diffvalue = ButteraugliScoreFromDiffmap(diffmap);
+  return true;
+}
+
+// Computes Mask() of rgb against itself and copies channel 1 (intensity) of
+// its first output into quant, one value per pixel.
+// Returns false, without touching quant, when xsize or ysize is below 16.
+bool ButteraugliAdaptiveQuantization(size_t xsize, size_t ysize,
+    const std::vector<std::vector<float> > &rgb, std::vector<float> &quant) {
+  if (!(xsize >= 16 && ysize >= 16)) {
+    return false;  // Butteraugli is undefined for small images.
+  }
+
+  std::vector<std::vector<float> > scale_xyb(3);
+  std::vector<std::vector<float> > scale_xyb_dc(3);
+  Mask(rgb, rgb, xsize, ysize, &scale_xyb, &scale_xyb_dc);
+
+  // Mask gives values in 3 color channels; for now only the intensity
+  // channel is handed out.
+  const std::vector<float> &intensity = scale_xyb[1];
+  quant.assign(intensity.begin(), intensity.begin() + xsize * ysize);
+  return true;
+}
+
+// Logistic of (score - 1) scaled to (0, 2); steeper above 1.0 than below.
+double ButteraugliFuzzyClass(double score) {
+  static const double kWidthBelow = 6.97490803335, kWidthAbove = 10.287189655;
+  static const double kMax = 2.0;
+  const double width = score < 1.0 ? kWidthBelow : kWidthAbove;
+  return kMax / (1.0 + exp((score - 1.0) * width));
+}
+
+// Bisection for the score whose ButteraugliFuzzyClass value equals seek.
+// Starts at 0 and moves by 1, 1/2, 1/4, ... while the step is >= 1e-10.
+double ButteraugliFuzzyInverse(double seek) {
+  double pos = 0;
+  double range = 1.0;
+  while (range >= 1e-10) {
+    // The class falls as the score rises: a class below seek means step back.
+    pos += ButteraugliFuzzyClass(pos) < seek ? -range : range;
+    range *= 0.5;
+  }
+  return pos;
+}
+
+}  // namespace butteraugli
diff --git a/extern/butteraugli/butteraugli.h b/extern/butteraugli/butteraugli.h
new file mode 100755
index 0000000..31824b8
--- /dev/null
+++ b/extern/butteraugli/butteraugli.h
@@ -0,0 +1,560 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Disclaimer: This is not an official Google product.
+//
+// Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com)
+
+#ifndef BUTTERAUGLI_BUTTERAUGLI_H_
+#define BUTTERAUGLI_BUTTERAUGLI_H_
+
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#ifndef PROFILER_ENABLED
+#define PROFILER_ENABLED 0  // default when the build does not define it
+#endif
+#if PROFILER_ENABLED  // the enabled branch is empty: no profiler macros are defined by this header in that case
+#else
+#define PROFILER_FUNC  // expands to nothing
+#define PROFILER_ZONE(name)  // expands to nothing
+#endif
+
+#define BUTTERAUGLI_ENABLE_CHECKS 0  // NOTE(review): reads as an on/off switch (0 = off) - confirm every use tests the value, not mere definedness
+
+// This is the main interface to butteraugli image similarity
+// analysis function.
+
+namespace butteraugli {
+
+template<typename T>
+class Image;  // forward declaration; the class is defined further down in this header
+
+using Image8 = Image<uint8_t>;  // single plane of 8-bit samples
+using ImageF = Image<float>;  // single plane of float samples
+using ImageD = Image<double>;  // single plane of double samples
+
+// ButteraugliInterface defines the public interface for butteraugli.
+//
+// It calculates the difference between rgb0 and rgb1.
+//
+// rgb0 and rgb1 contain the images as three planes each: rgb0[c] and rgb1[c]
+// hold the red plane for c == 0, green for c == 1, blue for c == 2. The pixel
+// at (x, y) of a plane is read as plane.Row(y)[x].
+//
+// Value of pixels of images rgb0 and rgb1 need to be represented as raw
+// intensity. Most image formats store gamma corrected intensity in pixel
+// values. This gamma correction has to be removed, by applying the following
+// function:
+// butteraugli_val = 255.0 * pow(png_val / 255.0, gamma);
+// A typical value of gamma is 2.2. It is usually stored in the image header.
+// Take care not to confuse that value with its inverse. The gamma value should
+// be always greater than one.
+// Butteraugli does not work as intended if the caller does not perform
+// gamma correction.
+//
+// diffmap will contain an image of the size xsize * ysize, containing
+// localized differences for values px (indexed with the px the same as rgb0
+// and rgb1). diffvalue will give a global score of similarity.
+//
+// A diffvalue smaller than kButteraugliGood indicates that images can be
+// observed as the same image.
+// diffvalue larger than kButteraugliBad indicates that a difference between
+// the images can be observed.
+// A diffvalue between kButteraugliGood and kButteraugliBad indicates that
+// a subtle difference can be observed between the images.
+//
+// Returns true on success.
+
+bool ButteraugliInterface(const std::vector<ImageF> &rgb0,
+                          const std::vector<ImageF> &rgb1,
+                          ImageF &diffmap,
+                          double &diffvalue);
+
+const double kButteraugliQuantLow = 0.26;  // typical lower end of the ButteraugliAdaptiveQuantization map
+const double kButteraugliQuantHigh = 1.454;  // typical upper end of the ButteraugliAdaptiveQuantization map
+
+// Converts the butteraugli score into fuzzy class values that are continuous
+// at the class boundary. The class boundary location is based on human
+// raters, but the slope is arbitrary. Particularly, it does not reflect
+// the expectation value of probabilities of the human raters. It is just
+// expected that a smoother class boundary will allow for higher-level
+// optimization algorithms to work faster.
+//
+// Returns close to 2.0 for a perfect match, 1.0 for 'ok' (score 1.0), and
+// tends to 0.0 for bad. Because the scoring is fuzzy, a butteraugli score of
+// 0.96 returns a class slightly above 1.0 (about 1.14 with the current slope).
+double ButteraugliFuzzyClass(double score);
+
+// Input values should be in range 0 (bad) to 2 (good). Use
+// kButteraugliNormalization as normalization.
+double ButteraugliFuzzyInverse(double seek);
+
+// Returns a map which can be used for adaptive quantization. Values can
+// typically range from kButteraugliQuantLow to kButteraugliQuantHigh. Low
+// values require coarse quantization (e.g. near random noise), high values
+// require fine quantization (e.g. in smooth bright areas).
+bool ButteraugliAdaptiveQuantization(size_t xsize, size_t ysize,
+    const std::vector<std::vector<float> > &rgb, std::vector<float> &quant);
+
+// Implementation details, don't use anything below or your code will
+// break in the future.
+
+#ifdef _MSC_VER
+#define BUTTERAUGLI_RESTRICT __restrict  // spelling of the no-alias qualifier when _MSC_VER is defined
+#else
+#define BUTTERAUGLI_RESTRICT __restrict__  // spelling used for every other compiler
+#endif
+
+#ifdef _MSC_VER
+#define BUTTERAUGLI_CACHE_ALIGNED_RETURN /* not supported */
+#else
+#define BUTTERAUGLI_CACHE_ALIGNED_RETURN __attribute__((assume_aligned(64)))  // tells the compiler the returned pointer is 64-byte aligned
+#endif
+
+// Alias for unchangeable, non-aliased pointers. T is a pointer type,
+// possibly to a const type. Example: ConstRestrict<uint8_t*> ptr = nullptr.
+// The conventional syntax uint8_t* const RESTRICT is more confusing - it is
+// not immediately obvious that the pointee is non-const.
+template <typename T>
+using ConstRestrict = T const BUTTERAUGLI_RESTRICT;  // e.g. ConstRestrict<float *> is float *const BUTTERAUGLI_RESTRICT
+
+// Functions that depend on the cache line size.
+class CacheAligned {
+ public:
+  static constexpr size_t kPointerSize = sizeof(void *);
+  static constexpr size_t kCacheLineSize = 64;  // bytes; Image rounds its row stride up to a multiple of this
+
+  // The aligned-return annotation is only allowed on function declarations.
+  static void *Allocate(const size_t bytes) BUTTERAUGLI_CACHE_ALIGNED_RETURN;  // declared only; the definition is not in this part of the header
+  static void Free(void *aligned_pointer);  // NOTE(review): presumably accepts only pointers obtained from Allocate - confirm in the definition
+};
+
+template <typename T>
+using CacheAlignedUniquePtrT = std::unique_ptr<T[], void (*)(void *)>;  // array unique_ptr whose deleter is a plain function pointer
+
+using CacheAlignedUniquePtr = CacheAlignedUniquePtrT<uint8_t>;  // byte-buffer flavour; Image stores its pixels in one of these
+
+// Requests entries * sizeof(T) bytes from CacheAligned::Allocate and wraps
+// the pointer in a unique_ptr whose deleter is CacheAligned::Free.
+template <typename T = uint8_t>
+static inline CacheAlignedUniquePtrT<T> Allocate(const size_t entries) {
+  void *raw = CacheAligned::Allocate(entries * sizeof(T));
+  return CacheAlignedUniquePtrT<T>(static_cast<T *>(raw), CacheAligned::Free);
+}
+
+// Rounds "amount" up to a multiple of "multiple" (a non-zero power of two).
+template <size_t multiple>
+static inline size_t Align(const size_t amount) {
+  static_assert(multiple != 0 && ((multiple & (multiple - 1)) == 0),
+                "Align<> argument must be a power of two");
+  const size_t low_bits = multiple - 1;
+  return (amount + low_bits) & ~low_bits;
+}
+
+// Single channel, contiguous (cache-aligned) rows separated by padding.
+// T must be POD.
+//
+// Rationale: vectorization benefits from aligned operands - unaligned loads and
+// especially stores are expensive when the address crosses cache line
+// boundaries. Introducing padding after each row ensures the start of a row is
+// aligned, and that row loops can process entire vectors (writes to the padding
+// are allowed and ignored).
+//
+// We prefer a planar representation, where channels are stored as separate
+// 2D arrays, because that simplifies vectorization (repeating the same
+// operation on multiple adjacent components) without the complexity of a
+// hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients can easily iterate
+// over all components in a row and Image requires no knowledge of the pixel
+// format beyond the component type "T". The downside is that we duplicate the
+// xsize/ysize members for each channel.
+//
+// This image layout could also be achieved with a vector and a row accessor
+// function, but a class wrapper with support for "deleter" allows wrapping
+// existing memory allocated by clients without copying the pixels. It also
+// provides convenient accessors for xsize/ysize, which shortens function
+// argument lists. Supports move-construction so it can be stored in containers.
+template <typename ComponentType>
+class Image {
+  // Returns cache-aligned row stride, being careful to avoid 2K aliasing.
+  static size_t BytesPerRow(const size_t xsize) {
+    // Allow reading one extra AVX-2 vector on the right margin.
+    const size_t row_size = xsize * sizeof(T) + 32;
+    const size_t align = CacheAligned::kCacheLineSize;
+    size_t bytes_per_row = (row_size + align - 1) & ~(align - 1);
+    // During the lengthy window before writes are committed to memory, CPUs
+    // guard against read after write hazards by checking the address, but
+    // only the lower 11 bits. We avoid a false dependency between writes to
+    // consecutive rows by ensuring their sizes are not multiples of 2 KiB.
+    if (bytes_per_row % 2048 == 0) {
+      bytes_per_row += align;
+    }
+    return bytes_per_row;
+  }
+
+ public:
+  using T = ComponentType;
+
+  Image() : xsize_(0), ysize_(0), bytes_per_row_(0), bytes_(nullptr, Ignore) {}
+
+  Image(const size_t xsize, const size_t ysize)
+      : xsize_(xsize),
+        ysize_(ysize),
+        bytes_per_row_(BytesPerRow(xsize)),
+        bytes_(Allocate(bytes_per_row_ * ysize)) {}
+
+  Image(const size_t xsize, const size_t ysize, ConstRestrict<uint8_t *> bytes,
+        const size_t bytes_per_row)
+      : xsize_(xsize),
+        ysize_(ysize),
+        bytes_per_row_(bytes_per_row),
+        bytes_(bytes, Ignore) {}
+
+  // Move constructor (required for returning Image from function)
+  Image(Image &&other)
+      : xsize_(other.xsize_),
+        ysize_(other.ysize_),
+        bytes_per_row_(other.bytes_per_row_),
+        bytes_(std::move(other.bytes_)) {}
+
+  // Move assignment (required for std::vector)
+  Image &operator=(Image &&other) {
+    xsize_ = other.xsize_;
+    ysize_ = other.ysize_;
+    bytes_per_row_ = other.bytes_per_row_;
+    bytes_ = std::move(other.bytes_);
+    return *this;
+  }
+
+  void Swap(Image &other) {
+    std::swap(xsize_, other.xsize_);
+    std::swap(ysize_, other.ysize_);
+    std::swap(bytes_per_row_, other.bytes_per_row_);
+    std::swap(bytes_, other.bytes_);
+  }
+
+  // How many pixels.
+  size_t xsize() const { return xsize_; }
+  size_t ysize() const { return ysize_; }
+
+  ConstRestrict<T *> Row(const size_t y) BUTTERAUGLI_CACHE_ALIGNED_RETURN {
+#ifdef BUTTERAUGLI_ENABLE_CHECKS
+    if (y >= ysize_) {
+      printf("Row %zu out of bounds (ysize=%zu)\n", y, ysize_);
+      abort();
+    }
+#endif
+    return reinterpret_cast<T *>(bytes_.get() + y * bytes_per_row_);
+  }
+
+  ConstRestrict<const T *> Row(const size_t y) const
+      BUTTERAUGLI_CACHE_ALIGNED_RETURN {
+#ifdef BUTTERAUGLI_ENABLE_CHECKS
+    if (y >= ysize_) {
+      printf("Const row %zu out of bounds (ysize=%zu)\n", y, ysize_);
+      abort();
+    }
+#endif
+    return reinterpret_cast<const T *>(bytes_.get() + y * bytes_per_row_);
+  }
+
+  // Raw access to the byte contents (start of row 0), for interfacing with other libraries.
+  // Unsigned char instead of char to avoid surprises (sign extension).
+  ConstRestrict<uint8_t *> bytes() { return bytes_.get(); }
+  ConstRestrict<const uint8_t *> bytes() const { return bytes_.get(); }
+  size_t bytes_per_row() const { return bytes_per_row_; }  // Row stride in bytes, padding included.
+
+  // Returns the row stride in pixels (some of which are padding). Useful for
+  // computing other rows via pointer arithmetic.
+  intptr_t PixelsPerRow() const {
+    static_assert(CacheAligned::kCacheLineSize % sizeof(T) == 0,
+                  "Padding must be divisible by the pixel size.");
+    return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));  // Bytes -> pixels.
+  }
+
+ private:
+  // No-op deleter used when bytes are not owned (passed by the non-allocating constructor).
+  static void Ignore(void *ptr) {}
+
+  // (Members are non-const to enable assignment during move-assignment.)
+  size_t xsize_;  // original intended pixels, not including any padding.
+  size_t ysize_;  // number of rows; Row(y) expects y < ysize_.
+  size_t bytes_per_row_;  // [bytes] including padding.
+  CacheAlignedUniquePtr bytes_;  // pixel storage; row y starts at y * bytes_per_row_.
+};
+
+// Returns num_planes newly allocated planes, each with the given dimensions.
+template <typename T>
+static inline std::vector<Image<T>> CreatePlanes(const size_t xsize,
+                                                 const size_t ysize,
+                                                 const size_t num_planes) {
+  std::vector<Image<T>> planes;
+  planes.reserve(num_planes);  // Reserve up front so the loop never reallocates.
+  for (size_t i = 0; i < num_planes; ++i) {
+    planes.emplace_back(xsize, ysize);  // Constructs Image<T>(xsize, ysize) in place.
+  }
+  return planes;
+}
+
+// Returns a new image with the same dimensions and pixel values. Copies row by
+// row: |other| may wrap external memory whose stride differs from the copy's.
+template <typename T>
+static inline Image<T> CopyPixels(const Image<T> &other) {
+  Image<T> copy(other.xsize(), other.ysize());
+  for (size_t y = 0; y < other.ysize(); ++y)  // Only the xsize real pixels of each row.
+    memcpy(copy.Row(y), other.Row(y), other.xsize() * sizeof(T));
+  return copy;
+}
+
+// Returns new planes with the same dimensions and pixel values (one CopyPixels per plane).
+template <typename T>
+static inline std::vector<Image<T>> CopyPlanes(
+    const std::vector<Image<T>> &planes) {
+  std::vector<Image<T>> copy;
+  copy.reserve(planes.size());  // Reserve up front so the loop never reallocates.
+  for (const Image<T> &plane : planes) {
+    copy.push_back(CopyPixels(plane));  // The returned temporary is moved into the vector.
+  }
+  return copy;
+}
+
+// Compacts a padded image into a preallocated packed vector of xsize * ysize values (row-major).
+template <typename T>
+static inline void CopyToPacked(const Image<T> &from, std::vector<T> *to) {
+  const size_t xsize = from.xsize();
+  const size_t ysize = from.ysize();
+#ifdef BUTTERAUGLI_ENABLE_CHECKS  // Same guard form as the checks in Image::Row().
+  if (to->size() < xsize * ysize) {
+    printf("%zu x %zu exceeds %zu capacity\n", xsize, ysize, to->size());
+    abort();
+  }
+#endif
+  for (size_t y = 0; y < ysize; ++y) {
+    ConstRestrict<const T*> row_from = from.Row(y);  // Padded source row.
+    ConstRestrict<T*> row_to = to->data() + y * xsize;  // Packed destination row.
+    memcpy(row_to, row_from, xsize * sizeof(T));  // Padding is not copied.
+  }
+}
+
+// Expands a packed vector (xsize * ysize values, row-major) into a preallocated padded image.
+template <typename T>
+static inline void CopyFromPacked(const std::vector<T> &from, Image<T> *to) {
+  const size_t xsize = to->xsize();
+  const size_t ysize = to->ysize();
+  assert(from.size() == xsize * ysize);  // The packed input must match the image exactly.
+  for (size_t y = 0; y < ysize; ++y) {
+    ConstRestrict<const T*> row_from = from.data() + y * xsize;  // Packed source row.
+    ConstRestrict<T*> row_to = to->Row(y);  // Padded destination row.
+    memcpy(row_to, row_from, xsize * sizeof(T));  // Destination padding is left untouched.
+  }
+}
+
+template <typename T>  // Builds one padded xsize x ysize plane per packed vector in |packed|.
+static inline std::vector<Image<T>> PlanesFromPacked(
+    const size_t xsize, const size_t ysize,
+    const std::vector<std::vector<T>> &packed) {
+  std::vector<Image<T>> planes;
+  planes.reserve(packed.size());  // Reserve up front so the loop never reallocates.
+  for (const std::vector<T> &p : packed) {
+    planes.push_back(Image<T>(xsize, ysize));  // Allocate the plane first...
+    CopyFromPacked(p, &planes.back());  // ...then fill it from the packed data.
+  }
+  return planes;
+}
+
+template <typename T>  // Converts each padded plane into a packed vector of xsize * ysize values.
+static inline std::vector<std::vector<T>> PackedFromPlanes(
+    const std::vector<Image<T>> &planes) {
+  assert(!planes.empty());  // planes[0] below supplies the size used for every output vector.
+  const size_t num_pixels = planes[0].xsize() * planes[0].ysize();
+  std::vector<std::vector<T>> packed;
+  packed.reserve(planes.size());  // Reserve up front so the loop never reallocates.
+  for (const Image<T> &image : planes) {
+    packed.push_back(std::vector<T>(num_pixels));  // Preallocate the destination...
+    CopyToPacked(image, &packed.back());  // ...then copy the rows without their padding.
+  }
+  return packed;
+}
+
+class ButteraugliComparator {  // Declarations only; the member functions are defined outside this class body.
+ public:
+  ButteraugliComparator(size_t xsize, size_t ysize, int step);
+
+  // Computes the butteraugli map between rgb0 and rgb1 and updates result.
+  void Diffmap(const std::vector<ImageF> &rgb0,
+               const std::vector<ImageF> &rgb1,
+               ImageF &result);
+
+  // Same as above, but OpsinDynamicsImage() was already applied to
+  // rgb0 and rgb1.
+  void DiffmapOpsinDynamicsImage(const std::vector<ImageF> &rgb0,
+                                 const std::vector<ImageF> &rgb1,
+                                 ImageF &result);
+
+ private:
+  void BlockDiffMap(const std::vector<std::vector<float> > &rgb0,  // Private helpers take packed vectors, not ImageF.
+                    const std::vector<std::vector<float> > &rgb1,
+                    std::vector<float>* block_diff_dc,
+                    std::vector<float>* block_diff_ac);
+
+
+  void EdgeDetectorMap(const std::vector<std::vector<float> > &rgb0,
+                       const std::vector<std::vector<float> > &rgb1,
+                       std::vector<float>* edge_detector_map);
+
+  void EdgeDetectorLowFreq(const std::vector<std::vector<float> > &rgb0,
+                           const std::vector<std::vector<float> > &rgb1,
+                           std::vector<float>* block_diff_ac);
+
+  void CombineChannels(const std::vector<std::vector<float> >& scale_xyb,
+                       const std::vector<std::vector<float> >& scale_xyb_dc,
+                       const std::vector<float>& block_diff_dc,
+                       const std::vector<float>& block_diff_ac,
+                       const std::vector<float>& edge_detector_map,
+                       std::vector<float>* result);
+
+  const size_t xsize_;  // All members are const, so they are fixed at construction.
+  const size_t ysize_;
+  const size_t num_pixels_;
+  const int step_;
+  const size_t res_xsize_;
+  const size_t res_ysize_;
+};
+
+void ButteraugliDiffmap(const std::vector<ImageF> &rgb0,  // Free-function counterpart; defined elsewhere.
+                        const std::vector<ImageF> &rgb1,
+                        ImageF &diffmap);
+
+double ButteraugliScoreFromDiffmap(const ImageF& distmap);  // Reduces a difference map to a single score.
+
+// Compute values of local frequency and dc masking based on the activity
+// in the two images. NOTE(review): |mask| and |mask_dc| look like outputs - confirm.
+void Mask(const std::vector<std::vector<float> > &rgb0,
+          const std::vector<std::vector<float> > &rgb1,
+          size_t xsize, size_t ysize,
+          std::vector<std::vector<float> > *mask,
+          std::vector<std::vector<float> > *mask_dc);
+
+// Computes difference metrics for one 8x8 block (192 is presumably 8*8 pixels x 3 channels - TODO confirm).
+void ButteraugliBlockDiff(double rgb0[192],
+                          double rgb1[192],
+                          double diff_xyb_dc[3],
+                          double diff_xyb_ac[3],
+                          double diff_xyb_edge_dc[3]);
+
+void OpsinAbsorbance(const double in[3], double out[3]);  // Three values in, three values out.
+
+void OpsinDynamicsImage(size_t xsize, size_t ysize,
+                        std::vector<std::vector<float> > &rgb);  // |rgb| is passed by non-const reference.
+
+void MaskHighIntensityChange(
+    size_t xsize, size_t ysize,
+    const std::vector<std::vector<float> > &c0,
+    const std::vector<std::vector<float> > &c1,
+    std::vector<std::vector<float> > &rgb0,  // rgb0/rgb1 are passed by non-const reference.
+    std::vector<std::vector<float> > &rgb1);
+
+void Blur(size_t xsize, size_t ysize, float* channel, double sigma,
+          double border_ratio = 0.0);  // border_ratio may be omitted; it defaults to 0.0.
+
+void RgbToXyb(double r, double g, double b,
+              double *valx, double *valy, double *valz);  // Results are returned through the three pointers.
+
+double SimpleGamma(double v);
+
+double GammaMinArg();  // NOTE(review): presumably the bounds of the gamma argument range - confirm.
+double GammaMaxArg();
+
+// Polynomial evaluation via Clenshaw's scheme (similar to Horner's).
+// Template enables compile-time unrolling of the recursion, but must reside
+// outside of a class due to the specialization. Steps run from INDEX down to 0.
+template <int INDEX>
+static inline void ClenshawRecursion(const double x, const double *coefficients,
+                                     double *b1, double *b2) {
+  const double x_b1 = x * (*b1);
+  const double t = (x_b1 + x_b1) - (*b2) + coefficients[INDEX];  // 2*x*b1 - b2 + c[INDEX]
+  *b2 = *b1;  // Shift the running pair: the old b1 becomes b2...
+  *b1 = t;  // ...and the new term becomes b1.
+
+  ClenshawRecursion<INDEX - 1>(x, coefficients, b1, b2);
+}
+
+// Base case (INDEX == 0): ends the recursion and leaves the result in *b1.
+template <>
+inline void ClenshawRecursion<0>(const double x, const double *coefficients,
+                                 double *b1, double *b2) {
+  const double x_b1 = x * (*b1);
+  // The final iteration differs - no 2 * x_b1 here.
+  *b1 = x_b1 - (*b2) + coefficients[0];
+}
+
+// Rational polynomial := dividing two polynomial evaluations. These are easier
+// to find than minimax polynomials.
+struct RationalPolynomial {
+  template <int N>  // N coefficients are consumed, highest index first.
+  static double EvaluatePolynomial(const double x,
+                                   const double (&coefficients)[N]) {
+    double b1 = 0.0;
+    double b2 = 0.0;
+    ClenshawRecursion<N - 1>(x, coefficients, &b1, &b2);
+    return b1;  // The recursion leaves the value in b1.
+  }
+
+  // Evaluates the rational polynomial p/q at x (in [min_value, max_value]).
+  inline double operator()(const float x) const {
+    // First normalize to [0, 1].
+    const double x01 = (x - min_value) / (max_value - min_value);
+    // And then to [-1, 1] domain of Chebyshev polynomials.
+    const double xc = 2.0 * x01 - 1.0;
+
+    const double yp = EvaluatePolynomial(xc, p);
+    const double yq = EvaluatePolynomial(xc, q);
+    if (yq == 0.0) return 0.0;  // Avoid dividing by a zero denominator.
+    return static_cast<float>(yp / yq);  // Rounded to float precision, then returned as double.
+  }
+
+  // Domain of the polynomials; they are undefined elsewhere.
+  double min_value;
+  double max_value;
+
+  // Coefficients of T_n (Chebyshev polynomials of the first kind): p is the numerator, q the denominator.
+  // Degree 5/5 is a compromise between accuracy (0.1%) and numerical stability.
+  double p[5 + 1];
+  double q[5 + 1];
+};
+
+static inline float GammaPolynomial(float value) {  // Evaluates the fixed rational polynomial below at |value|.
+  // Generated by gamma_polynomial.m from equispaced x/gamma(x) samples.
+  static const RationalPolynomial r = {
+  0.770000000000000, 274.579999999999984,  // min_value, max_value
+  {  // p: numerator coefficients
+    881.979476556478289, 1496.058452015812463, 908.662212739659481,
+    373.566100223287378, 85.840860336314364, 6.683258861509244,
+  },
+  {  // q: denominator coefficients
+    12.262350348616792, 20.557285797683576, 12.161463238367844,
+    4.711532733641639, 0.899112889751053, 0.035662329617191,
+  }};
+  return r(value);  // The double result is narrowed to float on return.
+}
+
+}  // namespace butteraugli
+
+#endif  // BUTTERAUGLI_BUTTERAUGLI_H_
diff --git a/extern/etcpack/CMakeLists.txt b/extern/etcpack/CMakeLists.txt
new file mode 100644
index 0000000..2690373
--- /dev/null
+++ b/extern/etcpack/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+SET(ETCPACK_SRCS  # Files that make up the etcpack library.
+    etcdec.cxx
+    etcpack.cxx
+    image.cxx
+    image.h)
+
+add_definitions("-DPGMOUT=0")  # Overrides the PGMOUT=1 fallback that etcdec.cxx defines when PGMOUT is unset.
+ADD_LIBRARY(etcpack STATIC ${ETCPACK_SRCS})  # Always built as a static library.
+
diff --git a/extern/etcpack/etcdec.cxx b/extern/etcpack/etcdec.cxx
new file mode 100755
index 0000000..4d9cfd6
--- /dev/null
+++ b/extern/etcpack/etcdec.cxx
@@ -0,0 +1,1842 @@
+/**
+ 
+@~English
+@page licensing Licensing
+ 
+@section etcdec etcdec.cxx License
+ 
+etcdec.cxx is made available under the terms and conditions of the following
+License Agreement.
+
+Software License Agreement
+
+PLEASE REVIEW THE FOLLOWING TERMS AND CONDITIONS PRIOR TO USING THE
+ERICSSON TEXTURE COMPRESSION CODEC SOFTWARE (THE "SOFTWARE"). THE USE
+OF THE SOFTWARE IS SUBJECT TO THE TERMS AND CONDITIONS OF THE
+FOLLOWING SOFTWARE LICENSE AGREEMENT (THE "SLA"). IF YOU DO NOT ACCEPT
+SUCH TERMS AND CONDITIONS YOU MAY NOT USE THE SOFTWARE.
+
+Subject to the terms and conditions of the SLA, the licensee of the
+Software (the "Licensee") hereby, receives a non-exclusive,
+non-transferable, limited, free-of-charge, perpetual and worldwide
+license, to copy, use, distribute and modify the Software, but only
+for the purpose of developing, manufacturing, selling, using and
+distributing products including the Software in binary form, which
+products are used for compression and/or decompression according to
+the Khronos standard specifications OpenGL, OpenGL ES and
+WebGL. Notwithstanding anything of the above, Licensee may distribute
+[etcdec.cxx] in source code form provided (i) it is in unmodified
+form; and (ii) it is included in software owned by Licensee.
+
+If Licensee institutes, or threatens to institute, patent litigation
+against Ericsson or Ericsson's affiliates for using the Software for
+developing, having developed, manufacturing, having manufactured,
+selling, offer for sale, importing, using, leasing, operating,
+repairing and/or distributing products (i) within the scope of the
+Khronos framework; or (ii) using software or other intellectual
+property rights owned by Ericsson or its affiliates and provided under
+the Khronos framework, Ericsson shall have the right to terminate this
+SLA with immediate effect. Moreover, if Licensee institutes, or
+threatens to institute, patent litigation against any other licensee
+of the Software for using the Software in products within the scope of
+the Khronos framework, Ericsson shall have the right to terminate this
+SLA with immediate effect. However, should Licensee institute, or
+threaten to institute, patent litigation against any other licensee of
+the Software based on such other licensee's use of any other software
+together with the Software, then Ericsson shall have no right to
+terminate this SLA.
+
+This SLA does not transfer to Licensee any ownership to any Ericsson
+or third party intellectual property rights. All rights not expressly
+granted by Ericsson under this SLA are hereby expressly
+reserved. Furthermore, nothing in this SLA shall be construed as a
+right to use or sell products in a manner which conveys or purports to
+convey whether explicitly, by principles of implied license, or
+otherwise, any rights to any third party, under any patent of Ericsson
+or of Ericsson's affiliates covering or relating to any combination of
+the Software with any other software or product (not licensed
+hereunder) where the right applies specifically to the combination and
+not to the software or product itself.
+
+THE SOFTWARE IS PROVIDED "AS IS". ERICSSON MAKES NO REPRESENTATIONS OF
+ANY KIND, EXTENDS NO WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
+EXPRESS, IMPLIED OR STATUTORY; INCLUDING, BUT NOT LIMITED TO, EXPRESS,
+IMPLIED OR STATUTORY WARRANTIES OR CONDITIONS OF TITLE,
+MERCHANTABILITY, SATISFACTORY QUALITY, SUITABILITY, AND FITNESS FOR A
+PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+OF THE SOFTWARE IS WITH THE LICENSEE. SHOULD THE SOFTWARE PROVE
+DEFECTIVE, THE LICENSEE ASSUMES THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION. ERICSSON MAKES NO WARRANTY THAT THE MANUFACTURE,
+SALE, OFFERING FOR SALE, DISTRIBUTION, LEASE, USE OR IMPORTATION UNDER
+THE SLA WILL BE FREE FROM INFRINGEMENT OF PATENTS, COPYRIGHTS OR OTHER
+INTELLECTUAL PROPERTY RIGHTS OF OTHERS, AND THE VALIDITY OF THE
+LICENSE AND THE SLA ARE SUBJECT TO LICENSEE'S SOLE RESPONSIBILITY TO
+MAKE SUCH DETERMINATION AND ACQUIRE SUCH LICENSES AS MAY BE NECESSARY
+WITH RESPECT TO PATENTS, COPYRIGHT AND OTHER INTELLECTUAL PROPERTY OF
+THIRD PARTIES.
+
+THE LICENSEE ACKNOWLEDGES AND ACCEPTS THAT THE SOFTWARE (I) IS NOT
+LICENSED FOR; (II) IS NOT DESIGNED FOR OR INTENDED FOR; AND (III) MAY
+NOT BE USED FOR; ANY MISSION CRITICAL APPLICATIONS SUCH AS, BUT NOT
+LIMITED TO OPERATION OF NUCLEAR OR HEALTHCARE COMPUTER SYSTEMS AND/OR
+NETWORKS, AIRCRAFT OR TRAIN CONTROL AND/OR COMMUNICATION SYSTEMS OR
+ANY OTHER COMPUTER SYSTEMS AND/OR NETWORKS OR CONTROL AND/OR
+COMMUNICATION SYSTEMS ALL IN WHICH CASE THE FAILURE OF THE SOFTWARE
+COULD LEAD TO DEATH, PERSONAL INJURY, OR SEVERE PHYSICAL, MATERIAL OR
+ENVIRONMENTAL DAMAGE. LICENSEE'S RIGHTS UNDER THIS LICENSE WILL
+TERMINATE AUTOMATICALLY AND IMMEDIATELY WITHOUT NOTICE IF LICENSEE
+FAILS TO COMPLY WITH THIS PARAGRAPH.
+
+IN NO EVENT SHALL ERICSSON BE LIABLE FOR ANY DAMAGES WHATSOEVER,
+INCLUDING BUT NOT LIMITED TO PERSONAL INJURY, ANY GENERAL, SPECIAL,
+INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF OR IN
+CONNECTION WITH THE USE OR INABILITY TO USE THE SOFTWARE (INCLUDING
+BUT NOT LIMITED TO LOSS OF PROFITS, BUSINESS INTERUPTIONS, OR ANY
+OTHER COMMERCIAL DAMAGES OR LOSSES, LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY THE LICENSEE OR THIRD
+PARTIES OR A FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER
+SOFTWARE) REGARDLESS OF THE THEORY OF LIABILITY (CONTRACT, TORT, OR
+OTHERWISE), EVEN IF THE LICENSEE OR ANY OTHER PARTY HAS BEEN ADVISED
+OF THE POSSIBILITY OF SUCH DAMAGES.
+
+Licensee acknowledges that "ERICSSON ///" is the corporate trademark
+of Telefonaktiebolaget LM Ericsson and that both "Ericsson" and the
+figure "///" are important features of the trade names of
+Telefonaktiebolaget LM Ericsson. Nothing contained in these terms and
+conditions shall be deemed to grant Licensee any right, title or
+interest in the word "Ericsson" or the figure "///". No delay or
+omission by Ericsson to exercise any right or power shall impair any
+such right or power to be construed to be a waiver thereof. Consent by
+Ericsson to, or waiver of, a breach by the Licensee shall not
+constitute consent to, waiver of, or excuse for any other different or
+subsequent breach.
+
+This SLA shall be governed by the substantive law of Sweden. Any
+dispute, controversy or claim arising out of or in connection with
+this SLA, or the breach, termination or invalidity thereof, shall be
+submitted to the exclusive jurisdiction of the Swedish Courts.
+
+*/
+
+//// etcpack v2.74
+//// 
+//// NO WARRANTY 
+//// 
+//// BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE THE PROGRAM IS PROVIDED
+//// "AS IS". ERICSSON MAKES NO REPRESENTATIONS OF ANY KIND, EXTENDS NO
+//// WARRANTIES OR CONDITIONS OF ANY KIND; EITHER EXPRESS, IMPLIED OR
+//// STATUTORY; INCLUDING, BUT NOT LIMITED TO, EXPRESS, IMPLIED OR
+//// STATUTORY WARRANTIES OR CONDITIONS OF TITLE, MERCHANTABILITY,
+//// SATISFACTORY QUALITY, SUITABILITY AND FITNESS FOR A PARTICULAR
+//// PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+//// PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+//// THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. ERICSSON
+//// MAKES NO WARRANTY THAT THE MANUFACTURE, SALE, OFFERING FOR SALE,
+//// DISTRIBUTION, LEASE, USE OR IMPORTATION UNDER THE LICENSE WILL BE FREE
+//// FROM INFRINGEMENT OF PATENTS, COPYRIGHTS OR OTHER INTELLECTUAL
+//// PROPERTY RIGHTS OF OTHERS, AND THE VALIDITY OF THE LICENSE IS SUBJECT
+//// TO YOUR SOLE RESPONSIBILITY TO MAKE SUCH DETERMINATION AND ACQUIRE
+//// SUCH LICENSES AS MAY BE NECESSARY WITH RESPECT TO PATENTS, COPYRIGHT
+//// AND OTHER INTELLECTUAL PROPERTY OF THIRD PARTIES.
+//// 
+//// FOR THE AVOIDANCE OF DOUBT THE PROGRAM (I) IS NOT LICENSED FOR; (II)
+//// IS NOT DESIGNED FOR OR INTENDED FOR; AND (III) MAY NOT BE USED FOR;
+//// ANY MISSION CRITICAL APPLICATIONS SUCH AS, BUT NOT LIMITED TO
+//// OPERATION OF NUCLEAR OR HEALTHCARE COMPUTER SYSTEMS AND/OR NETWORKS,
+//// AIRCRAFT OR TRAIN CONTROL AND/OR COMMUNICATION SYSTEMS OR ANY OTHER
+//// COMPUTER SYSTEMS AND/OR NETWORKS OR CONTROL AND/OR COMMUNICATION
+//// SYSTEMS ALL IN WHICH CASE THE FAILURE OF THE PROGRAM COULD LEAD TO
+//// DEATH, PERSONAL INJURY, OR SEVERE PHYSICAL, MATERIAL OR ENVIRONMENTAL
+//// DAMAGE. YOUR RIGHTS UNDER THIS LICENSE WILL TERMINATE AUTOMATICALLY
+//// AND IMMEDIATELY WITHOUT NOTICE IF YOU FAIL TO COMPLY WITH THIS
+//// PARAGRAPH.
+//// 
+//// IN NO EVENT WILL ERICSSON, BE LIABLE FOR ANY DAMAGES WHATSOEVER,
+//// INCLUDING BUT NOT LIMITED TO PERSONAL INJURY, ANY GENERAL, SPECIAL,
+//// INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR IN
+//// CONNECTION WITH THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT
+//// NOT LIMITED TO LOSS OF PROFITS, BUSINESS INTERUPTIONS, OR ANY OTHER
+//// COMMERCIAL DAMAGES OR LOSSES, LOSS OF DATA OR DATA BEING RENDERED
+//// INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
+//// THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) REGARDLESS OF THE
+//// THEORY OF LIABILITY (CONTRACT, TORT OR OTHERWISE), EVEN IF SUCH HOLDER
+//// OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+//// 
+//// (C) Ericsson AB 2005-2013. All Rights Reserved.
+//// 
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// Typedefs: short names for the 8- and 16-bit integer types used below.
+typedef unsigned char uint8;
+typedef unsigned short uint16;
+typedef short int16;
+
+// Macros to help with bit extraction/insertion. A field is `size` bits wide with its top bit at `startpos`; the *HIGH variants take startpos in 32..63 and subtract 32.
+#define SHIFT(size,startpos) ((startpos)-(size)+1)
+#define MASK(size, startpos) (((2<<(size-1))-1) << SHIFT(size,startpos))
+#define PUTBITS( dest, data, size, startpos) dest = ((dest & ~MASK(size, startpos)) | ((data << SHIFT(size, startpos)) & MASK(size,startpos)))
+#define SHIFTHIGH(size, startpos) (((startpos)-32)-(size)+1)
+#define MASKHIGH(size, startpos) (((1<<(size))-1) << SHIFTHIGH(size,startpos))
+#define PUTBITSHIGH(dest, data, size, startpos) dest = ((dest & ~MASKHIGH(size, startpos)) | ((data << SHIFTHIGH(size, startpos)) & MASKHIGH(size,startpos)))
+#define GETBITS(source, size, startpos)  (( (source) >> ((startpos)-(size)+1) ) & ((1<<(size)) -1))
+#define GETBITSHIGH(source, size, startpos)  (( (source) >> (((startpos)-32)-(size)+1) ) & ((1<<(size)) -1))
+#ifndef PGMOUT
+#define PGMOUT 1
+#endif
+// Thumb macros and definitions (constants for the 59-bit T-mode "59T" and the 58-bit H-mode "58H")
+#define	R_BITS59T 4
+#define G_BITS59T 4
+#define	B_BITS59T 4
+#define	R_BITS58H 4
+#define G_BITS58H 4
+#define	B_BITS58H 4
+#define	MAXIMUM_ERROR (255*255*16*1000)
+#define R 0
+#define G 1
+#define B 2
+#define BLOCKHEIGHT 4
+#define BLOCKWIDTH 4
+#define BINPOW(power) (1<<(power))
+#define	TABLE_BITS_59T 3
+#define	TABLE_BITS_58H 3
+
+// Helper macros: CLAMP limits x to [ll,ul]; JAS_ROUND rounds to the nearest int, halves away from zero.
+#define CLAMP(ll,x,ul) (((x)<(ll)) ? (ll) : (((x)>(ul)) ? (ul) : (x)))
+#define JAS_ROUND(x) (((x) < 0.0 ) ? ((int)((x)-0.5)) : ((int)((x)+0.5)))
+
+#define RED_CHANNEL(img,width,x,y,channels)   img[channels*(y*width+x)+0]
+#define GREEN_CHANNEL(img,width,x,y,channels) img[channels*(y*width+x)+1]
+#define BLUE_CHANNEL(img,width,x,y,channels)  img[channels*(y*width+x)+2]
+#define ALPHA_CHANNEL(img,width,x,y,channels)  img[channels*(y*width+x)+3]
+
+
+// Global tables (lookup data shared by the functions in this file)
+static uint8 table59T[8] = {3,6,11,16,23,32,41,64};  // 3-bit table for the 59 bit T-mode
+static uint8 table58H[8] = {3,6,11,16,23,32,41,64};  // 3-bit table for the 58 bit H-mode
+static int compressParams[16][4] = {{-8, -2,  2, 8}, {-8, -2,  2, 8}, {-17, -5, 5, 17}, {-17, -5, 5, 17}, {-29, -9, 9, 29}, {-29, -9, 9, 29}, {-42, -13, 13, 42}, {-42, -13, 13, 42}, {-60, -18, 18, 60}, {-60, -18, 18, 60}, {-80, -24, 24, 80}, {-80, -24, 24, 80}, {-106, -33, 33, 106}, {-106, -33, 33, 106}, {-183, -47, 47, 183}, {-183, -47, 47, 183}}; // 16 rows of 4 signed values; the rows come in identical pairs
+static int unscramble[4] = {2, 3, 1, 0};
+int alphaTableInitialized = 0; // set to 1 by setupAlphaTable() on its first call
+int alphaTable[256][8]; // filled in by setupAlphaTable()
+int alphaBase[16][4] = {	// base rows that setupAlphaTable() expands into alphaTable
+              {-15,-9,-6,-3},
+							{-13,-10,-7,-3},
+							{-13,-8,-5,-2},
+							{-13,-6,-4,-2},
+							{-12,-8,-6,-3},
+							{-11,-9,-7,-3},
+							{-11,-8,-7,-4},
+							{-11,-8,-5,-3},
+							{ -10,-8,-6,-2},
+							{ -10,-8,-5,-2},
+							{ -10,-8,-4,-2},
+							{ -10,-7,-5,-2},
+							{ -10,-7,-4,-3},
+							{ -10,-3,-2, -1},
+							{ -9,-8,-6,-4},
+							{ -9,-7,-5,-3}
+											};
+
+// Global variables
+int formatSigned = 0;
+
+// Enums (pattern selectors for the H and T modes)
+ enum{PATTERN_H = 0, 
+      PATTERN_T = 1};
+
+
+// Code used to create the valtab: fills alphaTable from alphaBase. Only the first call does any work.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void setupAlphaTable() 
+{
+  if(alphaTableInitialized)
+    return;
+  alphaTableInitialized = 1;
+
+	//rows 16..31: entries 0..3 are the alphaBase row in reverse order, entries 4..7 are -(value)-1 of the same values
+	int buf;
+	for(int i = 16; i<32; i++) 
+	{
+		for(int j=0; j<8; j++) 
+		{
+			buf=alphaBase[i-16][3-j%4];
+			if(j<4)
+				alphaTable[i][j]=buf;
+			else
+				alphaTable[i][j]=(-buf-1);
+		}
+	}
+	
+	//every row i becomes row 16+i%16 scaled by i/16, so rows 0..15 end up all zero and rows 16..31 keep their values
+	for(int i=0; i<256; i++) 
+	{
+		//mul is the scale factor, old is the row (16..31) that gets scaled.
+		int mul = i/16;
+		int old = 16+i%16;
+		for(int j = 0; j<8; j++) 
+		{
+			alphaTable[i][j]=alphaTable[old][j]*mul;
+			//note: we don't do clamping here, though we could, because we'll be clamped afterwards anyway.
+		}
+	}
+}
+
+// Read a 2-byte word in big endian style from f into blockadr[0]; bytes that cannot be read count as 0.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void read_big_endian_2byte_word(unsigned short *blockadr, FILE *f)
+{
+	uint8 bytes[2] = {0, 0};	// zero-filled: a short read must not leave indeterminate data
+	unsigned short block;
+
+	fread(&bytes[0], 1, 1, f);
+	fread(&bytes[1], 1, 1, f);
+
+	block = 0;
+	block |= bytes[0];	// most significant byte comes first in the file
+	block = block << 8;
+	block |= bytes[1];
+
+	blockadr[0] = block;
+}
+
+// Read a 4-byte word in big endian style from f into blockadr[0]; bytes that cannot be read count as 0.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void read_big_endian_4byte_word(unsigned int *blockadr, FILE *f)
+{
+	uint8 bytes[4] = {0, 0, 0, 0};	// zero-filled: a short read must not leave indeterminate data
+	unsigned int block;
+
+	fread(&bytes[0], 1, 1, f);
+	fread(&bytes[1], 1, 1, f);
+	fread(&bytes[2], 1, 1, f);
+	fread(&bytes[3], 1, 1, f);
+
+	block = 0;
+	block |= bytes[0];	// most significant byte comes first in the file
+	block = block << 8;
+	block |= bytes[1];
+	block = block << 8;
+	block |= bytes[2];
+	block = block << 8;
+	block |= bytes[3];
+
+	blockadr[0] = block;
+}
+
+// The format stores the bits for the three extra modes in a roundabout way to be able to
+// fit them without increasing the bit rate. This function gathers the 57 payload bits of planar_word1
+// (bits 63..32) and planar_word2 (bits 31..0) into one contiguous run, bits 63..7 of planar57_word1/2.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void unstuff57bits(unsigned int planar_word1, unsigned int planar_word2, unsigned int &planar57_word1, unsigned int &planar57_word2)
+{
+	// Get bits from twotimer configuration for 57 bits
+	// 
+	// Go to this bit layout:
+	//
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      -----------------------------------------------------------------------------------------------
+	//     |R0               |G01G02              |B01B02  ;B03     |RH1           |RH2|GH                 |
+	//      -----------------------------------------------------------------------------------------------
+	//
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+	//      -----------------------------------------------------------------------------------------------
+	//     |BH               |RV               |GV                  |BV                | not used          |   
+	//      -----------------------------------------------------------------------------------------------
+	//
+	//  From this:
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      ------------------------------------------------------------------------------------------------
+	//     |//|R0               |G01|/|G02              |B01|/ // //|B02  |//|B03     |RH1           |df|RH2|
+	//      ------------------------------------------------------------------------------------------------
+	//
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+	//      -----------------------------------------------------------------------------------------------
+	//     |GH                  |BH               |RV               |GV                   |BV              |
+	//      -----------------------------------------------------------------------------------------------
+	//
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+
+	uint8 RO, GO1, GO2, BO1, BO2, BO3, RH1, RH2, GH, BH, RV, GV, BV;
+
+	RO  = GETBITSHIGH( planar_word1, 6, 62);	// step 1: pull each field out of the stuffed layout
+	GO1 = GETBITSHIGH( planar_word1, 1, 56);
+	GO2 = GETBITSHIGH( planar_word1, 6, 54);
+	BO1 = GETBITSHIGH( planar_word1, 1, 48);
+	BO2 = GETBITSHIGH( planar_word1, 2, 44);
+	BO3 = GETBITSHIGH( planar_word1, 3, 41);
+	RH1 = GETBITSHIGH( planar_word1, 5, 38);
+	RH2 = GETBITSHIGH( planar_word1, 1, 32);
+	GH  = GETBITS(     planar_word2, 7, 31);
+	BH  = GETBITS(     planar_word2, 6, 24);
+	RV  = GETBITS(     planar_word2, 6, 18);
+	GV  = GETBITS(     planar_word2, 7, 12);
+	BV  = GETBITS(     planar_word2, 6,  5);
+
+	planar57_word1 = 0; planar57_word2 = 0;	// step 2: write the fields back to back, starting at bit 63
+	PUTBITSHIGH( planar57_word1, RO,  6, 63);
+	PUTBITSHIGH( planar57_word1, GO1, 1, 57);
+	PUTBITSHIGH( planar57_word1, GO2, 6, 56);
+	PUTBITSHIGH( planar57_word1, BO1, 1, 50);
+	PUTBITSHIGH( planar57_word1, BO2, 2, 49);
+	PUTBITSHIGH( planar57_word1, BO3, 3, 47);
+	PUTBITSHIGH( planar57_word1, RH1, 5, 44);
+	PUTBITSHIGH( planar57_word1, RH2, 1, 39);
+	PUTBITSHIGH( planar57_word1, GH, 7, 38);
+	PUTBITS(     planar57_word2, BH, 6, 31);
+	PUTBITS(     planar57_word2, RV, 6, 25);
+	PUTBITS(     planar57_word2, GV, 7, 19);
+	PUTBITS(     planar57_word2, BV, 6, 12);	// ends at bit 7; bits 6..0 stay zero
+}
+
+// The format stores the bits for the three extra modes in a roundabout way to be able to
+// fit them without increasing the bit rate. This function packs part0..part3 of thumbH_word1 into
+// bits 57..32 of thumbH58_word1 and passes thumbH_word2 through unchanged as thumbH58_word2.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void unstuff58bits(unsigned int thumbH_word1, unsigned int thumbH_word2, unsigned int &thumbH58_word1, unsigned int &thumbH58_word2)
+{
+	// Go to this layout:
+	//
+	//     |63 62 61 60 59 58|57 56 55 54 53 52 51|50 49|48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33|32   |
+	//     |-------empty-----|part0---------------|part1|part2------------------------------------------|part3|
+	//
+	//  from this:
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      --------------------------------------------------------------------------------------------------|
+	//     |//|part0               |// // //|part1|//|part2                                          |df|part3|
+	//      --------------------------------------------------------------------------------------------------|
+
+	unsigned int part0, part1, part2, part3;
+
+	// move parts: extract each one from the stuffed word, then store them back to back
+	part0 = GETBITSHIGH( thumbH_word1, 7, 62);
+	part1 = GETBITSHIGH( thumbH_word1, 2, 52);
+	part2 = GETBITSHIGH( thumbH_word1,16, 49);
+	part3 = GETBITSHIGH( thumbH_word1, 1, 32);
+	thumbH58_word1 = 0;
+	PUTBITSHIGH( thumbH58_word1, part0,  7, 57);
+	PUTBITSHIGH( thumbH58_word1, part1,  2, 50);
+	PUTBITSHIGH( thumbH58_word1, part2, 16, 48);
+	PUTBITSHIGH( thumbH58_word1, part3,  1, 32);
+
+	thumbH58_word2 = thumbH_word2;	// the lower word needs no rearranging
+}
+
+// The format stores the bits for the three extra modes in a roundabout way to be able to
+// fit them without increasing the bit rate. This function rearranges thumbT_word1 into the 59-bit
+// layout below (result in thumbT59_word1) and passes thumbT_word2 through unchanged as thumbT59_word2.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void unstuff59bits(unsigned int thumbT_word1, unsigned int thumbT_word2, unsigned int &thumbT59_word1, unsigned int &thumbT59_word2)
+{
+	// Get bits from twotimer configuration 59 bits. 
+	// 
+	// Go to this bit layout:
+	//
+	//     |63 62 61 60 59|58 57 56 55|54 53 52 51|50 49 48 47|46 45 44 43|42 41 40 39|38 37 36 35|34 33 32|
+	//     |----empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|--dist--|
+	//
+	//     |31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+	//     |----------------------------------------index bits---------------------------------------------|
+	//
+	//
+	//  From this:
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      -----------------------------------------------------------------------------------------------
+	//     |// // //|R0a  |//|R0b  |G0         |B0         |R1         |G1         |B1          |da  |df|db|
+	//      -----------------------------------------------------------------------------------------------
+	//
+	//     |31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+	//     |----------------------------------------index bits---------------------------------------------|
+	//
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      -----------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |df|fp|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bt|bt|
+	//      ------------------------------------------------------------------------------------------------
+
+	uint8 R0a;
+
+	// Fix middle part: shifting right by one puts most fields at their target positions
+	thumbT59_word1 = thumbT_word1 >> 1;
+	// Fix db (lowest bit of d): restore the bit that the shift above pushed out
+	PUTBITSHIGH( thumbT59_word1, thumbT_word1,  1, 32);
+	// Fix R0a (top two bits of R0): move them from bits 60..59 to bits 58..57
+	R0a = GETBITSHIGH( thumbT_word1, 2, 60);
+	PUTBITSHIGH( thumbT59_word1, R0a,  2, 58);
+
+	// Zero top part (not needed): clears bits 63..59
+	PUTBITSHIGH( thumbT59_word1, 0,  5, 63);
+
+	thumbT59_word2 = thumbT_word2;
+}
+
+// The color bits are expanded to the full color: both colors in colors_RGB444 are widened to 8 bits per channel and stored in colors; R_B, G_B and B_B are the stored bit counts of the red, green and blue channel.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void decompressColor(int R_B, int G_B, int B_B, uint8 (colors_RGB444)[2][3], uint8 (colors)[2][3]) 
+{
+	// The color should be retrieved as:
+	//
+	// c = round(255/(2^r_bits-1)*comp_color)      (^ denotes exponentiation)
+	//
+	// This is similar to bit replication
+	// 
+	// Note -- this code only works for bit replication from 4 bits and up --- 3 bits needs
+	// two copy operations. (Below 4 bits the right-shift amount n-(8-n) used here would be negative.)
+	// Each line: move the n stored bits to the top of the byte, then fill the freed low bits with the value's own top bits.
+ 	colors[0][R] = (colors_RGB444[0][R] << (8 - R_B)) | (colors_RGB444[0][R] >> (R_B - (8-R_B)) );
+ 	colors[0][G] = (colors_RGB444[0][G] << (8 - G_B)) | (colors_RGB444[0][G] >> (G_B - (8-G_B)) );
+ 	colors[0][B] = (colors_RGB444[0][B] << (8 - B_B)) | (colors_RGB444[0][B] >> (B_B - (8-B_B)) );
+ 	colors[1][R] = (colors_RGB444[1][R] << (8 - R_B)) | (colors_RGB444[1][R] >> (R_B - (8-R_B)) );
+ 	colors[1][G] = (colors_RGB444[1][G] << (8 - G_B)) | (colors_RGB444[1][G] >> (G_B - (8-G_B)) );
+ 	colors[1][B] = (colors_RGB444[1][B] << (8 - B_B)) | (colors_RGB444[1][B] >> (B_B - (8-B_B)) );
+}
+
+void calculatePaintColors59T(uint8 d, uint8 p, uint8 (colors)[2][3], uint8 (possible_colors)[4][3]) 
+{
+	//////////////////////////////////////////////
+	// Fills possible_colors[0..3] with the T-mode paint colors derived from colors[0], colors[1] and the offset table59T[d].
+	//		C3      C1		C4----C1---C2
+	//		|		|			  |
+	//		|		|			  |
+	//		|-------|			  |
+	//		|		|			  |
+	//		|		|			  |
+	//		C4      C2			  C3
+	// (left sketch: H pattern; right sketch: T pattern, which is the one this function builds)
+	//////////////////////////////////////////////
+
+	// C4: colors[1] minus the table offset (CLAMP is defined elsewhere; presumably it limits the value to 0..255)
+	possible_colors[3][R] = CLAMP(0,colors[1][R] - table59T[d],255);
+	possible_colors[3][G] = CLAMP(0,colors[1][G] - table59T[d],255);
+	possible_colors[3][B] = CLAMP(0,colors[1][B] - table59T[d],255);
+	// Only PATTERN_T is handled; any other value of p ends in the else branch below.
+	if (p == PATTERN_T) 
+	{
+		// C3: colors[0] is used as it is
+		possible_colors[0][R] = colors[0][R];
+		possible_colors[0][G] = colors[0][G];
+		possible_colors[0][B] = colors[0][B];
+		// C2: colors[1] plus the table offset
+		possible_colors[1][R] = CLAMP(0,colors[1][R] + table59T[d],255);
+		possible_colors[1][G] = CLAMP(0,colors[1][G] + table59T[d],255);
+		possible_colors[1][B] = CLAMP(0,colors[1][B] + table59T[d],255);
+		// C1: colors[1] is used as it is
+		possible_colors[2][R] = colors[1][R];
+		possible_colors[2][G] = colors[1][G];
+		possible_colors[2][B] = colors[1][B];
+
+	} 
+	else 
+	{
+		printf("Invalid pattern. Terminating");
+		exit(1);	// NOTE(review): ends the whole process; possible_colors[0..2] are never filled on this path
+	}
+}
+// Decompress a T-mode block (simple packing): writes the BLOCKWIDTH x BLOCKHEIGHT texels starting at (startx, starty) into img,
+// which is addressed as rows of width texels with channels bytes each (height is not used). Simple 59T packing:
+//|63 62 61 60 59|58 57 56 55|54 53 52 51|50 49 48 47|46 45 44 43|42 41 40 39|38 37 36 35|34 33 32|
+//|----empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|--dist--|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void decompressBlockTHUMB59Tc(unsigned int block_part1, unsigned int block_part2, uint8 *img,int width,int height,int startx,int starty, int channels)
+{
+	uint8 colorsRGB444[2][3];	// the two base colors as stored, 4 bits per channel
+	uint8 colors[2][3];	// the same two colors expanded by decompressColor
+	uint8 paint_colors[4][3];	// the four colors a texel can select
+	uint8 distance;	// index into table59T (used by calculatePaintColors59T)
+	uint8 block_mask[4][4];	// per-texel 2-bit paint color index
+
+	// Unpack the two base colors (red 0 .. blue 1 in the layout above), 4 bits per channel.
+	colorsRGB444[0][R]= GETBITSHIGH(block_part1, 4, 58);
+	colorsRGB444[0][G]= GETBITSHIGH(block_part1, 4, 54);
+	colorsRGB444[0][B]= GETBITSHIGH(block_part1, 4, 50);
+
+	colorsRGB444[1][R]= GETBITSHIGH(block_part1, 4, 46);
+	colorsRGB444[1][G]= GETBITSHIGH(block_part1, 4, 42);
+	colorsRGB444[1][B]= GETBITSHIGH(block_part1, 4, 38);
+
+	distance   = GETBITSHIGH(block_part1, TABLE_BITS_59T, 34);
+
+	// Extend the two colors to RGB888	
+	decompressColor(R_BITS59T, G_BITS59T, B_BITS59T, colorsRGB444, colors);	
+	calculatePaintColors59T(distance, PATTERN_T, colors, paint_colors);
+	
+	// Choose one of the four paint colors for each texel
+	for (uint8 x = 0; x < BLOCKWIDTH; ++x) 
+	{
+		for (uint8 y = 0; y < BLOCKHEIGHT; ++y) 
+		{
+			//block_mask[x][y] = GETBITS(block_part2,2,31-(y*4+x)*2);
+			block_mask[x][y] = GETBITS(block_part2,1,(y+x*4)+16)<<1;	// high index bit, read at position (y+x*4)+16
+			block_mask[x][y] |= GETBITS(block_part2,1,(y+x*4));	// low index bit, read at position (y+x*4)
+			img[channels*((starty+y)*width+startx+x)+R] =
+				CLAMP(0,paint_colors[block_mask[x][y]][R],255); // RED
+			img[channels*((starty+y)*width+startx+x)+G] =
+				CLAMP(0,paint_colors[block_mask[x][y]][G],255); // GREEN
+			img[channels*((starty+y)*width+startx+x)+B] =
+				CLAMP(0,paint_colors[block_mask[x][y]][B],255); // BLUE
+		}
+	}
+}
+
+void decompressBlockTHUMB59T(unsigned int block_part1, unsigned int block_part2, uint8 *img, int width, int height, int startx, int starty)
+{ // Convenience wrapper: forwards to decompressBlockTHUMB59Tc with channels fixed to 3.
+  decompressBlockTHUMB59Tc(block_part1, block_part2, img, width, height, startx, starty, 3);
+}
+
+// Calculate the four H-mode paint colors from the two block colors: with t = table58H[d], possible_colors[0..3] become
+// colors[0]+t, colors[0]-t, colors[1]+t and colors[1]-t (each passed through CLAMP(0, v, 255)). p must be PATTERN_H.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void calculatePaintColors58H(uint8 d, uint8 p, uint8 (colors)[2][3], uint8 (possible_colors)[4][3]) 
+{
+	
+	//////////////////////////////////////////////
+	//
+	//		C3      C1		C4----C1---C2
+	//		|		|			  |
+	//		|		|			  |
+	//		|-------|			  |
+	//		|		|			  |
+	//		|		|			  |
+	//		C4      C2			  C3
+	// (left sketch: H pattern, which is the one this function builds; right sketch: T pattern)
+	//////////////////////////////////////////////
+
+	// C4: colors[1] minus the table offset
+	possible_colors[3][R] = CLAMP(0,colors[1][R] - table58H[d],255);
+	possible_colors[3][G] = CLAMP(0,colors[1][G] - table58H[d],255);
+	possible_colors[3][B] = CLAMP(0,colors[1][B] - table58H[d],255);
+	
+	if (p == PATTERN_H) 
+	{ 
+		// C1: colors[0] plus the table offset
+		possible_colors[0][R] = CLAMP(0,colors[0][R] + table58H[d],255);
+		possible_colors[0][G] = CLAMP(0,colors[0][G] + table58H[d],255);
+		possible_colors[0][B] = CLAMP(0,colors[0][B] + table58H[d],255);
+		// C2: colors[0] minus the table offset
+		possible_colors[1][R] = CLAMP(0,colors[0][R] - table58H[d],255);
+		possible_colors[1][G] = CLAMP(0,colors[0][G] - table58H[d],255);
+		possible_colors[1][B] = CLAMP(0,colors[0][B] - table58H[d],255);
+		// C3: colors[1] plus the table offset
+		possible_colors[2][R] = CLAMP(0,colors[1][R] + table58H[d],255);
+		possible_colors[2][G] = CLAMP(0,colors[1][G] + table58H[d],255);
+		possible_colors[2][B] = CLAMP(0,colors[1][B] + table58H[d],255);
+	} 
+	else 
+	{
+		printf("Invalid pattern. Terminating");
+		exit(1);	// NOTE(review): ends the whole process; possible_colors[0..2] are never filled on this path
+	}
+}
+
+// Decompress an H-mode block: writes the BLOCKWIDTH x BLOCKHEIGHT texels starting at (startx, starty) into img, addressed as rows of width texels with channels bytes each (height is not used).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void decompressBlockTHUMB58Hc(unsigned int block_part1, unsigned int block_part2, uint8 *img, int width, int height, int startx, int starty, int channels)
+{
+	unsigned int col0, col1;	// the two base colors, each read as one 12-bit number
+	uint8 colors[2][3];	// base colors expanded by decompressColor
+	uint8 colorsRGB444[2][3];	// base colors as stored, 4 bits per channel
+	uint8 paint_colors[4][3];	// the four colors a texel can select
+	uint8 distance;	// index into table58H (used by calculatePaintColors58H)
+	uint8 block_mask[4][4];	// per-texel 2-bit paint color index
+	
+	// Unpack the two base colors, 4 bits per channel.
+	colorsRGB444[0][R]= GETBITSHIGH(block_part1, 4, 57);
+	colorsRGB444[0][G]= GETBITSHIGH(block_part1, 4, 53);
+	colorsRGB444[0][B]= GETBITSHIGH(block_part1, 4, 49);
+
+	colorsRGB444[1][R]= GETBITSHIGH(block_part1, 4, 45);
+	colorsRGB444[1][G]= GETBITSHIGH(block_part1, 4, 41);
+	colorsRGB444[1][B]= GETBITSHIGH(block_part1, 4, 37);
+	// Only the two upper bits of the distance index are stored explicitly; the lowest bit is derived from the order of the two colors.
+	distance = 0;	// (immediately overwritten by the next statement)
+	distance = (GETBITSHIGH(block_part1, 2, 33)) << 1;
+
+	col0 = GETBITSHIGH(block_part1, 12, 57);
+	col1 = GETBITSHIGH(block_part1, 12, 45);
+
+	if(col0 >= col1)
+	{
+		distance |= 1;	// lowest distance bit is 1 when color 0, read as a 12-bit number, is not smaller than color 1
+	}
+
+	// Extend the two colors to RGB888	
+	decompressColor(R_BITS58H, G_BITS58H, B_BITS58H, colorsRGB444, colors);	
+	
+	calculatePaintColors58H(distance, PATTERN_H, colors, paint_colors);
+	
+	// Choose one of the four paint colors for each texel
+	for (uint8 x = 0; x < BLOCKWIDTH; ++x) 
+	{
+		for (uint8 y = 0; y < BLOCKHEIGHT; ++y) 
+		{
+			//block_mask[x][y] = GETBITS(block_part2,2,31-(y*4+x)*2);
+			block_mask[x][y] = GETBITS(block_part2,1,(y+x*4)+16)<<1;	// high index bit, read at position (y+x*4)+16
+			block_mask[x][y] |= GETBITS(block_part2,1,(y+x*4));	// low index bit, read at position (y+x*4)
+			img[channels*((starty+y)*width+startx+x)+R] =
+				CLAMP(0,paint_colors[block_mask[x][y]][R],255); // RED
+			img[channels*((starty+y)*width+startx+x)+G] =
+				CLAMP(0,paint_colors[block_mask[x][y]][G],255); // GREEN
+			img[channels*((starty+y)*width+startx+x)+B] =
+				CLAMP(0,paint_colors[block_mask[x][y]][B],255); // BLUE
+		}
+	}
+}
+void decompressBlockTHUMB58H(unsigned int block_part1, unsigned int block_part2, uint8 *img, int width, int height, int startx, int starty)
+{ // Convenience wrapper: forwards to decompressBlockTHUMB58Hc with channels fixed to 3.
+  decompressBlockTHUMB58Hc(block_part1, block_part2, img, width, height, startx, starty, 3);
+}
+
+// Decompress the planar mode: unpacks three colors (colorO, colorH, colorV) from the two words and writes a 4x4 block at (startx, starty) of img, each channel blended from the three; channels is the per-texel stride and height is not used.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void decompressBlockPlanar57c(unsigned int compressed57_1, unsigned int compressed57_2, uint8 *img, int width, int height, int startx, int starty, int channels)
+{
+	uint8 colorO[3], colorH[3], colorV[3];
+	// Each color is stored with 6, 7 and 6 bits for its three channels; colorH[2] and all of colorV come from the second word.
+	colorO[0] = GETBITSHIGH( compressed57_1, 6, 63);
+	colorO[1] = GETBITSHIGH( compressed57_1, 7, 57);
+	colorO[2] = GETBITSHIGH( compressed57_1, 6, 50);
+	colorH[0] = GETBITSHIGH( compressed57_1, 6, 44);
+	colorH[1] = GETBITSHIGH( compressed57_1, 7, 38);
+	colorH[2] = GETBITS(     compressed57_2, 6, 31);
+	colorV[0] = GETBITS(     compressed57_2, 6, 25);
+	colorV[1] = GETBITS(     compressed57_2, 7, 19);
+	colorV[2] = GETBITS(     compressed57_2, 6, 12);
+	// Expand the 6-bit and 7-bit values to 8 bits: shift up and copy the value's own top bits into the freed low bits.
+	colorO[0] = (colorO[0] << 2) | (colorO[0] >> 4);
+	colorO[1] = (colorO[1] << 1) | (colorO[1] >> 6);
+	colorO[2] = (colorO[2] << 2) | (colorO[2] >> 4);
+
+	colorH[0] = (colorH[0] << 2) | (colorH[0] >> 4);
+	colorH[1] = (colorH[1] << 1) | (colorH[1] >> 6);
+	colorH[2] = (colorH[2] << 2) | (colorH[2] >> 4);
+
+	colorV[0] = (colorV[0] << 2) | (colorV[0] >> 4);
+	colorV[1] = (colorV[1] << 1) | (colorV[1] >> 6);
+	colorV[2] = (colorV[2] << 2) | (colorV[2] >> 4);
+
+	int xx, yy;
+	// Per channel: value(xx,yy) = (xx*(H-O) + yy*(V-O) + 4*O + 2) >> 2, passed through CLAMP(0, v, 255). colorO is the value at (0,0); H and V set the slope along x and y.
+	for( xx=0; xx<4; xx++)
+	{
+		for( yy=0; yy<4; yy++)
+		{
+			img[channels*width*(starty+yy) + channels*(startx+xx) + 0] = CLAMP(0, ((xx*(colorH[0]-colorO[0]) + yy*(colorV[0]-colorO[0]) + 4*colorO[0] + 2) >> 2),255);
+			img[channels*width*(starty+yy) + channels*(startx+xx) + 1] = CLAMP(0, ((xx*(colorH[1]-colorO[1]) + yy*(colorV[1]-colorO[1]) + 4*colorO[1] + 2) >> 2),255);
+			img[channels*width*(starty+yy) + channels*(startx+xx) + 2] = CLAMP(0, ((xx*(colorH[2]-colorO[2]) + yy*(colorV[2]-colorO[2]) + 4*colorO[2] + 2) >> 2),255);
+
+			//Equivalent method
+			/*img[channels*width*(starty+yy) + channels*(startx+xx) + 0] = (int)CLAMP(0, JAS_ROUND((xx*(colorH[0]-colorO[0])/4.0 + yy*(colorV[0]-colorO[0])/4.0 + colorO[0])), 255);
+			img[channels*width*(starty+yy) + channels*(startx+xx) + 1] = (int)CLAMP(0, JAS_ROUND((xx*(colorH[1]-colorO[1])/4.0 + yy*(colorV[1]-colorO[1])/4.0 + colorO[1])), 255);
+			img[channels*width*(starty+yy) + channels*(startx+xx) + 2] = (int)CLAMP(0, JAS_ROUND((xx*(colorH[2]-colorO[2])/4.0 + yy*(colorV[2]-colorO[2])/4.0 + colorO[2])), 255);*/
+			
+		}
+	}
+}
+void decompressBlockPlanar57(unsigned int compressed57_1, unsigned int compressed57_2, uint8 *img, int width, int height, int startx, int starty)
+{ // Convenience wrapper: forwards to decompressBlockPlanar57c with channels fixed to 3.
+  decompressBlockPlanar57c(compressed57_1, compressed57_2, img, width, height, startx, starty, 3);
+}
+// Decompress an ETC1 block (or ETC2 using individual or differential mode). Writes the 4x4 texels starting at (startx, starty) into img; channels is forwarded to the RED_/GREEN_/BLUE_CHANNEL addressing macros (defined elsewhere; presumably bytes per texel). height is not used.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void decompressBlockDiffFlipC(unsigned int block_part1, unsigned int block_part2, uint8 *img, int width, int height, int startx, int starty, int channels)
+{
+	uint8 avg_color[3], enc_color1[3], enc_color2[3];
+	signed char diff[3];
+	int table;
+	int index,shift;
+	int r,g,b;	// assigned below but never read
+	int diffbit;
+	int flipbit;
+
+	diffbit = (GETBITSHIGH(block_part1, 1, 33));
+	flipbit = (GETBITSHIGH(block_part1, 1, 32));
+
+	if( !diffbit )
+	{
+		// We have diffbit = 0: each half of the block has its own base color stored with 4 bits per channel.
+
+		// First decode the first half of the block (left 2x4 half, or the top 4x2 half when flipbit is set).
+		avg_color[0]= GETBITSHIGH(block_part1, 4, 63);
+		avg_color[1]= GETBITSHIGH(block_part1, 4, 55);
+		avg_color[2]= GETBITSHIGH(block_part1, 4, 47);
+
+		// Here, we should really multiply by 17 instead of 16. This can
+		// be done by just copying the four lower bits to the upper ones
+		// while keeping the lower bits.
+		avg_color[0] |= (avg_color[0] <<4);
+		avg_color[1] |= (avg_color[1] <<4);
+		avg_color[2] |= (avg_color[2] <<4);
+
+		table = GETBITSHIGH(block_part1, 3, 39) << 1;	// 3-bit table codeword of the first half, doubled to give the row of compressParams
+
+		unsigned int pixel_indices_MSB, pixel_indices_LSB;
+			
+		pixel_indices_MSB = GETBITS(block_part2, 16, 31);
+		pixel_indices_LSB = GETBITS(block_part2, 16, 15);
+
+		if( (flipbit) == 0 )
+		{
+			// We should not flip: the first half covers columns startx..startx+1 and all four rows; the texel at local (x, y) uses index bit 4*x+y
+			shift = 0;
+			for(int x=startx; x<startx+2; x++)
+			{
+                for(int y=starty; y<starty+4; y++)
+				{
+					index  = ((pixel_indices_MSB >> shift) & 1) << 1;
+					index |= ((pixel_indices_LSB >> shift) & 1);
+					shift++;
+					index=unscramble[index];
+
+ 					r=RED_CHANNEL(img,width,x,y,channels)  =CLAMP(0,avg_color[0]+compressParams[table][index],255);
+ 					g=GREEN_CHANNEL(img,width,x,y,channels)=CLAMP(0,avg_color[1]+compressParams[table][index],255);
+ 					b=BLUE_CHANNEL(img,width,x,y,channels) =CLAMP(0,avg_color[2]+compressParams[table][index],255);
+				}
+			}
+		}
+		else
+		{
+			// We should flip: the first half covers all four columns and rows starty..starty+1
+			shift = 0;
+			for(int x=startx; x<startx+4; x++)
+			{
+				for(int y=starty; y<starty+2; y++)
+				{
+					index  = ((pixel_indices_MSB >> shift) & 1) << 1;
+					index |= ((pixel_indices_LSB >> shift) & 1);
+					shift++;
+					index=unscramble[index];
+
+ 					r=RED_CHANNEL(img,width,x,y,channels)  =CLAMP(0,avg_color[0]+compressParams[table][index],255);
+ 					g=GREEN_CHANNEL(img,width,x,y,channels)=CLAMP(0,avg_color[1]+compressParams[table][index],255);
+ 					b=BLUE_CHANNEL(img,width,x,y,channels) =CLAMP(0,avg_color[2]+compressParams[table][index],255);
+				}
+				shift+=2;	// skip the two index bits of this column that belong to the other half
+			}
+		}
+
+		// Now decode other part of block. 
+		avg_color[0]= GETBITSHIGH(block_part1, 4, 59);
+		avg_color[1]= GETBITSHIGH(block_part1, 4, 51);
+		avg_color[2]= GETBITSHIGH(block_part1, 4, 43);
+
+		// Here, we should really multiply by 17 instead of 16. This can
+		// be done by just copying the four lower bits to the upper ones
+		// while keeping the lower bits.
+		avg_color[0] |= (avg_color[0] <<4);
+		avg_color[1] |= (avg_color[1] <<4);
+		avg_color[2] |= (avg_color[2] <<4);
+
+		table = GETBITSHIGH(block_part1, 3, 36) << 1;	// table codeword of the second half
+		pixel_indices_MSB = GETBITS(block_part2, 16, 31);
+		pixel_indices_LSB = GETBITS(block_part2, 16, 15);
+
+		if( (flipbit) == 0 )
+		{
+			// We should not flip: the second half covers columns startx+2..startx+3, whose index bits start at bit 8
+			shift=8;
+			for(int x=startx+2; x<startx+4; x++)
+			{
+				for(int y=starty; y<starty+4; y++)
+				{
+					index  = ((pixel_indices_MSB >> shift) & 1) << 1;
+					index |= ((pixel_indices_LSB >> shift) & 1);
+					shift++;
+					index=unscramble[index];
+
+ 					r=RED_CHANNEL(img,width,x,y,channels)  =CLAMP(0,avg_color[0]+compressParams[table][index],255);
+ 					g=GREEN_CHANNEL(img,width,x,y,channels)=CLAMP(0,avg_color[1]+compressParams[table][index],255);
+ 					b=BLUE_CHANNEL(img,width,x,y,channels) =CLAMP(0,avg_color[2]+compressParams[table][index],255);
+				}
+			}
+		}
+		else
+		{
+			// We should flip: the second half covers rows starty+2..starty+3, so each column starts two index bits in
+			shift=2;
+			for(int x=startx; x<startx+4; x++)
+			{
+				for(int y=starty+2; y<starty+4; y++)
+				{
+					index  = ((pixel_indices_MSB >> shift) & 1) << 1;
+					index |= ((pixel_indices_LSB >> shift) & 1);
+					shift++;
+					index=unscramble[index];
+
+ 					r=RED_CHANNEL(img,width,x,y,channels)  =CLAMP(0,avg_color[0]+compressParams[table][index],255);
+ 					g=GREEN_CHANNEL(img,width,x,y,channels)=CLAMP(0,avg_color[1]+compressParams[table][index],255);
+ 					b=BLUE_CHANNEL(img,width,x,y,channels) =CLAMP(0,avg_color[2]+compressParams[table][index],255);
+				}
+				shift += 2;
+			}
+		}
+	}
+	else
+	{
+		// We have diffbit = 1: a 5-bit base color for the first half and a signed 3-bit delta per channel for the second half.
+
+//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+//      ---------------------------------------------------------------------------------------------------
+//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+//      ---------------------------------------------------------------------------------------------------
+// 
+// 
+//     c) bit layout in bits 31 through 0 (in both cases)
+// 
+//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
+//      --------------------------------------------------------------------------------------------------
+//     |       most significant pixel index bits       |         least significant pixel index bits       |  
+//     | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
+//      --------------------------------------------------------------------------------------------------      
+
+		// First decode the first half of the block (left 2x4 half, or the top 4x2 half when flipbit is set).
+		enc_color1[0]= GETBITSHIGH(block_part1, 5, 63);
+		enc_color1[1]= GETBITSHIGH(block_part1, 5, 55);
+		enc_color1[2]= GETBITSHIGH(block_part1, 5, 47);
+
+		// Expand from 5 to 8 bits
+		avg_color[0] = (enc_color1[0] <<3) | (enc_color1[0] >> 2);
+		avg_color[1] = (enc_color1[1] <<3) | (enc_color1[1] >> 2);
+		avg_color[2] = (enc_color1[2] <<3) | (enc_color1[2] >> 2);
+
+		table = GETBITSHIGH(block_part1, 3, 39) << 1;
+
+		unsigned int pixel_indices_MSB, pixel_indices_LSB;
+			
+		pixel_indices_MSB = GETBITS(block_part2, 16, 31);
+		pixel_indices_LSB = GETBITS(block_part2, 16, 15);
+
+		if( (flipbit) == 0 )
+		{
+			// We should not flip
+			shift = 0;
+			for(int x=startx; x<startx+2; x++)
+			{
+				for(int y=starty; y<starty+4; y++)
+				{
+					index  = ((pixel_indices_MSB >> shift) & 1) << 1;
+					index |= ((pixel_indices_LSB >> shift) & 1);
+					shift++;
+					index=unscramble[index];
+
+ 					r=RED_CHANNEL(img,width,x,y,channels)  =CLAMP(0,avg_color[0]+compressParams[table][index],255);
+ 					g=GREEN_CHANNEL(img,width,x,y,channels)=CLAMP(0,avg_color[1]+compressParams[table][index],255);
+ 					b=BLUE_CHANNEL(img,width,x,y,channels) =CLAMP(0,avg_color[2]+compressParams[table][index],255);
+				}
+			}
+		}
+		else
+		{
+			// We should flip
+			shift = 0;
+			for(int x=startx; x<startx+4; x++)
+			{
+				for(int y=starty; y<starty+2; y++)
+				{
+					index  = ((pixel_indices_MSB >> shift) & 1) << 1;
+					index |= ((pixel_indices_LSB >> shift) & 1);
+					shift++;
+					index=unscramble[index];
+
+ 					r=RED_CHANNEL(img,width,x,y,channels)  =CLAMP(0,avg_color[0]+compressParams[table][index],255);
+ 					g=GREEN_CHANNEL(img,width,x,y,channels)=CLAMP(0,avg_color[1]+compressParams[table][index],255);
+ 					b=BLUE_CHANNEL(img,width,x,y,channels) =CLAMP(0,avg_color[2]+compressParams[table][index],255);
+				}
+				shift+=2;
+			}
+		}
+
+		// Now decode the second half of the block (right half, or bottom half when flipbit is set).
+		diff[0]= GETBITSHIGH(block_part1, 3, 58);
+		diff[1]= GETBITSHIGH(block_part1, 3, 50);
+		diff[2]= GETBITSHIGH(block_part1, 3, 42);
+
+		// Extend sign bit to entire byte. NOTE(review): relies on signed char narrowing and right shift of negative values behaving arithmetically.
+		diff[0] = (diff[0] << 5);
+		diff[1] = (diff[1] << 5);
+		diff[2] = (diff[2] << 5);
+		diff[0] = diff[0] >> 5;
+		diff[1] = diff[1] >> 5;
+		diff[2] = diff[2] >> 5;
+
+		//  Calculate second color
+		enc_color2[0]= enc_color1[0] + diff[0];
+		enc_color2[1]= enc_color1[1] + diff[1];
+		enc_color2[2]= enc_color1[2] + diff[2];
+
+		// Expand from 5 to 8 bits
+		avg_color[0] = (enc_color2[0] <<3) | (enc_color2[0] >> 2);
+		avg_color[1] = (enc_color2[1] <<3) | (enc_color2[1] >> 2);
+		avg_color[2] = (enc_color2[2] <<3) | (enc_color2[2] >> 2);
+
+		table = GETBITSHIGH(block_part1, 3, 36) << 1;
+		pixel_indices_MSB = GETBITS(block_part2, 16, 31);
+		pixel_indices_LSB = GETBITS(block_part2, 16, 15);
+
+		if( (flipbit) == 0 )
+		{
+			// We should not flip
+			shift=8;
+			for(int x=startx+2; x<startx+4; x++)
+			{
+				for(int y=starty; y<starty+4; y++)
+				{
+					index  = ((pixel_indices_MSB >> shift) & 1) << 1;
+					index |= ((pixel_indices_LSB >> shift) & 1);
+					shift++;
+					index=unscramble[index];
+
+ 					r=RED_CHANNEL(img,width,x,y,channels)  =CLAMP(0,avg_color[0]+compressParams[table][index],255);
+ 					g=GREEN_CHANNEL(img,width,x,y,channels)=CLAMP(0,avg_color[1]+compressParams[table][index],255);
+ 					b=BLUE_CHANNEL(img,width,x,y,channels) =CLAMP(0,avg_color[2]+compressParams[table][index],255);
+				}
+			}
+		}
+		else
+		{
+			// We should flip
+			shift=2;
+			for(int x=startx; x<startx+4; x++)
+			{
+				for(int y=starty+2; y<starty+4; y++)
+				{
+					index  = ((pixel_indices_MSB >> shift) & 1) << 1;
+					index |= ((pixel_indices_LSB >> shift) & 1);
+					shift++;
+					index=unscramble[index];
+
+ 					r=RED_CHANNEL(img,width,x,y,channels)  =CLAMP(0,avg_color[0]+compressParams[table][index],255);
+ 					g=GREEN_CHANNEL(img,width,x,y,channels)=CLAMP(0,avg_color[1]+compressParams[table][index],255);
+ 					b=BLUE_CHANNEL(img,width,x,y,channels) =CLAMP(0,avg_color[2]+compressParams[table][index],255);
+				}
+				shift += 2;
+			}
+		}
+	}
+}
+void decompressBlockDiffFlip(unsigned int block_part1, unsigned int block_part2, uint8 *img, int width, int height, int startx, int starty)
+{ // Convenience wrapper: forwards to decompressBlockDiffFlipC with channels fixed to 3.
+  decompressBlockDiffFlipC(block_part1, block_part2, img, width, height, startx, starty, 3);
+}
+
+// Decompress an ETC2 RGB block: inspects the bits of block_part1 to pick the mode, then forwards all arguments (block words re-packed where needed) to the matching mode decoder, which writes into img.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void decompressBlockETC2c(unsigned int block_part1, unsigned int block_part2, uint8 *img, int width, int height, int startx, int starty, int channels)
+{
+	int diffbit;
+	signed char color1[3];
+	signed char diff[3];
+	signed char red, green, blue;
+
+	diffbit = (GETBITSHIGH(block_part1, 1, 33));
+
+	if( diffbit )
+	{
+		// We have diffbit = 1;
+
+		// Base color
+		color1[0]= GETBITSHIGH(block_part1, 5, 63);
+		color1[1]= GETBITSHIGH(block_part1, 5, 55);
+		color1[2]= GETBITSHIGH(block_part1, 5, 47);
+
+		// Diff color
+		diff[0]= GETBITSHIGH(block_part1, 3, 58);
+		diff[1]= GETBITSHIGH(block_part1, 3, 50);
+		diff[2]= GETBITSHIGH(block_part1, 3, 42);
+
+		// Extend sign bit to entire byte. 
+		diff[0] = (diff[0] << 5);
+		diff[1] = (diff[1] << 5);
+		diff[2] = (diff[2] << 5);
+		diff[0] = diff[0] >> 5;
+		diff[1] = diff[1] >> 5;
+		diff[2] = diff[2] >> 5;
+		// 5-bit base plus signed 3-bit delta: a sum outside 0..31 selects one of the extra modes, tested in the order red, green, blue.
+		red   = color1[0] + diff[0];
+		green = color1[1] + diff[1];
+		blue  = color1[2] + diff[2];
+
+		if(red < 0 || red > 31)
+		{
+			unsigned int block59_part1, block59_part2;	// red out of range: T-mode
+			unstuff59bits(block_part1, block_part2, block59_part1, block59_part2);
+			decompressBlockTHUMB59Tc(block59_part1, block59_part2, img, width, height, startx, starty, channels);
+		}
+		else if (green < 0 || green > 31)
+		{
+			unsigned int block58_part1, block58_part2;	// green out of range: H-mode
+			unstuff58bits(block_part1, block_part2, block58_part1, block58_part2);
+			decompressBlockTHUMB58Hc(block58_part1, block58_part2, img, width, height, startx, starty, channels);
+		}
+		else if(blue < 0 || blue > 31)
+		{
+			unsigned int block57_part1, block57_part2;
+			// blue out of range: planar mode
+			unstuff57bits(block_part1, block_part2, block57_part1, block57_part2);
+			decompressBlockPlanar57c(block57_part1, block57_part2, img, width, height, startx, starty, channels);
+		}
+		else
+		{
+ 			decompressBlockDiffFlipC(block_part1, block_part2, img, width, height, startx, starty, channels);	// all sums in range: ordinary differential block
+		}
+	}
+	else
+	{
+		// We have diffbit = 0;
+		decompressBlockDiffFlipC(block_part1, block_part2, img, width, height, startx, starty, channels);
+	}
+}
+void decompressBlockETC2(unsigned int block_part1, unsigned int block_part2, uint8 *img, int width, int height, int startx, int starty)
+{ // Convenience wrapper: forwards to decompressBlockETC2c with channels fixed to 3.
+  decompressBlockETC2c(block_part1, block_part2, img, width, height, startx, starty, 3);
+}
+// Decompress an ETC2 block with punchthrough alpha: decodes the 4x4 texels at (startx, starty) as a base color plus delta block and writes RGB to img and 0 or 255 to alpha. With channelsRGB == 3 alpha is a separate one-byte-per-texel buffer; otherwise the alpha argument is ignored and alpha is written into img at byte offset 3 of each 4-byte texel. height is not used.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void decompressBlockDifferentialWithAlphaC(unsigned int block_part1, unsigned int block_part2, uint8* img, uint8* alpha, int width, int height, int startx, int starty, int channelsRGB)
+{
+	
+	uint8 avg_color[3], enc_color1[3], enc_color2[3];
+	signed char diff[3];
+	int table;
+	int index,shift;
+	int r,g,b;	// assigned below but never read
+	int diffbit;
+	int flipbit;
+  int channelsA;
+
+  if(channelsRGB == 3)
+  {
+    // We will decode the alpha data to a separate memory area. 
+    channelsA = 1;
+  }
+  else
+  {
+    // We will decode the RGB data and the alpha data to the same memory area, 
+    // interleaved as RGBA. 
+    channelsA = 4;
+    alpha = &img[0+3];
+  }
+
+	//the diffbit now encodes whether or not the entire alpha channel is 255 (diffbit == 1: every texel gets alpha 255).
+	diffbit = (GETBITSHIGH(block_part1, 1, 33));
+ 	flipbit = (GETBITSHIGH(block_part1, 1, 32));
+
+	// First decode the first half of the block (left 2x4 half, or the top 4x2 half when flipbit is set).
+	enc_color1[0]= GETBITSHIGH(block_part1, 5, 63);
+	enc_color1[1]= GETBITSHIGH(block_part1, 5, 55);
+	enc_color1[2]= GETBITSHIGH(block_part1, 5, 47);
+
+	// Expand from 5 to 8 bits
+	avg_color[0] = (enc_color1[0] <<3) | (enc_color1[0] >> 2);
+	avg_color[1] = (enc_color1[1] <<3) | (enc_color1[1] >> 2);
+	avg_color[2] = (enc_color1[2] <<3) | (enc_color1[2] >> 2);
+
+	table = GETBITSHIGH(block_part1, 3, 39) << 1;
+
+	unsigned int pixel_indices_MSB, pixel_indices_LSB;
+		
+	pixel_indices_MSB = GETBITS(block_part2, 16, 31);
+	pixel_indices_LSB = GETBITS(block_part2, 16, 15);
+
+	if( (flipbit) == 0 )
+	{
+		// We should not flip
+		shift = 0;
+		for(int x=startx; x<startx+2; x++)
+		{
+			for(int y=starty; y<starty+4; y++)
+			{
+				index  = ((pixel_indices_MSB >> shift) & 1) << 1;
+				index |= ((pixel_indices_LSB >> shift) & 1);
+				shift++;
+				index=unscramble[index];
+
+				int mod = compressParams[table][index];
+				if(diffbit==0&&(index==1||index==2)) 
+				{
+					mod=0;	// with diffbit == 0, unscrambled indices 1 and 2 apply no modifier to the base color
+				}
+				
+				r=RED_CHANNEL(img,width,x,y,channelsRGB)  =CLAMP(0,avg_color[0]+mod,255);
+				g=GREEN_CHANNEL(img,width,x,y,channelsRGB)=CLAMP(0,avg_color[1]+mod,255);
+				b=BLUE_CHANNEL(img,width,x,y,channelsRGB) =CLAMP(0,avg_color[2]+mod,255);
+				if(diffbit==0&&index==1) 
+				{
+					alpha[(y*width+x)*channelsA]=0;	// punched-through texel: alpha 0 and RGB forced to 0
+					r=RED_CHANNEL(img,width,x,y,channelsRGB)=0;
+					g=GREEN_CHANNEL(img,width,x,y,channelsRGB)=0;
+					b=BLUE_CHANNEL(img,width,x,y,channelsRGB)=0;
+				}
+				else 
+				{
+					alpha[(y*width+x)*channelsA]=255;
+				}
+
+			}
+		}
+	}
+	else
+	{
+		// We should flip
+		shift = 0;
+		for(int x=startx; x<startx+4; x++)
+		{
+			for(int y=starty; y<starty+2; y++)
+			{
+				index  = ((pixel_indices_MSB >> shift) & 1) << 1;
+				index |= ((pixel_indices_LSB >> shift) & 1);
+				shift++;
+				index=unscramble[index];
+				int mod = compressParams[table][index];
+				if(diffbit==0&&(index==1||index==2)) 
+				{
+					mod=0;
+				}
+				r=RED_CHANNEL(img,width,x,y,channelsRGB)  =CLAMP(0,avg_color[0]+mod,255);
+				g=GREEN_CHANNEL(img,width,x,y,channelsRGB)=CLAMP(0,avg_color[1]+mod,255);
+				b=BLUE_CHANNEL(img,width,x,y,channelsRGB) =CLAMP(0,avg_color[2]+mod,255);
+				if(diffbit==0&&index==1) 
+				{
+					alpha[(y*width+x)*channelsA]=0;
+					r=RED_CHANNEL(img,width,x,y,channelsRGB)=0;
+					g=GREEN_CHANNEL(img,width,x,y,channelsRGB)=0;
+					b=BLUE_CHANNEL(img,width,x,y,channelsRGB)=0;
+				}
+				else 
+				{
+					alpha[(y*width+x)*channelsA]=255;
+				}
+			}
+			shift+=2;	// skip the two index bits of this column that belong to the other half
+		}
+	}
+	// Now decode the second half of the block (right half, or bottom half when flipbit is set).
+	diff[0]= GETBITSHIGH(block_part1, 3, 58);
+	diff[1]= GETBITSHIGH(block_part1, 3, 50);
+	diff[2]= GETBITSHIGH(block_part1, 3, 42);
+
+	// Extend sign bit to entire byte. 
+	diff[0] = (diff[0] << 5);
+	diff[1] = (diff[1] << 5);
+	diff[2] = (diff[2] << 5);
+	diff[0] = diff[0] >> 5;
+	diff[1] = diff[1] >> 5;
+	diff[2] = diff[2] >> 5;
+
+	//  Calculate second color
+	enc_color2[0]= enc_color1[0] + diff[0];
+	enc_color2[1]= enc_color1[1] + diff[1];
+	enc_color2[2]= enc_color1[2] + diff[2];
+
+	// Expand from 5 to 8 bits
+	avg_color[0] = (enc_color2[0] <<3) | (enc_color2[0] >> 2);
+	avg_color[1] = (enc_color2[1] <<3) | (enc_color2[1] >> 2);
+	avg_color[2] = (enc_color2[2] <<3) | (enc_color2[2] >> 2);
+
+	table = GETBITSHIGH(block_part1, 3, 36) << 1;
+	pixel_indices_MSB = GETBITS(block_part2, 16, 31);
+	pixel_indices_LSB = GETBITS(block_part2, 16, 15);
+
+	if( (flipbit) == 0 )
+	{
+		// We should not flip
+		shift=8;
+		for(int x=startx+2; x<startx+4; x++)
+		{
+			for(int y=starty; y<starty+4; y++)
+			{
+				index  = ((pixel_indices_MSB >> shift) & 1) << 1;
+				index |= ((pixel_indices_LSB >> shift) & 1);
+				shift++;
+				index=unscramble[index];
+				int mod = compressParams[table][index];
+				if(diffbit==0&&(index==1||index==2)) 
+				{
+					mod=0;
+				}
+				
+				r=RED_CHANNEL(img,width,x,y,channelsRGB)  =CLAMP(0,avg_color[0]+mod,255);
+				g=GREEN_CHANNEL(img,width,x,y,channelsRGB)=CLAMP(0,avg_color[1]+mod,255);
+				b=BLUE_CHANNEL(img,width,x,y,channelsRGB) =CLAMP(0,avg_color[2]+mod,255);
+				if(diffbit==0&&index==1) 
+				{
+					alpha[(y*width+x)*channelsA]=0;
+					r=RED_CHANNEL(img,width,x,y,channelsRGB)=0;
+					g=GREEN_CHANNEL(img,width,x,y,channelsRGB)=0;
+					b=BLUE_CHANNEL(img,width,x,y,channelsRGB)=0;
+				}
+				else 
+				{
+					alpha[(y*width+x)*channelsA]=255;
+				}
+			}
+		}
+	}
+	else
+	{
+		// We should flip
+		shift=2;
+		for(int x=startx; x<startx+4; x++)
+		{
+			for(int y=starty+2; y<starty+4; y++)
+			{
+				index  = ((pixel_indices_MSB >> shift) & 1) << 1;
+				index |= ((pixel_indices_LSB >> shift) & 1);
+				shift++;
+				index=unscramble[index];
+				int mod = compressParams[table][index];
+				if(diffbit==0&&(index==1||index==2)) 
+				{
+					mod=0;
+				}
+				
+				r=RED_CHANNEL(img,width,x,y,channelsRGB)  =CLAMP(0,avg_color[0]+mod,255);
+				g=GREEN_CHANNEL(img,width,x,y,channelsRGB)=CLAMP(0,avg_color[1]+mod,255);
+				b=BLUE_CHANNEL(img,width,x,y,channelsRGB) =CLAMP(0,avg_color[2]+mod,255);
+				if(diffbit==0&&index==1) 
+				{
+					alpha[(y*width+x)*channelsA]=0;
+					r=RED_CHANNEL(img,width,x,y,channelsRGB)=0;
+					g=GREEN_CHANNEL(img,width,x,y,channelsRGB)=0;
+					b=BLUE_CHANNEL(img,width,x,y,channelsRGB)=0;
+				}
+				else 
+				{
+					alpha[(y*width+x)*channelsA]=255;
+				}
+			}
+			shift += 2;
+		}
+	}
+}
+void decompressBlockDifferentialWithAlpha(unsigned int block_part1, unsigned int block_part2, uint8* img, uint8* alpha, int width, int height, int startx, int starty) // Thin wrapper around decompressBlockDifferentialWithAlphaC.
+{
+  decompressBlockDifferentialWithAlphaC(block_part1, block_part2, img, alpha, width, height, startx, starty, 3); // forwards every argument unchanged and appends 3 (presumably channelsRGB, i.e. packed RGB with a separate alpha buffer -- confirm against the callee's signature)
+}
+
+
+// T-mode block decoder with punch-through alpha: like regular decompression, but a texel whose 2-bit paint index is 2 gets alpha 0 and RGB forced to 0; every other texel gets alpha 255.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void decompressBlockTHUMB59TAlphaC(unsigned int block_part1, unsigned int block_part2, uint8 *img, uint8* alpha, int width, int height, int startx, int starty, int channelsRGB)
+{ // note: the height parameter is not read anywhere in this function
+
+	uint8 colorsRGB444[2][3]; // the two base colors as stored, one 4-bit field per channel
+	uint8 colors[2][3]; // output of decompressColor() for those two colors
+	uint8 paint_colors[4][3]; // four-entry palette filled in by calculatePaintColors59T()
+	uint8 distance; // distance-table index read from the block
+	uint8 block_mask[4][4]; // per-texel palette index, addressed [x][y]
+  int channelsA; // stride, in bytes, between consecutive alpha samples
+
+  if(channelsRGB == 3)
+  {
+    // We will decode the alpha data to a separate memory area. 
+    channelsA = 1;
+  }
+  else
+  {
+    // We will decode the RGB data and the alpha data to the same memory area, 
+    // interleaved as RGBA. 
+    channelsA = 4;
+    alpha = &img[0+3]; // the caller's alpha pointer is ignored; alpha now aliases byte 3 of the first texel in img
+  }
+
+	// Unpack the two base colors (4 bits per channel) from the upper 32-bit word.
+	colorsRGB444[0][R]= GETBITSHIGH(block_part1, 4, 58);
+	colorsRGB444[0][G]= GETBITSHIGH(block_part1, 4, 54);
+	colorsRGB444[0][B]= GETBITSHIGH(block_part1, 4, 50);
+
+	colorsRGB444[1][R]= GETBITSHIGH(block_part1, 4, 46);
+	colorsRGB444[1][G]= GETBITSHIGH(block_part1, 4, 42);
+	colorsRGB444[1][B]= GETBITSHIGH(block_part1, 4, 38);
+
+	distance   = GETBITSHIGH(block_part1, TABLE_BITS_59T, 34); // TABLE_BITS_59T-bit field whose top bit is bit 34
+
+	// Extend the two colors to RGB888	
+	decompressColor(R_BITS59T, G_BITS59T, B_BITS59T, colorsRGB444, colors);	
+	calculatePaintColors59T(distance, PATTERN_T, colors, paint_colors);
+	
+	// Choose one of the four paint colors for each texel
+	for (uint8 x = 0; x < BLOCKWIDTH; ++x) 
+	{
+		for (uint8 y = 0; y < BLOCKHEIGHT; ++y) 
+		{
+			//block_mask[x][y] = GETBITS(block_part2,2,31-(y*4+x)*2);
+			block_mask[x][y] = GETBITS(block_part2,1,(y+x*4)+16)<<1; // high bit of the index comes from bits 16..31 of block_part2
+			block_mask[x][y] |= GETBITS(block_part2,1,(y+x*4)); // low bit of the index comes from bits 0..15
+			img[channelsRGB*((starty+y)*width+startx+x)+R] = 
+				CLAMP(0,paint_colors[block_mask[x][y]][R],255); // RED
+			img[channelsRGB*((starty+y)*width+startx+x)+G] =
+				CLAMP(0,paint_colors[block_mask[x][y]][G],255); // GREEN
+			img[channelsRGB*((starty+y)*width+startx+x)+B] =
+				CLAMP(0,paint_colors[block_mask[x][y]][B],255); // BLUE
+			if(block_mask[x][y]==2)  // index 2 marks a transparent texel
+			{
+				alpha[channelsA*(x+startx+(y+starty)*width)]=0;
+				img[channelsRGB*((starty+y)*width+startx+x)+R] =0; // transparent texels are also written as black, replacing the color stored just above
+				img[channelsRGB*((starty+y)*width+startx+x)+G] =0;
+				img[channelsRGB*((starty+y)*width+startx+x)+B] =0;
+			}
+			else
+				alpha[channelsA*(x+startx+(y+starty)*width)]=255; // every other index is fully opaque
+		}
+	}
+}
+void decompressBlockTHUMB59TAlpha(unsigned int block_part1, unsigned int block_part2, uint8 *img, uint8* alpha, int width, int height, int startx, int starty) // Wrapper around decompressBlockTHUMB59TAlphaC for packed RGB output.
+{
+  decompressBlockTHUMB59TAlphaC(block_part1, block_part2, img, alpha, width, height, startx, starty, 3); // channelsRGB = 3: img holds 3 bytes per texel and alpha values go to the separate alpha buffer
+}
+
+
+// Decompress an H-mode block with punch-through alpha: a texel whose paint index is 2 gets alpha 0 and RGB forced to 0; every other texel gets alpha 255.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void decompressBlockTHUMB58HAlphaC(unsigned int block_part1, unsigned int block_part2, uint8 *img, uint8* alpha, int width, int height, int startx, int starty, int channelsRGB)
+{ // note: the height parameter is not read anywhere in this function
+	unsigned int col0, col1; // the two base colors as raw 12-bit values, used only for the ordering test below
+	uint8 colors[2][3]; // output of decompressColor() for the two base colors
+	uint8 colorsRGB444[2][3]; // the two base colors as stored, one 4-bit field per channel
+	uint8 paint_colors[4][3]; // four-entry palette filled in by calculatePaintColors58H()
+	uint8 distance; // distance-table index: two stored bits plus one implied bit
+	uint8 block_mask[4][4]; // per-texel palette index, addressed [x][y]
+  int channelsA;	// stride, in bytes, between consecutive alpha samples
+
+  if(channelsRGB == 3)
+  {
+    // We will decode the alpha data to a separate memory area. 
+    channelsA = 1;
+  }
+  else
+  {
+    // We will decode the RGB data and the alpha data to the same memory area, 
+    // interleaved as RGBA. 
+    channelsA = 4;
+    alpha = &img[0+3]; // the caller's alpha pointer is ignored; alpha now aliases byte 3 of the first texel in img
+  }
+
+	// Unpack the two base colors (4 bits per channel) from the upper 32-bit word.
+	colorsRGB444[0][R]= GETBITSHIGH(block_part1, 4, 57);
+	colorsRGB444[0][G]= GETBITSHIGH(block_part1, 4, 53);
+	colorsRGB444[0][B]= GETBITSHIGH(block_part1, 4, 49);
+
+	colorsRGB444[1][R]= GETBITSHIGH(block_part1, 4, 45);
+	colorsRGB444[1][G]= GETBITSHIGH(block_part1, 4, 41);
+	colorsRGB444[1][B]= GETBITSHIGH(block_part1, 4, 37);
+
+  distance = 0; // redundant: overwritten by the next statement
+	distance = (GETBITSHIGH(block_part1, 2, 33)) << 1; // the two stored bits become bits 2..1 of the index
+
+	col0 = GETBITSHIGH(block_part1, 12, 57); // same 12 bits as colorsRGB444[0], read as one number
+	col1 = GETBITSHIGH(block_part1, 12, 45); // same 12 bits as colorsRGB444[1], read as one number
+
+	if(col0 >= col1) // bit 0 of the index is not stored; it is derived from the ordering of the two colors
+	{
+		distance |= 1;
+	}
+
+	// Extend the two colors to RGB888	
+	decompressColor(R_BITS58H, G_BITS58H, B_BITS58H, colorsRGB444, colors);	
+	
+	calculatePaintColors58H(distance, PATTERN_H, colors, paint_colors);
+	
+	// Choose one of the four paint colors for each texel
+	for (uint8 x = 0; x < BLOCKWIDTH; ++x) 
+	{
+		for (uint8 y = 0; y < BLOCKHEIGHT; ++y) 
+		{
+			//block_mask[x][y] = GETBITS(block_part2,2,31-(y*4+x)*2);
+			block_mask[x][y] = GETBITS(block_part2,1,(y+x*4)+16)<<1; // high bit of the index comes from bits 16..31 of block_part2
+			block_mask[x][y] |= GETBITS(block_part2,1,(y+x*4)); // low bit of the index comes from bits 0..15
+			img[channelsRGB*((starty+y)*width+startx+x)+R] =
+				CLAMP(0,paint_colors[block_mask[x][y]][R],255); // RED
+			img[channelsRGB*((starty+y)*width+startx+x)+G] =
+				CLAMP(0,paint_colors[block_mask[x][y]][G],255); // GREEN
+			img[channelsRGB*((starty+y)*width+startx+x)+B] =
+				CLAMP(0,paint_colors[block_mask[x][y]][B],255); // BLUE
+			
+			if(block_mask[x][y]==2)  // index 2 marks a transparent texel
+			{
+				alpha[channelsA*(x+startx+(y+starty)*width)]=0;
+				img[channelsRGB*((starty+y)*width+startx+x)+R] =0; // transparent texels are also written as black, replacing the color stored just above
+				img[channelsRGB*((starty+y)*width+startx+x)+G] =0;
+				img[channelsRGB*((starty+y)*width+startx+x)+B] =0;
+			}
+			else
+				alpha[channelsA*(x+startx+(y+starty)*width)]=255; // every other index is fully opaque
+		}
+	}
+}
+void decompressBlockTHUMB58HAlpha(unsigned int block_part1, unsigned int block_part2, uint8 *img, uint8* alpha, int width, int height, int startx, int starty) // Wrapper around decompressBlockTHUMB58HAlphaC for packed RGB output.
+{
+  decompressBlockTHUMB58HAlphaC(block_part1, block_part2, img, alpha, width, height, startx, starty, 3); // channelsRGB = 3: img holds 3 bytes per texel and alpha values go to the separate alpha buffer
+}
+// Decompression function for ETC2_RGBA1 format: decodes one 4x4 block, choosing the sub-decoder from the diff bit and from whether base+diff leaves the 5-bit range.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void decompressBlockETC21BitAlphaC(unsigned int block_part1, unsigned int block_part2, uint8 *img, uint8* alphaimg, int width, int height, int startx, int starty, int channelsRGB)
+{
+	int diffbit; // bit 33 of the block; non-zero selects the fully opaque path
+	signed char color1[3]; // 5-bit base color per channel
+	signed char diff[3]; // 3-bit signed delta per channel
+	signed char red, green, blue; // base + delta; only used to detect which mode the block is in
+  int channelsA;	// stride, in bytes, between consecutive alpha samples
+
+  if(channelsRGB == 3)
+  {
+    // We will decode the alpha data to a separate memory area. 
+    channelsA = 1;
+  }
+  else
+  {
+    // We will decode the RGB data and the alpha data to the same memory area, 
+    // interleaved as RGBA. 
+    channelsA = 4;
+    alphaimg = &img[0+3]; // the caller's alphaimg pointer is ignored; it now aliases byte 3 of the first texel in img
+  }
+
+	diffbit = (GETBITSHIGH(block_part1, 1, 33));
+
+	if( diffbit )
+	{
+		// We have diffbit = 1, meaning no transparent pixels. regular decompression.
+
+		// Base color
+		color1[0]= GETBITSHIGH(block_part1, 5, 63);
+		color1[1]= GETBITSHIGH(block_part1, 5, 55);
+		color1[2]= GETBITSHIGH(block_part1, 5, 47);
+
+		// Diff color
+		diff[0]= GETBITSHIGH(block_part1, 3, 58);
+		diff[1]= GETBITSHIGH(block_part1, 3, 50);
+		diff[2]= GETBITSHIGH(block_part1, 3, 42);
+
+		// Extend sign bit to entire byte. 
+		diff[0] = (diff[0] << 5); // move the 3-bit field to the top of the byte ...
+		diff[1] = (diff[1] << 5);
+		diff[2] = (diff[2] << 5);
+		diff[0] = diff[0] >> 5; // ... and shift it back down (relies on >> of a negative signed char being arithmetic)
+		diff[1] = diff[1] >> 5;
+		diff[2] = diff[2] >> 5;
+
+		red   = color1[0] + diff[0];
+		green = color1[1] + diff[1];
+		blue  = color1[2] + diff[2];
+
+		if(red < 0 || red > 31) // red out of the 5-bit range: decode as a T-mode block
+		{
+			unsigned int block59_part1, block59_part2;
+			unstuff59bits(block_part1, block_part2, block59_part1, block59_part2);
+			decompressBlockTHUMB59Tc(block59_part1, block59_part2, img, width, height, startx, starty, channelsRGB);
+		}
+		else if (green < 0 || green > 31) // green out of range: decode as an H-mode block
+		{
+			unsigned int block58_part1, block58_part2;
+			unstuff58bits(block_part1, block_part2, block58_part1, block58_part2);
+			decompressBlockTHUMB58Hc(block58_part1, block58_part2, img, width, height, startx, starty, channelsRGB);
+		}
+		else if(blue < 0 || blue > 31) // blue out of range: decode as a planar block
+		{
+			unsigned int block57_part1, block57_part2;
+
+			unstuff57bits(block_part1, block_part2, block57_part1, block57_part2);
+			decompressBlockPlanar57c(block57_part1, block57_part2, img, width, height, startx, starty, channelsRGB);
+		}
+		else // all three sums in range: differential block
+		{
+ 			decompressBlockDifferentialWithAlphaC(block_part1, block_part2, img, alphaimg, width, height, startx, starty, channelsRGB);
+		}
+		for(int x=startx; x<startx+4; x++) // opaque block: alpha is set to 255 for all 16 texels, whichever decoder produced the colors
+		{
+			for(int y=starty; y<starty+4; y++) 
+			{
+				alphaimg[channelsA*(x+y*width)]=255;
+			}
+		}
+	}
+	else
+	{
+		// We have diffbit = 0, so transparent pixels are possible. T-, H- and differential blocks go to the alpha-aware decoders; a planar block is decoded as fully opaque.
+		
+		// Base color
+		color1[0]= GETBITSHIGH(block_part1, 5, 63);
+		color1[1]= GETBITSHIGH(block_part1, 5, 55);
+		color1[2]= GETBITSHIGH(block_part1, 5, 47);
+
+		// Diff color
+		diff[0]= GETBITSHIGH(block_part1, 3, 58);
+		diff[1]= GETBITSHIGH(block_part1, 3, 50);
+		diff[2]= GETBITSHIGH(block_part1, 3, 42);
+
+		// Extend sign bit to entire byte. 
+		diff[0] = (diff[0] << 5); // same sign-extension sequence as in the diffbit = 1 branch
+		diff[1] = (diff[1] << 5);
+		diff[2] = (diff[2] << 5);
+		diff[0] = diff[0] >> 5;
+		diff[1] = diff[1] >> 5;
+		diff[2] = diff[2] >> 5;
+
+		red   = color1[0] + diff[0];
+		green = color1[1] + diff[1];
+		blue  = color1[2] + diff[2];
+		if(red < 0 || red > 31) // red out of range: T-mode, alpha-aware decoder
+		{
+			unsigned int block59_part1, block59_part2;
+			unstuff59bits(block_part1, block_part2, block59_part1, block59_part2);
+			decompressBlockTHUMB59TAlphaC(block59_part1, block59_part2, img, alphaimg, width, height, startx, starty, channelsRGB);
+		}
+		else if(green < 0 || green > 31) // green out of range: H-mode, alpha-aware decoder
+		{
+			unsigned int block58_part1, block58_part2;
+			unstuff58bits(block_part1, block_part2, block58_part1, block58_part2);
+			decompressBlockTHUMB58HAlphaC(block58_part1, block58_part2, img, alphaimg, width, height, startx, starty, channelsRGB);
+		}
+		else if(blue < 0 || blue > 31) // blue out of range: planar block, which has no alpha-aware decoder here
+		{
+			unsigned int block57_part1, block57_part2;
+
+			unstuff57bits(block_part1, block_part2, block57_part1, block57_part2);
+			decompressBlockPlanar57c(block57_part1, block57_part2, img, width, height, startx, starty, channelsRGB);
+			for(int x=startx; x<startx+4; x++) // so every texel of the block is marked opaque
+			{
+				for(int y=starty; y<starty+4; y++) 
+				{
+					alphaimg[channelsA*(x+y*width)]=255;
+				}
+			}
+		}
+		else // all three sums in range: differential block, decoder writes alpha itself
+			decompressBlockDifferentialWithAlphaC(block_part1, block_part2, img,alphaimg, width, height, startx, starty, channelsRGB);
+	}
+}
+void decompressBlockETC21BitAlpha(unsigned int block_part1, unsigned int block_part2, uint8 *img, uint8* alphaimg, int width, int height, int startx, int starty) // Wrapper around decompressBlockETC21BitAlphaC for packed RGB output.
+{
+  decompressBlockETC21BitAlphaC(block_part1, block_part2, img, alphaimg, width, height, startx, starty, 3); // channelsRGB = 3: img holds 3 bytes per texel and alpha values go to the separate alphaimg buffer
+}
+//
+//	Utility functions used for alpha compression
+//
+
+// Extracts bit number frompos of input and returns it moved to bit number topos; all other bits of the result are 0.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+uint8 getbit(uint8 input, int frompos, int topos) 
+{
+	const int isolated = (1<<frompos)&input; // the requested bit, still at position frompos (replaces the unused local 'output')
+	if(frompos>topos)
+		return isolated>>(frompos-topos); // move the bit down
+	return isolated<<(topos-frompos); // move the bit up, or leave it in place when frompos==topos
+}
+
+// Takes a value as input and returns it clamped to the interval [0,255].
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int clamp(int val) 
+{
+	if(val<0) // below the range: snap to the lower bound
+		val=0;
+	if(val>255) // above the range: snap to the upper bound
+		val=255;
+	return val;
+}
+
+// Decodes the alpha component in a block coded with GL_COMPRESSED_RGBA8_ETC2_EAC.
+// Note that this decoding is slightly different from that of GL_COMPRESSED_R11_EAC.
+// However, a hardware decoder can share gates between the two formats as explained
+// in the specification under GL_COMPRESSED_R11_EAC.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void decompressBlockAlphaC(uint8* data, uint8* img, int width, int height, int ix, int iy, int channels) 
+{ // note: the height parameter is not read anywhere in this function
+	int alpha = data[0]; // byte 0: base value that the table entry is added to
+	int table = data[1]; // byte 1: used unmodified as the first index into alphaTable
+	
+	int bit=0; // position inside the current byte, counted from its most significant bit
+	int byte=2; // the per-texel indices start at byte 2
+	//extract an alpha value for each pixel.
+	for(int x=0; x<4; x++) 
+	{
+		for(int y=0; y<4; y++) // y is the inner loop, so texels are consumed column by column
+		{
+			//Extract table index
+			int index=0;
+			for(int bitpos=0; bitpos<3; bitpos++) 
+			{
+				index|=getbit(data[byte],7-bit,2-bitpos); // bits are read MSB-first; the first bit read becomes bit 2 of index
+				bit++;
+				if(bit>7) // current byte exhausted: continue with the next one
+				{
+					bit=0;
+					byte++;
+				}
+			}
+			img[(ix+x+(iy+y)*width)*channels]=clamp(alpha +alphaTable[table][index]); // one byte per texel, texels 'channels' bytes apart
+		}
+	}
+}
+void decompressBlockAlpha(uint8* data, uint8* img, int width, int height, int ix, int iy) // Wrapper around decompressBlockAlphaC for a tightly packed single-channel image.
+{
+  decompressBlockAlphaC(data, img, width, height, ix, iy, 1); // channels = 1: consecutive texels are one byte apart in img
+}
+
+// Does decompression and then immediately converts from 11 bit signed to a 16-bit format.
+// base is expected in 0..255 form (decompressBlockAlpha16bitC adds 128 to the signed byte first); the result lies in [-32767, 32767].
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int16 get16bits11signed(int base, int table, int mul, int index) 
+{
+	int elevenbase = base-128; // back to the signed representation
+	if(elevenbase==-128) // a base of -128 is decoded exactly like -127
+		elevenbase=-127;
+	elevenbase*=8;
+	//i want the positive value here
+	int tabVal = -alphaBase[table][3-index%4]-1;
+	//and the sign, please
+	int sign = 1-(index/4); // 1 for index 0..3, 0 for index 4..7 (index is a 3-bit value at the call sites in this file)
+	
+	if(sign)
+		tabVal=tabVal+1;
+	int elevenTabVal = tabVal*8;
+
+	if(mul!=0)
+		elevenTabVal*=mul;
+	else
+		elevenTabVal/=8; // a multiplier of 0 undoes the *8 above, i.e. the table value is used unscaled
+
+	if(sign)
+		elevenTabVal=-elevenTabVal;
+
+	//calculate sum
+	int elevenbits = elevenbase+elevenTabVal;
+
+	//clamp..
+	if(elevenbits>=1024)
+		elevenbits=1023;
+	else if(elevenbits<-1023)
+		elevenbits=-1023;
+	//this is the value we would actually output.. 
+	//but there aren't any good 11-bit file or uncompressed GL formats
+	//so we extend to 15 bits signed.
+	sign = elevenbits<0; // sign is reused here: from now on it means "result is negative"
+	elevenbits=abs(elevenbits);
+	int16 fifteenbits = (elevenbits<<5)+(elevenbits>>5); // magnitude 1023 maps to 32736+31 = 32767
+	int16 sixteenbits=fifteenbits;
+
+	if(sign)
+		sixteenbits=-sixteenbits;
+	
+	return sixteenbits;
+}
+
+// Does decompression and then immediately converts from 11 bit unsigned to a 16-bit format 
+// Calculates the 11 bit value represented by base, table, mul and index, and extends it to 16 bits.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+uint16 get16bits11bits(int base, int table, int mul, int index) 
+{
+	int elevenbase = base*8+4;
+
+	//i want the positive value here
+	int tabVal = -alphaBase[table][3-index%4]-1;
+	//and the sign, please
+	int sign = 1-(index/4); // 1 for index 0..3, 0 for index 4..7 (index is a 3-bit value at the call sites in this file)
+	
+	if(sign)
+		tabVal=tabVal+1;
+	int elevenTabVal = tabVal*8;
+
+	if(mul!=0)
+		elevenTabVal*=mul;
+	else
+		elevenTabVal/=8; // a multiplier of 0 undoes the *8 above, i.e. the table value is used unscaled
+
+	if(sign)
+		elevenTabVal=-elevenTabVal;
+
+	//calculate sum
+	int elevenbits = elevenbase+elevenTabVal;
+
+	//clamp..
+	if(elevenbits>=256*8) // upper bound is 2047, the largest 11-bit value
+		elevenbits=256*8-1;
+	else if(elevenbits<0)
+		elevenbits=0;
+	//elevenbits now contains the 11 bit alpha value as defined in the spec.
+
+	//extend to 16 bits before returning, since we don't have any good 11-bit file formats.
+	uint16 sixteenbits = (elevenbits<<5)+(elevenbits>>6); // 2047 maps to 65504+31 = 65535
+
+	return sixteenbits;
+}
+
+// Decompresses a block using one of the GL_COMPRESSED_R11_EAC or GL_COMPRESSED_SIGNED_R11_EAC-formats; the global formatSigned selects which one. Output is 16 bits per texel.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void decompressBlockAlpha16bitC(uint8* data, uint8* img, int width, int height, int ix, int iy, int channels) 
+{ // note: the height parameter is not read anywhere in this function
+	int alpha = data[0]; // byte 0: base value (re-read as a signed byte below when formatSigned is set)
+	int table = data[1]; // byte 1: split below into table%16 (table number) and table/16 (multiplier)
+
+	if(formatSigned) 
+	{
+		//if we have a signed format, the base value is given as a signed byte. We convert it to (0-255) here,
+		//so more code can be shared with the unsigned mode.
+		alpha = *((signed char*)(&data[0]));
+		alpha = alpha+128;
+	}
+
+	int bit=0; // position inside the current byte, counted from its most significant bit
+	int byte=2; // the per-texel indices start at byte 2
+	//extract an alpha value for each pixel.
+	for(int x=0; x<4; x++) 
+	{
+		for(int y=0; y<4; y++) // y is the inner loop, so texels are consumed column by column
+		{
+			//Extract table index
+			int index=0;
+			for(int bitpos=0; bitpos<3; bitpos++) 
+			{
+				index|=getbit(data[byte],7-bit,2-bitpos); // bits are read MSB-first; the first bit read becomes bit 2 of index
+				bit++;
+				if(bit>7) // current byte exhausted: continue with the next one
+				{
+					bit=0;
+					byte++;
+				}
+			}
+			int windex = channels*(2*(ix+x+(iy+y)*width)); // byte offset of this texel: two bytes per sample, 'channels' samples per texel
+#if !PGMOUT
+			if(formatSigned)
+			{
+				*(int16 *)&img[windex] = get16bits11signed(alpha,(table%16),(table/16),index); // NOTE(review): typed 16-bit store into a byte buffer, in host byte order; assumes img+windex is suitably aligned -- confirm for the target
+			}
+			else
+			{
+				*(uint16 *)&img[windex] = get16bits11bits(alpha,(table%16),(table/16),index); // NOTE(review): same host-byte-order, alignment-dependent store as above
+			}
+#else
+			//make data compatible with the .pgm format. See the comment in compressBlockAlpha16() for details.
+			uint16 uSixteen;
+			if (formatSigned)
+			{
+				//the pgm-format only allows unsigned images,
+				//so we add 2^15 to get a 16-bit value.
+				uSixteen = get16bits11signed(alpha,(table%16),(table/16),index) + 256*128;
+			}
+			else
+			{
+				uSixteen = get16bits11bits(alpha,(table%16),(table/16),index);
+			}
+			//byte swap for pgm
+			img[windex] = uSixteen/256; // high byte first
+			img[windex+1] = uSixteen%256; // then the low byte
+#endif
+
+		}
+	}			
+}
+
+void decompressBlockAlpha16bit(uint8* data, uint8* img, int width, int height, int ix, int iy) // Wrapper around decompressBlockAlpha16bitC for a single-channel 16-bit image.
+{
+  decompressBlockAlpha16bitC(data, img, width, height, ix, iy, 1); // channels = 1: consecutive texels are two bytes apart in img
+}
diff --git a/extern/etcpack/etcpack.cxx b/extern/etcpack/etcpack.cxx
new file mode 100755
index 0000000..6448b67
--- /dev/null
+++ b/extern/etcpack/etcpack.cxx
@@ -0,0 +1,16091 @@
+//// etcpack v2.74
+//// 
+//// NO WARRANTY 
+//// 
+//// BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE THE PROGRAM IS PROVIDED
+//// "AS IS". ERICSSON MAKES NO REPRESENTATIONS OF ANY KIND, EXTENDS NO
+//// WARRANTIES OR CONDITIONS OF ANY KIND; EITHER EXPRESS, IMPLIED OR
+//// STATUTORY; INCLUDING, BUT NOT LIMITED TO, EXPRESS, IMPLIED OR
+//// STATUTORY WARRANTIES OR CONDITIONS OF TITLE, MERCHANTABILITY,
+//// SATISFACTORY QUALITY, SUITABILITY AND FITNESS FOR A PARTICULAR
+//// PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+//// PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+//// THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. ERICSSON
+//// MAKES NO WARRANTY THAT THE MANUFACTURE, SALE, OFFERING FOR SALE,
+//// DISTRIBUTION, LEASE, USE OR IMPORTATION UNDER THE LICENSE WILL BE FREE
+//// FROM INFRINGEMENT OF PATENTS, COPYRIGHTS OR OTHER INTELLECTUAL
+//// PROPERTY RIGHTS OF OTHERS, AND THE VALIDITY OF THE LICENSE IS SUBJECT
+//// TO YOUR SOLE RESPONSIBILITY TO MAKE SUCH DETERMINATION AND ACQUIRE
+//// SUCH LICENSES AS MAY BE NECESSARY WITH RESPECT TO PATENTS, COPYRIGHT
+//// AND OTHER INTELLECTUAL PROPERTY OF THIRD PARTIES.
+//// 
+//// FOR THE AVOIDANCE OF DOUBT THE PROGRAM (I) IS NOT LICENSED FOR; (II)
+//// IS NOT DESIGNED FOR OR INTENDED FOR; AND (III) MAY NOT BE USED FOR;
+//// ANY MISSION CRITICAL APPLICATIONS SUCH AS, BUT NOT LIMITED TO
+//// OPERATION OF NUCLEAR OR HEALTHCARE COMPUTER SYSTEMS AND/OR NETWORKS,
+//// AIRCRAFT OR TRAIN CONTROL AND/OR COMMUNICATION SYSTEMS OR ANY OTHER
+//// COMPUTER SYSTEMS AND/OR NETWORKS OR CONTROL AND/OR COMMUNICATION
+//// SYSTEMS ALL IN WHICH CASE THE FAILURE OF THE PROGRAM COULD LEAD TO
+//// DEATH, PERSONAL INJURY, OR SEVERE PHYSICAL, MATERIAL OR ENVIRONMENTAL
+//// DAMAGE. YOUR RIGHTS UNDER THIS LICENSE WILL TERMINATE AUTOMATICALLY
+//// AND IMMEDIATELY WITHOUT NOTICE IF YOU FAIL TO COMPLY WITH THIS
+//// PARAGRAPH.
+//// 
+//// IN NO EVENT WILL ERICSSON, BE LIABLE FOR ANY DAMAGES WHATSOEVER,
+//// INCLUDING BUT NOT LIMITED TO PERSONAL INJURY, ANY GENERAL, SPECIAL,
+//// INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR IN
+//// CONNECTION WITH THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT
+//// NOT LIMITED TO LOSS OF PROFITS, BUSINESS INTERUPTIONS, OR ANY OTHER
+//// COMMERCIAL DAMAGES OR LOSSES, LOSS OF DATA OR DATA BEING RENDERED
+//// INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
+//// THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) REGARDLESS OF THE
+//// THEORY OF LIABILITY (CONTRACT, TORT OR OTHERWISE), EVEN IF SUCH HOLDER
+//// OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+//// 
+//// (C) Ericsson AB 2005-2013. All Rights Reserved.
+//// 
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h> 
+#include <time.h>
+#include <sys/timeb.h>
+#include "image.h"
+
+// Typedefs
+typedef unsigned char uint8; // one byte, unsigned
+typedef unsigned short uint16; // assumes unsigned short is 16 bits wide on the target
+typedef short int16; // assumes short is 16 bits wide on the target
+
+// Functions needed for decompression ---- in etcdec.cxx
+void read_big_endian_2byte_word(unsigned short *blockadr, FILE *f);
+void read_big_endian_4byte_word(unsigned int *blockadr, FILE *f);
+void unstuff57bits(unsigned int planar_word1, unsigned int planar_word2, unsigned int &planar57_word1, unsigned int &planar57_word2);
+void unstuff59bits(unsigned int thumbT_word1, unsigned int thumbT_word2, unsigned int &thumbT59_word1, unsigned int &thumbT59_word2);
+void unstuff58bits(unsigned int thumbH_word1, unsigned int thumbH_word2, unsigned int &thumbH58_word1, unsigned int &thumbH58_word2);
+void decompressColor(int R_B, int G_B, int B_B, uint8 (colors_RGB444)[2][3], uint8 (colors)[2][3]);
+void calculatePaintColors59T(uint8 d, uint8 p, uint8 (colors)[2][3], uint8 (possible_colors)[4][3]);
+void calculatePaintColors58H(uint8 d, uint8 p, uint8 (colors)[2][3], uint8 (possible_colors)[4][3]);
+void decompressBlockTHUMB59T(unsigned int block_part1, unsigned int block_part2, uint8 *img,int width,int height,int startx,int starty);
+void decompressBlockTHUMB58H(unsigned int block_part1, unsigned int block_part2, uint8 *img,int width,int height,int startx,int starty);
+void decompressBlockPlanar57(unsigned int compressed57_1, unsigned int compressed57_2, uint8 *img,int width,int height,int startx,int starty);
+void decompressBlockDiffFlip(unsigned int block_part1, unsigned int block_part2, uint8 *img,int width,int height,int startx,int starty);
+void decompressBlockETC2(unsigned int block_part1, unsigned int block_part2, uint8 *img,int width,int height,int startx,int starty);
+void decompressBlockDifferentialWithAlpha(unsigned int block_part1,unsigned int block_part2, uint8* img, uint8* alpha, int width, int height, int startx, int starty);
+void decompressBlockETC21BitAlpha(unsigned int block_part1, unsigned int block_part2, uint8 *img, uint8* alphaimg, int width,int height,int startx,int starty);
+void decompressBlockTHUMB58HAlpha(unsigned int block_part1, unsigned int block_part2, uint8 *img, uint8* alpha,int width,int height,int startx,int starty);
+void decompressBlockTHUMB59TAlpha(unsigned int block_part1, unsigned int block_part2, uint8 *img, uint8* alpha,int width,int height,int startx,int starty);
+uint8 getbit(uint8 input, int frompos, int topos);
+int clamp(int val);
+void decompressBlockAlpha(uint8* data,uint8* img,int width,int height,int ix,int iy);
+uint16 get16bits11bits(int base, int table, int mul, int index);
+void decompressBlockAlpha16bit(uint8* data,uint8* img,int width,int height,int ix,int iy); 
+int16 get16bits11signed(int base, int table, int mul, int index);
+void setupAlphaTable();
+
+
+// This source code is quite long. You can make it shorter by leaving out the
+// exhaustive code. The -slow modes will then not work, but the file will be
+// approximately half as many lines long.
+// To do so, remove the lines between "exhaustive code starts here" and
+// "exhaustive code ends here".
+#define EXHAUSTIVE_CODE_ACTIVE 1
+
+// Remove warnings for unsafe functions such as strcpy
+#pragma warning(disable : 4996)
+// Remove warnings for conversions between different time variables
+#pragma warning(disable : 4244)
+// Remove warnings for negative or too big shifts
+//#pragma warning(disable : 4293)
+ 
+#define CLAMP(ll,x,ul) (((x)<(ll)) ? (ll) : (((x)>(ul)) ? (ul) : (x))) // note: each argument may be evaluated more than once
+// The below code works as CLAMP(0, x, 255) if x < 255
+#define CLAMP_LEFT_ZERO(x) ((~(((int)(x))>>31))&(x)) // negative x gives 0, anything else passes through; relies on >> of a negative int being an arithmetic shift
+// The below code works as CLAMP(0, x, 255) if x is in [0,511]
+#define CLAMP_RIGHT_255(x) (((( ((((int)(x))<<23)>>31)  ))|(x))&0x000000ff)   // bit 8 of x is smeared over the whole word, so 256..511 all become 255
+
+#define SQUARE(x) ((x)*(x)) // note: x is evaluated twice
+#define JAS_ROUND(x) (((x) < 0.0 ) ? ((int)((x)-0.5)) : ((int)((x)+0.5))) // round to nearest int, halves away from zero
+#define JAS_MIN(a,b) ((a) < (b) ? (a) : (b)) // note: arguments may be evaluated twice
+#define JAS_MAX(a,b) ((a) > (b) ? (a) : (b)) // note: arguments may be evaluated twice
+
+// The error metric Wr Wg Wb should be defined so that Wr^2 + Wg^2 + Wb^2 = 1.
+// Hence it is easier to first define the squared values and derive the weights
+// as their square-roots.
+
+#define PERCEPTUAL_WEIGHT_R_SQUARED 0.299
+#define PERCEPTUAL_WEIGHT_G_SQUARED 0.587
+#define PERCEPTUAL_WEIGHT_B_SQUARED 0.114
+
+#define PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000 299
+#define PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000 587
+#define PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000 114
+
+#define RED(img,width,x,y)   (img)[3*((y)*(width)+(x))+0] // channel 0 of texel (x,y) in a packed 3-byte-per-texel image; arguments are parenthesized so expressions such as y+1 index correctly
+#define GREEN(img,width,x,y) (img)[3*((y)*(width)+(x))+1] // channel 1 of the same texel
+#define BLUE(img,width,x,y)  (img)[3*((y)*(width)+(x))+2] // channel 2 of the same texel
+
+#define SHIFT(size,startpos) ((startpos)-(size)+1) // bit position of the lowest bit of a 'size'-bit field whose highest bit is 'startpos'
+#define MASK(size, startpos) (((2<<((size)-1))-1) << SHIFT(size,startpos)) // 2<<(size-1) rather than 1<<size, so a 32-bit field does not shift by 32
+#define PUTBITS( dest, data, size, startpos) dest = (((dest) & ~MASK(size, startpos)) | (((data) << SHIFT(size, startpos)) & MASK(size,startpos))) // overwrite one field of the low word; expands to an assignment to dest
+#define SHIFTHIGH(size, startpos) (((startpos)-32)-(size)+1) // as SHIFT, for the high word: startpos counts bits 32..63
+#define MASKHIGH(size, startpos) (((1<<(size))-1) << SHIFTHIGH(size,startpos))
+#define PUTBITSHIGH(dest, data, size, startpos) dest = (((dest) & ~MASKHIGH(size, startpos)) | (((data) << SHIFTHIGH(size, startpos)) & MASKHIGH(size,startpos))) // overwrite one field of the high word; expands to an assignment to dest
+#define GETBITS(source, size, startpos)  (( (source) >> ((startpos)-(size)+1) ) & ((1<<(size)) -1)) // read a 'size'-bit field from the low word
+#define GETBITSHIGH(source, size, startpos)  (( (source) >> (((startpos)-32)-(size)+1) ) & ((1<<(size)) -1)) // read a 'size'-bit field from the high word
+
+// Thumb macros and definitions
+#define	R_BITS59T 4 // bits per color channel passed to decompressColor() for the 59-bit T-mode
+#define G_BITS59T 4
+#define	B_BITS59T 4
+#define	R_BITS58H 4 // bits per color channel passed to decompressColor() for the 58-bit H-mode
+#define G_BITS58H 4
+#define	B_BITS58H 4
+#define	MAXIMUM_ERROR (255*255*16*1000) // = 1,040,400,000, which still fits in a 32-bit signed int
+#define R 0 // channel indices; note that these single-letter macros replace every later identifier R, G or B in this file
+#define G 1
+#define B 2
+#define BLOCKHEIGHT 4 // texels per block side
+#define BLOCKWIDTH 4
+#define BINPOW(power) (1<<(power)) // 2 raised to 'power'
+//#define RADIUS 2
+#define	TABLE_BITS_59T 3 // width of the table index; matches the 8-entry table59T
+#define	TABLE_BITS_58H 3 // width of the table index; matches the 8-entry table58H
+
+// Global tables
+static uint8 table59T[8] = {3,6,11,16,23,32,41,64};  // 3-bit table for the 59 bit T-mode
+static uint8 table58H[8] = {3,6,11,16,23,32,41,64};  // 3-bit table for the 58 bit H-mode (same eight values as table59T)
+uint8 weight[3] = {1,1,1};			// Color weight; unlike the two tables above it is not static, so it has external linkage
+
+// Enums. They are anonymous and declare no object, so they carry no 'static': a storage class is only valid on object and function declarations.
+enum{PATTERN_H = 0, 
+			PATTERN_T = 1};
+
+enum{MODE_ETC1, MODE_THUMB_T, MODE_THUMB_H, MODE_PLANAR};
+// The ETC2 package of codecs includes the following codecs:
+//
+// codec                                             enum
+// --------------------------------------------------------
+// GL_COMPRESSED_R11_EAC                            0x9270
+// GL_COMPRESSED_SIGNED_R11_EAC                     0x9271
+// GL_COMPRESSED_RG11_EAC                           0x9272
+// GL_COMPRESSED_SIGNED_RG11_EAC                    0x9273
+// GL_COMPRESSED_RGB8_ETC2                          0x9274
+// GL_COMPRESSED_SRGB8_ETC2                         0x9275
+// GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2      0x9276
+// GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2     0x9277
+// GL_COMPRESSED_RGBA8_ETC2_EAC                     0x9278
+// GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC              0x9279
+// 
+// The older codec ETC1 is not included in the package 
+// GL_ETC1_RGB8_OES                                 0x8d64
+// but since ETC2 is backwards compatible an ETC1 texture can
+// be decoded using the RGB8_ETC2 enum (0x9274)
+// 
+// In a PKM-file, the codecs are stored using the following identifiers
+// 
+// identifier                         value               codec
+// --------------------------------------------------------------------
+// ETC1_RGB_NO_MIPMAPS                  0                 GL_ETC1_RGB8_OES
+// ETC2PACKAGE_RGB_NO_MIPMAPS           1                 GL_COMPRESSED_RGB8_ETC2
+// ETC2PACKAGE_RGBA_NO_MIPMAPS_OLD      2, not used       -
+// ETC2PACKAGE_RGBA_NO_MIPMAPS          3                 GL_COMPRESSED_RGBA8_ETC2_EAC
+// ETC2PACKAGE_RGBA1_NO_MIPMAPS         4                 GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2
+// ETC2PACKAGE_R_NO_MIPMAPS             5                 GL_COMPRESSED_R11_EAC
+// ETC2PACKAGE_RG_NO_MIPMAPS            6                 GL_COMPRESSED_RG11_EAC
+// ETC2PACKAGE_R_SIGNED_NO_MIPMAPS      7                 GL_COMPRESSED_SIGNED_R11_EAC
+// ETC2PACKAGE_RG_SIGNED_NO_MIPMAPS     8                 GL_COMPRESSED_SIGNED_RG11_EAC
+//
+// In the code, the identifiers are not always used strictly. For instance, the
+// identifier ETC2PACKAGE_R_NO_MIPMAPS is sometimes used for both the unsigned
+// (GL_COMPRESSED_R11_EAC) and signed (GL_COMPRESSED_SIGNED_R11_EAC) version of 
+// the codec.
+// 
+enum{ETC1_RGB_NO_MIPMAPS,ETC2PACKAGE_RGB_NO_MIPMAPS,ETC2PACKAGE_RGBA_NO_MIPMAPS_OLD,ETC2PACKAGE_RGBA_NO_MIPMAPS,ETC2PACKAGE_RGBA1_NO_MIPMAPS,ETC2PACKAGE_R_NO_MIPMAPS,ETC2PACKAGE_RG_NO_MIPMAPS,ETC2PACKAGE_R_SIGNED_NO_MIPMAPS,ETC2PACKAGE_RG_SIGNED_NO_MIPMAPS,ETC2PACKAGE_sRGB_NO_MIPMAPS,ETC2PACKAGE_sRGBA_NO_MIPMAPS,ETC2PACKAGE_sRGBA1_NO_MIPMAPS}; // texture format identifiers, numbered from 0 in this order; no 'static', since a storage class is not valid on a declaration without an object
+enum {MODE_COMPRESS, MODE_UNCOMPRESS, MODE_PSNR}; // values for the global 'mode'
+enum {SPEED_SLOW, SPEED_FAST, SPEED_MEDIUM}; // values for the global 'speed'
+enum {METRIC_PERCEPTUAL, METRIC_NONPERCEPTUAL}; // values for the global 'metric'
+enum {CODEC_ETC, CODEC_ETC2}; // values for the global 'codec'
+
+int mode = MODE_COMPRESS; // initial value; one of MODE_COMPRESS / MODE_UNCOMPRESS / MODE_PSNR
+int speed = SPEED_FAST; // initial value; one of SPEED_SLOW / SPEED_FAST / SPEED_MEDIUM
+int metric = METRIC_PERCEPTUAL; // initial value; one of METRIC_PERCEPTUAL / METRIC_NONPERCEPTUAL
+int codec = CODEC_ETC2; // initial value; one of CODEC_ETC / CODEC_ETC2
+int format = ETC2PACKAGE_RGB_NO_MIPMAPS; // initial value; one of the *_NO_MIPMAPS identifiers
+int verbose = true; // an int initialized from a bool literal, i.e. 1
+extern int formatSigned; // defined in another translation unit
+int ktxFile=0;
+bool first_time_message = true;
+
+static int scramble[4] = {3, 2, 0, 1};
+static int unscramble[4] = {2, 3, 1, 0}; // inverse permutation of scramble: unscramble[scramble[i]] == i for i in 0..3
+
+typedef struct KTX_header_t // KTX file header: a 12-byte identifier followed by thirteen unsigned int fields
+{
+	uint8  identifier[12]; // same length as the 12-byte KTX_IDENTIFIER_REF sequence
+	unsigned int endianness; // presumably compared against KTX_ENDIAN_REF / KTX_ENDIAN_REF_REV -- confirm where the header is read
+	unsigned int glType;
+	unsigned int glTypeSize;
+	unsigned int glFormat;
+	unsigned int glInternalFormat;
+	unsigned int glBaseInternalFormat;
+	unsigned int pixelWidth;
+	unsigned int pixelHeight;
+	unsigned int pixelDepth;
+	unsigned int numberOfArrayElements;
+	unsigned int numberOfFaces;
+	unsigned int numberOfMipmapLevels;
+	unsigned int bytesOfKeyValueData;
+} 
+KTX_header;
+#define KTX_IDENTIFIER_REF  { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }
+
+#define KTX_ENDIAN_REF      (0x04030201)
+#define KTX_ENDIAN_REF_REV  (0x01020304)
+
+static enum {GL_R=0x1903,GL_RG=0x8227,GL_RGB=0x1907,GL_RGBA=0x1908};
+#define GL_SRGB                                          0x8C40
+#define GL_SRGB8                                         0x8C41
+#define GL_SRGB8_ALPHA8                                  0x8C43
+#define GL_ETC1_RGB8_OES                                 0x8d64
+#define GL_COMPRESSED_R11_EAC                            0x9270
+#define GL_COMPRESSED_SIGNED_R11_EAC                     0x9271
+#define GL_COMPRESSED_RG11_EAC                           0x9272
+#define GL_COMPRESSED_SIGNED_RG11_EAC                    0x9273
+#define GL_COMPRESSED_RGB8_ETC2                          0x9274
+#define GL_COMPRESSED_SRGB8_ETC2                         0x9275
+#define GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2      0x9276
+#define GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2     0x9277
+#define GL_COMPRESSED_RGBA8_ETC2_EAC                     0x9278
+#define GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC              0x9279
+
+
+int ktx_identifier[] = KTX_IDENTIFIER_REF; // The twelve bytes of KTX_IDENTIFIER_REF, one per int element.
+
+
+// Converts the 32 pixel-index bits from the layout |a0|a1|e0|e1|i0|i1|m0|m1|b0|b1|f0|f1|j0|j1|n0|n1|c0|c1|g0|g1|k0|k1|o0|o1|d0|d1|h0|h1|l0|l1|p0|p1| previously used by T- and H-modes
+// into the layout |p0|o0|n0|m0|l0|k0|j0|i0|h0|g0|f0|e0|d0|c0|b0|a0|p1|o1|n1|m1|l1|k1|j1|i1|h1|g1|f1|e1|d1|c1|b1|a1| which should be used for all modes.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int indexConversion(int pixelIndices) 
+{
+	// Built up as unsigned so that setting bit 31 is an ordinary bit operation;
+	// the bit pattern is handed back in the caller's signed type at the end.
+	unsigned int converted = 0;
+
+	for(int col=0; col<4; col++)
+	{
+		for(int row=0; row<4; row++)
+		{
+			// Input layout: each pixel owns two neighbouring bits. Pixel (3,3) sits in
+			// bits 0-1, then x counts down within a row and y counts down across rows.
+			const int pixelNo = (3-row)*4 + (3-col);
+			const int srcBit = 2*pixelNo;
+			const unsigned int lowBit  = (unsigned int)((pixelIndices>>srcBit)&1);
+			const unsigned int highBit = (unsigned int)((pixelIndices>>(srcBit+1))&1);
+
+			// Output layout: pixel (col,row) owns bit col*4+row of the lower 16 bits
+			// (low index bit) and the same position in the upper 16 bits (high index bit).
+			const int dstBit = col*4 + row;
+			converted |= lowBit<<dstBit;
+			converted |= highBit<<(16+dstBit);
+		}
+	}
+
+	// Same 32 bits, returned as int like the input.
+	return (int)converted;
+}
+
+// Reports whether 'filename' can be opened for binary reading.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+bool fileExist(char *filename)
+{
+	// Probe by opening the file; the handle is released again straight away.
+	FILE *probe = fopen(filename,"rb");
+	if(probe==NULL)
+		return false;
+
+	fclose(probe);
+	return true;
+}
+
+// Pads the image on its right-hand side so that the width becomes a multiple of four.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+bool expandToWidthDivByFour(uint8 *&img, int width, int height, int &expandedwidth, int &expandedheight, int bitrate)
+{
+	// img            : malloc'ed pixel buffer holding 3*bitrate/8 bytes per pixel; on success
+	//                  it is freed and replaced by the padded copy.
+	// width, height  : size of the incoming image in pixels.
+	// expandedwidth,
+	// expandedheight : receive the size of the image after the call.
+	// Returns true only when a padded copy was made; false when the width was
+	// already a multiple of four or when the allocation failed.
+	if(width % 4 == 0)
+	{
+		printf("Image already of even width\n");
+		expandedwidth = width;
+		expandedheight = height;
+		return false;
+	}
+
+	// Round the width up to the next multiple of four; the height is untouched.
+	expandedwidth = (width/4 + 1)*4;
+	expandedheight = height;
+
+	uint8 *padded = (uint8*) malloc(3*expandedwidth*expandedheight*bitrate/8);
+	if(padded == NULL)
+	{
+		printf("Could not allocate memory to expand width\n");
+		return false;
+	}
+
+	const int bytesPerPixel = 3*bitrate/8;
+
+	// Pass 1: copy every source pixel to the same (row, column) of the wider image.
+	// Only the row stride differs between the two buffers.
+	for(int row = 0; row < height; row++)
+	{
+		for(int col = 0; col < width; col++)
+		{
+			for(int b = 0; b < bytesPerPixel; b++)
+			{
+				padded[(row * expandedwidth+ col)*3*bitrate/8 + b] = img[(row * width+col)*3*bitrate/8 + b];
+			}
+		}
+	}
+
+	// Pass 2: fill each added column with the last source pixel of its row,
+	// i.e. the pixel in column width-1.
+	for(int row = 0; row < height; row++)
+	{
+		for(int col = width; col < expandedwidth; col++)
+		{
+			for(int b = 0; b < bytesPerPixel; b++)
+			{
+				padded[(row * expandedwidth+col)*3*bitrate/8 + b] = img[(row * width+(width-1))*3*bitrate/8 + b];
+			}
+		}
+	}
+
+	// The caller's buffer is no longer needed: release it and hand over the padded copy.
+	free(img);
+	img = padded;
+	return true;
+}
+
+// Pads the image at the bottom so that the height becomes a multiple of four.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+bool expandToHeightDivByFour(uint8 *&img, int width, int height, int &expandedwidth, int &expandedheight, int bitrate)
+{
+	// img            : malloc'ed pixel buffer holding 3*bitrate/8 bytes per pixel; on success
+	//                  it is freed and replaced by the padded copy.
+	// width, height  : size of the incoming image in pixels.
+	// expandedwidth,
+	// expandedheight : receive the size of the image after the call.
+	// Returns false only when the allocation failed; an image whose height is
+	// already a multiple of four is left alone and reported as success.
+	if(height % 4 == 0)
+	{
+		printf("Image height already divisible by four.\n");
+		expandedwidth = width;
+		expandedheight = height;
+		return true;
+	}
+
+	// Round the height up to the next multiple of four; the width is untouched.
+	expandedwidth = width;
+	expandedheight = (height/4 + 1) * 4;
+
+	uint8 *padded = (uint8*)malloc(3*expandedwidth*expandedheight*bitrate/8);
+	if(padded == NULL)
+	{
+		printf("Could not allocate memory to expand height\n");
+		return false;
+	}
+
+	// The row length does not change, so the existing rows are copied as one
+	// flat run of bytes without any re-addressing.
+	const int sourceBytes = 3*width*height*bitrate/8;
+	for(int offset = 0; offset < sourceBytes; offset++)
+	{
+		padded[offset] = img[offset];
+	}
+
+	// Each added row (at most three of them) is a copy of the last source row,
+	// i.e. row height-1.
+	const int bytesPerPixel = 3*bitrate/8;
+
+	for(int row = height; row < expandedheight; row++)
+	{
+		for(int col = 0; col < width; col++)
+		{
+			for(int b = 0; b < bytesPerPixel; b++)
+			{
+				padded[(row*width+col)*3*bitrate/8 + b] = img[((height-1)*width+col)*3*bitrate/8 + b];
+			}
+		}
+	}
+
+	// The caller's buffer is no longer needed: release it and hand over the padded copy.
+	free(img);
+	img = padded;
+	return true;
+}
+
+// Returns the index of the last '.' in src, i.e. where an extension such as .ppm or .pkm starts.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int find_pos_of_extension(char *src)
+{
+	// Only the final '.' counts, so "a.b.pkm" yields the position of ".pkm".
+	// A name without any '.' has no extension and is reported as -1.
+	char *lastDot = strrchr(src, '.');
+	if(lastDot == NULL)
+	{
+		return -1;
+	}
+
+	// Distance from the start of the string == array index of the dot.
+	return (int)(lastDot - src);
+}
+
+// Loads 'filename' into 'img' by way of the temporary file tmp.ppm (a .ppm source is copied, anything else is converted with the external 'imconv' tool) and then pads the image to multiples of four.
+// width/height receive the active size, expandedwidth/expandedheight the padded size. Returns false on failure; exits the program if tmp.ppm cannot be read.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+bool readSrcFile(char *filename,uint8 *&img,int &width,int &height, int &expandedwidth, int &expandedheight)
+{
+	int w1,h1;
+	char str[255];
+	// Mutable copy of the temp file name: fileExist() takes a non-const char*.
+	char tmpName[] = "tmp.ppm";
+
+	// Delete temp file if it exists.
+	if(fileExist(tmpName))
+	{
+		system("del tmp.ppm\n");
+	}
+
+	// Build the shell command that produces tmp.ppm.
+	// NOTE(review): 'filename' is pasted into the command unquoted; names containing
+	// spaces or shell metacharacters will not work and untrusted names must not be passed.
+	int q = find_pos_of_extension(filename);
+	int len;
+	if(q>=0 && !strcmp(&filename[q],".ppm"))	// q is -1 when the name contains no '.'
+	{
+		// Already a .ppm file. Just copy. 
+		len = snprintf(str, sizeof(str), "copy %s tmp.ppm \n", filename);
+		printf("Copying source file to tmp.ppm\n");
+	}
+	else
+	{
+		// Converting from other format to .ppm 
+		// 
+		// Use your favorite command line image converter program,
+		// for instance Image Magick. Just make sure the syntax can
+		// be written as below:
+		// 
+		//    imconv source.jpg dest.ppm
+		//
+		len = snprintf(str, sizeof(str), "imconv %s tmp.ppm\n", filename);
+		printf("Converting source file from %s to .ppm\n", filename);
+	}
+	// snprintf never writes past 'str'; refuse to run a command that had to be truncated.
+	if(len<0 || len>=(int)sizeof(str))
+	{
+		printf("Error: file name too long: %s\n", filename);
+		return false;
+	}
+	// Execute system call
+	system(str);
+
+	int bitrate=8;
+	if(format==ETC2PACKAGE_RG_NO_MIPMAPS)
+		bitrate=16;
+	if(!fReadPPM(tmpName,w1,h1,img,bitrate))
+	{
+		printf("Could not read tmp.ppm file\n");
+		exit(1);	
+	}
+
+	width=w1;
+	height=h1;
+	system("del tmp.ppm");
+	// Width must be divisible by 4 and height must be
+	// divisible by 4. Otherwise, we will expand the image
+	expandedwidth = width;
+	expandedheight = height;
+
+	if(width % 4 != 0)
+	{
+		printf(" Width = %d is not divisible by four... ", width);
+		printf(" expanding image in x-dir... ");
+		if(expandToWidthDivByFour(img, width, height, expandedwidth, expandedheight,bitrate))
+		{
+			printf("OK.\n");
+		}
+		else
+		{
+			printf("\n Error: could not expand image\n");
+			return false;
+		}
+	}
+	if(height % 4 != 0)
+	{
+		printf(" Height = %d is not divisible by four... ", height);
+		printf(" expanding image in y-dir...");
+		if(expandToHeightDivByFour(img, expandedwidth, height, expandedwidth, expandedheight,bitrate))
+		{
+			printf("OK.\n");
+		}
+		else
+		{
+			printf("\n Error: could not expand image\n");
+			return false;
+		}
+	}
+	if(!(expandedwidth == width && expandedheight == height))
+	   printf("Active pixels: %dx%d. Expanded image: %dx%d\n",width,height,expandedwidth,expandedheight);
+	return true;
+}
+
+// Reads a file without expanding it to be divisible by 4 (it goes through the temporary file tmp.ppm, like readSrcFile).
+// Is used when doing PSNR calculation between two files. Returns false if the image could not be loaded.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+bool readSrcFileNoExpand(char *filename,uint8 *&img,int &width,int &height)
+{
+	int w1,h1;
+	char str[255];
+	// Mutable copy of the temp file name: fileExist() takes a non-const char*.
+	char tmpName[] = "tmp.ppm";
+
+	// Delete temp file if it exists.
+	if(fileExist(tmpName))
+	{
+		system("del tmp.ppm\n");
+	}
+
+	// NOTE(review): 'filename' is pasted into a shell command unquoted; names containing
+	// spaces or shell metacharacters will not work and untrusted names must not be passed.
+	int q = find_pos_of_extension(filename);
+	int len;
+	if(q>=0 && !strcmp(&filename[q],".ppm"))	// q is -1 when the name contains no '.'
+	{
+		// Already a .ppm file. Just copy. 
+		len = snprintf(str, sizeof(str), "copy %s tmp.ppm \n", filename);
+		printf("Copying source file to tmp.ppm\n");
+	}
+	else
+	{
+		// Any other format is converted by an external command line tool that must accept
+		//    imconv source.jpg dest.ppm
+		len = snprintf(str, sizeof(str), "imconv %s tmp.ppm\n", filename);
+	}
+	// snprintf never writes past 'str'; refuse to run a command that had to be truncated.
+	if(len<0 || len>=(int)sizeof(str))
+	{
+		printf("Error: file name too long: %s\n", filename);
+		return false;
+	}
+	// Execute system call
+	system(str);
+
+	// Unlike readSrcFile(), a failed read is reported to the caller instead of exiting.
+	if(!fReadPPM(tmpName,w1,h1,img,8))
+		return false;
+
+	width=w1;
+	height=h1;
+	system("del tmp.ppm");
+	return true;
+}
+
+// Parses the command line: flags update the global settings, the two non-flag arguments are copied into src and dst, and the mode is derived from their extensions. Exits the program on malformed input. NOTE(review): src and dst are filled with strcpy, so the caller's buffers must be large enough for any argv entry.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void readArguments(int argc,char *argv[],char* src,char *dst)
+{
+	int q;
+
+	//every flag is followed by one argument, except -p which stands alone
+	bool srcfound=false,dstfound=false;
+	for(int i=1; i<argc; i++) 
+	{
+		//loop through the arguments!
+		//first check for flags..
+		if(argv[i][0]=='-') 
+		{
+			if(i==argc-1 && strcmp(argv[i],"-p")!=0) //-p is the only flag that may come last
+			{
+				printf("flag missing argument: %s!\n", argv[i]);
+				exit(1);
+			}
+			//handle speed flag
+			if(!strcmp(argv[i],"-s"))  
+			{
+				// We have argument -s. Now check for slow, medium or fast.
+				if(!strcmp(argv[i+1],"slow")) 
+					speed = SPEED_SLOW;
+				else if(!strcmp(argv[i+1],"medium")) 
+					speed = SPEED_MEDIUM;
+				else if(!strcmp(argv[i+1],"fast")) 
+					speed = SPEED_FAST;
+				else 
+				{
+					printf("Error: %s not part of flag %s\n",argv[i+1], argv[i]);
+					exit(1);
+				}
+			}
+			//handle verbose flag
+			else if(!strcmp(argv[i],"-v"))  
+			{
+				// We have argument -v. Now check for off or on.
+				if(!strcmp(argv[i+1],"off")) 
+					verbose = false;
+				else if(!strcmp(argv[i+1],"on")) 
+					verbose = true;
+				else 
+				{
+					printf("Error: %s not part of flag %s\n",argv[i+1], argv[i]);
+					exit(1);
+				}
+			}
+			//error metric flag
+			else if(!strcmp(argv[i],"-e")) 	
+			{
+				// We have argument -e. Now check for perceptual or nonperceptual
+				if(!strcmp(argv[i+1],"perceptual")) 
+					metric = METRIC_PERCEPTUAL;
+				else if(!strcmp(argv[i+1],"nonperceptual")) 
+					metric = METRIC_NONPERCEPTUAL;
+				else 
+				{
+					printf("Error: %s not part of flag %s\n",argv[i+1], argv[i]);
+					exit(1);
+				}
+			}
+			//codec flag
+			else if(!strcmp(argv[i],"-c")) 
+			{
+				// We have argument -c. Now check for etc, etc1 or etc2
+				if(!strcmp(argv[i+1],"etc") || !strcmp(argv[i+1],"etc1"))
+					codec = CODEC_ETC;
+				else if(!strcmp(argv[i+1],"etc2")) 
+					codec = CODEC_ETC2;
+				else 
+				{
+					printf("Error: %s not part of flag %s\n",argv[i+1], argv[i]);
+					exit(1);
+				}
+			}
+			//format flag
+			else if(!strcmp(argv[i],"-f")) 
+			{
+				if(!strcmp(argv[i+1],"R"))
+					format=ETC2PACKAGE_R_NO_MIPMAPS;
+				else if(!strcmp(argv[i+1],"RG"))
+					format=ETC2PACKAGE_RG_NO_MIPMAPS;
+				else if(!strcmp(argv[i+1],"R_signed")) 
+				{
+					format=ETC2PACKAGE_R_NO_MIPMAPS;
+					formatSigned=1;
+				}
+				else if(!strcmp(argv[i+1],"RG_signed")) 
+				{
+					format=ETC2PACKAGE_RG_NO_MIPMAPS;
+					formatSigned=1;
+				}
+				else if(!strcmp(argv[i+1],"RGB"))
+					format=ETC2PACKAGE_RGB_NO_MIPMAPS;
+				else if(!strcmp(argv[i+1],"sRGB"))
+					format=ETC2PACKAGE_sRGB_NO_MIPMAPS;
+				else if(!strcmp(argv[i+1],"RGBA")||!strcmp(argv[i+1],"RGBA8"))
+					format=ETC2PACKAGE_RGBA_NO_MIPMAPS;
+				else if(!strcmp(argv[i+1],"sRGBA")||!strcmp(argv[i+1],"sRGBA8"))
+					format=ETC2PACKAGE_sRGBA_NO_MIPMAPS;
+				else if(!strcmp(argv[i+1],"RGBA1"))
+					format=ETC2PACKAGE_RGBA1_NO_MIPMAPS;
+				else if(!strcmp(argv[i+1],"sRGBA1"))
+					format=ETC2PACKAGE_sRGBA1_NO_MIPMAPS;
+				else 
+				{
+					printf("Error: %s not part of flag %s\n",argv[i+1], argv[i]);
+					exit(1);
+				}
+			}
+			else if(!strcmp(argv[i],"-p")) 
+			{
+				mode=MODE_PSNR;
+				i--; //ugly way of negating the increment of i done later because -p doesn't have an argument.
+			}
+			else 
+			{
+				printf("Error: cannot interpret flag %s %s\n",argv[i], argv[i+1]);
+				exit(1);
+			}
+			//don't read the flag argument next iteration..
+			i++;
+		}
+		//this isn't a flag, so must be src or dst
+		else 
+		{
+			if(srcfound&&dstfound) 
+			{
+				printf("too many arguments! expecting src, dst; found %s, %s, %s\n",src,dst,argv[i]);
+				exit(1);
+			}
+			else if(srcfound) 
+			{
+				strcpy(dst,argv[i]);
+				dstfound=true;
+			}
+			else 
+			{
+				strcpy(src,argv[i]);
+				srcfound=true;
+			}
+		}
+	}
+	if(!srcfound||!dstfound) //both file names are required; without this src/dst would be read uninitialized below
+	{
+		printf("too few arguments! expecting src, dst\n");
+		exit(1);
+	}
+	if(mode==MODE_PSNR)
+		return;
+	//check source/destination.. is this compression or decompression?
+	q = find_pos_of_extension(src);
+	if(q<0) 
+	{
+		printf("invalid source file: %s\n",src);
+		exit(1);
+	}
+
+	// If we have etcpack img.pkm img.any
+
+	if(!strncmp(&src[q],".pkm",4)) 
+	{
+		// First argument is .pkm. Decompress. 
+		mode = MODE_UNCOMPRESS;			// uncompress from binary file format .pkm
+	}
+	else if(!strncmp(&src[q],".ktx",4)) 
+	{
+		// First argument is .ktx. Decompress. 
+		mode = MODE_UNCOMPRESS;			// uncompress from binary file format .ktx
+		ktxFile=true;
+		printf("decompressing ktx\n");
+	}
+	else
+	{
+		// The first argument was neither .pkm nor .ktx. The second argument must then be one of them.
+		q = find_pos_of_extension(dst);
+		if(q<0) 
+		{
+			printf("invalid destination file: %s\n",dst);
+			exit(1);
+		}
+		if(!strncmp(&dst[q],".pkm",4)) 
+		{
+			// Second argument is .pkm. Compress. 
+			mode = MODE_COMPRESS;			// compress to binary file format .pkm
+		}
+		else if(!strncmp(&dst[q],".ktx",4)) 
+		{
+			// Second argument is .ktx. Compress. 
+			ktxFile=true;
+			mode = MODE_COMPRESS;			// compress to binary file format .ktx
+			printf("compressing to ktx\n");
+		}
+		else 
+		{
+			printf("source or destination must be a .pkm or .ktx file\n");
+			exit(1);
+		}
+	}
+	//do some sanity check stuff..
+	if(codec==CODEC_ETC&&format!=ETC2PACKAGE_RGB_NO_MIPMAPS) 
+	{
+		printf("ETC1 codec only supports RGB format\n");
+		exit(1);
+	}
+	else if(codec==CODEC_ETC)
+		format=ETC1_RGB_NO_MIPMAPS;
+}
+
+static int compressParams[16][4];
+const int compressParamsFast[32] = {  -8,  -2,  2,   8,
+									 -17,  -5,  5,  17,
+									 -29,  -9,  9,  29,
+									 -42, -13, 13,  42,
+									 -60, -18, 18,  60,
+									 -80, -24, 24,  80,
+									-106, -33, 33, 106,
+									-183, -47, 47, 183};
+
+bool readCompressParams(void)
+{
+	// compressParams has sixteen rows, compressParamsFast only eight:
+	// rows 2k and 2k+1 of compressParams both receive row k of
+	// compressParamsFast, so
+	//   rows 0,1   = {  -8,  -2,  2,   8}
+	//   rows 2,3   = { -17,  -5,  5,  17}
+	//   ...
+	//   rows 14,15 = {-183, -47, 47, 183}
+	for(int row=0; row<16; row++)
+	{
+		const int *values = &compressParamsFast[(row/2)*4];
+		for(int column=0; column<4; column++)
+		{
+			compressParams[row][column] = values[column];
+		}
+	}
+
+	// There is no failure path.
+	return true;
+}
+
+// Averages the colour of the eight pixels in the area two wide and four high that starts at (startx,starty); 'height' is not used.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void computeAverageColor2x4noQuantFloat(uint8 *img,int width,int height,int startx,int starty,float *avg_color)
+{
+	// Per-channel sums over the eight pixels, in R, G, B order.
+	int sum[3] = {0, 0, 0};
+
+	for(int py=starty; py<starty+4; py++)
+	{
+		for(int px=startx; px<startx+2; px++)
+		{
+			sum[0]+=RED(img,width,px,py);
+			sum[1]+=GREEN(img,width,px,py);
+			sum[2]+=BLUE(img,width,px,py);
+		}
+	}
+
+	for(int c=0; c<3; c++)
+		avg_color[c]=(float)(sum[c]/8.0);
+}
+
+// Averages the colour of the eight pixels in the area four wide and two high that starts at (startx,starty); 'height' is not used.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void computeAverageColor4x2noQuantFloat(uint8 *img,int width,int height,int startx,int starty,float *avg_color)
+{
+	// Per-channel sums over the eight pixels, in R, G, B order.
+	int sum[3] = {0, 0, 0};
+	for(int py=starty; py<starty+2; py++)
+	{
+		for(int px=startx; px<startx+4; px++)
+		{
+			sum[0]+=RED(img,width,px,py);
+			sum[1]+=GREEN(img,width,px,py);
+			sum[2]+=BLUE(img,width,px,py);
+		}
+	}
+
+	for(int c=0; c<3; c++)
+		avg_color[c]=(float)(sum[c]/8.0);
+}
+
+// Finds all pixel indices for a 2x4 block: every pixel gets the modifier of table 'table' that, added to avg_color, has the smallest squared RGB error. Returns the summed error.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int compressBlockWithTable2x4(uint8 *img,int width,int height,int startx,int starty,uint8 *avg_color,int table,unsigned int *pixel_indices_MSBp, unsigned int *pixel_indices_LSBp)
+{
+	// The two halves of every pixel code are collected in separate words via PUTBITS,
+	// at a position that advances by one per pixel (columns first, then rows within a column).
+	unsigned int msbBits = 0;
+	unsigned int lsbBits = 0;
+	int totalError = 0;
+	int bitPos = 0;
+
+	for(int px=startx; px<startx+2; px++)
+	{
+		for(int py=starty; py<starty+4; py++)
+		{
+			uint8 pixel[3];
+			pixel[0]=RED(img,width,px,py);
+			pixel[1]=GREEN(img,width,px,py);
+			pixel[2]=BLUE(img,width,px,py);
+
+			// Try the four modifiers of the table; on a tie the earliest one is kept.
+			int bestModifier = 0;
+			int bestError = 255*255*3*16;
+			for(int m=0; m<4; m++)
+			{
+				uint8 candidate[3];
+				candidate[0]=CLAMP(0, avg_color[0]+compressParams[table][m],255);
+				candidate[1]=CLAMP(0, avg_color[1]+compressParams[table][m],255);
+				candidate[2]=CLAMP(0, avg_color[2]+compressParams[table][m],255);
+
+				// Here we just use equal weights to R, G and B. Although this will
+				// give visually worse results, it will give a better PSNR score. 
+				const int error = SQUARE(candidate[0]-pixel[0]) + SQUARE(candidate[1]-pixel[1]) + SQUARE(candidate[2]-pixel[2]);
+				if(error<bestError)
+				{
+					bestError=error;
+					bestModifier=m;
+				}
+			}
+
+			// In order to simplify hardware, the table {-12, -4, 4, 12} is indexed {11, 10, 00, 01}
+			// so that first bit is sign bit and the other bit is size bit (4 or 12). 
+			// This means that we have to scramble the bits before storing them. 
+			unsigned int code = scramble[bestModifier];
+			PUTBITS( msbBits, (code >> 1), 1, bitPos);
+			PUTBITS( lsbBits, (code & 1) , 1, bitPos);
+			bitPos++;
+
+			totalError+=bestError;
+		}
+	}
+
+	*pixel_indices_MSBp = msbBits;
+	*pixel_indices_LSBp = lsbBits;
+	return totalError;
+}
+
+#define MAXERR1000 (1000*255*255*16) // Start value for error searches; parenthesized so the macro stays a single value inside larger expressions.
+
+// Finds all pixel indices for a 2x4 block using perceptual weighting of error. Returns the summed error.
+// Done using fixed point arithmetics where weights are multiplied by 1000.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockWithTable2x4percep1000(uint8 *img,int width,int height,int startx,int starty,uint8 *avg_color,int table,unsigned int *pixel_indices_MSBp, unsigned int *pixel_indices_LSBp)
+{
+	// Every pixel gets the modifier of table 'table' that, added to avg_color,
+	// has the smallest weighted error.
+	// The two halves of every pixel code are collected in separate words via PUTBITS,
+	// at a position that advances by one per pixel (columns first, then rows within a column).
+	unsigned int msbBits = 0;
+	unsigned int lsbBits = 0;
+	unsigned int totalError = 0;
+	int bitPos = 0;
+
+	for(int px=startx; px<startx+2; px++)
+	{
+		for(int py=starty; py<starty+4; py++)
+		{
+			uint8 pixel[3];
+			pixel[0]=RED(img,width,px,py);
+			pixel[1]=GREEN(img,width,px,py);
+			pixel[2]=BLUE(img,width,px,py);
+
+			// Try the four modifiers of the table; on a tie the earliest one is kept.
+			int bestModifier = 0;
+			unsigned int bestError = MAXERR1000;
+			for(int m=0; m<4; m++)
+			{
+				uint8 candidate[3];
+				candidate[0]=CLAMP(0, avg_color[0]+compressParams[table][m],255);
+				candidate[1]=CLAMP(0, avg_color[1]+compressParams[table][m],255);
+				candidate[2]=CLAMP(0, avg_color[2]+compressParams[table][m],255);
+
+				// The squared difference of each channel is scaled by that channel's
+				// PERCEPTUAL_WEIGHT_*_SQUARED_TIMES1000 constant before the three are added,
+				// so R, G and B do not contribute equally.
+				const unsigned int error = (PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE((candidate[0]-pixel[0])) 
+					 + PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*SQUARE((candidate[1]-pixel[1])) 
+					 + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*SQUARE((candidate[2]-pixel[2])));
+				if(error<bestError)
+				{
+					bestError=error;
+					bestModifier=m;
+				}
+			}
+
+			// In order to simplify hardware, the table {-12, -4, 4, 12} is indexed {11, 10, 00, 01}
+			// so that first bit is sign bit and the other bit is size bit (4 or 12). 
+			// This means that we have to scramble the bits before storing them. 
+			unsigned int code = scramble[bestModifier];
+			PUTBITS( msbBits, (code >> 1), 1, bitPos);
+			PUTBITS( lsbBits, (code & 1) , 1, bitPos);
+			bitPos++;
+
+			totalError+=bestError;
+		}
+	}
+
+	// Publish the collected code bits through the two output pointers.
+	*pixel_indices_MSBp = msbBits;
+	*pixel_indices_LSBp = lsbBits;
+	return totalError;
+}
+
+// Finds all pixel indices for a 2x4 block using perceptual weighting of error (floating point variant). Returns the summed error.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+float compressBlockWithTable2x4percep(uint8 *img,int width,int height,int startx,int starty,uint8 *avg_color,int table,unsigned int *pixel_indices_MSBp, unsigned int *pixel_indices_LSBp)
+{
+	// The two halves of every pixel code are collected in separate words via PUTBITS,
+	// at a position that advances by one per pixel (columns first, then rows within a column).
+	unsigned int msbBits = 0;
+	unsigned int lsbBits = 0;
+	float totalError = 0;
+	int bitPos = 0;
+
+	// Per-channel weights applied to the squared differences.
+	double weightR = PERCEPTUAL_WEIGHT_R_SQUARED;
+	double weightG = PERCEPTUAL_WEIGHT_G_SQUARED;
+	double weightB = PERCEPTUAL_WEIGHT_B_SQUARED;
+
+	for(int px=startx; px<startx+2; px++)
+	{
+		for(int py=starty; py<starty+4; py++)
+		{
+			uint8 pixel[3];
+			pixel[0]=RED(img,width,px,py);
+			pixel[1]=GREEN(img,width,px,py);
+			pixel[2]=BLUE(img,width,px,py);
+
+			// Try the four modifiers of the table; on a tie the earliest one is kept.
+			int bestModifier = 0;
+			float bestError = 255*255*3*16;
+			for(int m=0; m<4; m++)
+			{
+				uint8 candidate[3];
+				candidate[0]=CLAMP(0, avg_color[0]+compressParams[table][m],255);
+				candidate[1]=CLAMP(0, avg_color[1]+compressParams[table][m],255);
+				candidate[2]=CLAMP(0, avg_color[2]+compressParams[table][m],255);
+
+				// The squared difference of each channel is scaled by its own weight
+				// before the three are added, so R, G and B do not contribute equally.
+				const float error = (float)(weightR*SQUARE((candidate[0]-pixel[0])) + (float)weightG*SQUARE((candidate[1]-pixel[1])) + (float)weightB*SQUARE((candidate[2]-pixel[2])));
+				if(error<bestError)
+				{
+					bestError=error;
+					bestModifier=m;
+				}
+			}
+
+			// In order to simplify hardware, the table {-12, -4, 4, 12} is indexed {11, 10, 00, 01}
+			// so that first bit is sign bit and the other bit is size bit (4 or 12). 
+			// This means that we have to scramble the bits before storing them. 
+			unsigned int code = scramble[bestModifier];
+			PUTBITS( msbBits, (code >> 1), 1, bitPos);
+			PUTBITS( lsbBits, (code & 1) , 1, bitPos);
+			bitPos++;
+
+			totalError+=bestError;
+		}
+	}
+
+	*pixel_indices_MSBp = msbBits;
+	*pixel_indices_LSBp = lsbBits;
+	return totalError;
+}
+
+// Finds all pixel indices for a 4x2 block: every pixel gets the modifier of table 'table' that, added to avg_color, has the smallest squared RGB error. Returns the summed error.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int compressBlockWithTable4x2(uint8 *img,int width,int height,int startx,int starty,uint8 *avg_color,int table,unsigned int *pixel_indices_MSBp, unsigned int *pixel_indices_LSBp)
+{
+	// The two halves of every pixel code are collected in separate words via PUTBITS.
+	// Each column uses two consecutive positions and then leaves a gap of two.
+	unsigned int msbBits = 0;
+	unsigned int lsbBits = 0;
+	int totalError = 0;
+	int bitPos = 0;
+
+	for(int px=startx; px<startx+4; px++)
+	{
+		for(int py=starty; py<starty+2; py++)
+		{
+			uint8 pixel[3];
+			pixel[0]=RED(img,width,px,py);
+			pixel[1]=GREEN(img,width,px,py);
+			pixel[2]=BLUE(img,width,px,py);
+
+			// Try the four modifiers of the table; on a tie the earliest one is kept.
+			int bestModifier = 0;
+			int bestError = 255*255*3*16;
+			for(int m=0; m<4; m++)
+			{
+				uint8 candidate[3];
+				candidate[0]=CLAMP(0, avg_color[0]+compressParams[table][m],255);
+				candidate[1]=CLAMP(0, avg_color[1]+compressParams[table][m],255);
+				candidate[2]=CLAMP(0, avg_color[2]+compressParams[table][m],255);
+
+				// Here we just use equal weights to R, G and B. Although this will
+				// give visually worse results, it will give a better PSNR score. 
+				const int error = SQUARE(candidate[0]-pixel[0]) + SQUARE(candidate[1]-pixel[1]) + SQUARE(candidate[2]-pixel[2]);
+				if(error<bestError)
+				{
+					bestError=error;
+					bestModifier=m;
+				}
+			}
+
+			// In order to simplify hardware, the table {-12, -4, 4, 12} is indexed {11, 10, 00, 01}
+			// so that first bit is sign bit and the other bit is size bit (4 or 12). 
+			// This means that we have to scramble the bits before storing them. 
+			unsigned int code = scramble[bestModifier];
+			PUTBITS( msbBits, (code >> 1), 1, bitPos);
+			PUTBITS( lsbBits, (code & 1) , 1, bitPos);
+			bitPos++;
+
+			totalError+=bestError;
+		}
+		bitPos+=2;	// skip the two positions this function does not fill for the column
+	}
+
+	*pixel_indices_MSBp = msbBits;
+	*pixel_indices_LSBp = lsbBits;
+	return totalError;
+}
+
+// Finds all pixel indices for a 4x2 block using perceptual weighting of error. Returns the summed error.
+// Done using fixed point arithmetics where 1000 corresponds to 1.0.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockWithTable4x2percep1000(uint8 *img,int width,int height,int startx,int starty,uint8 *avg_color,int table,unsigned int *pixel_indices_MSBp, unsigned int *pixel_indices_LSBp)
+{
+	// The two halves of every pixel code are collected in separate words via PUTBITS.
+	// Each column uses two consecutive positions and then leaves a gap of two.
+	unsigned int msbBits = 0;
+	unsigned int lsbBits = 0;
+	unsigned int totalError = 0;
+	int bitPos = 0;
+
+	for(int px=startx; px<startx+4; px++)
+	{
+		for(int py=starty; py<starty+2; py++)
+		{
+			uint8 pixel[3];
+			pixel[0]=RED(img,width,px,py);
+			pixel[1]=GREEN(img,width,px,py);
+			pixel[2]=BLUE(img,width,px,py);
+
+			// Try the four modifiers of the table; on a tie the earliest one is kept.
+			int bestModifier = 0;
+			unsigned int bestError = MAXERR1000;
+			for(int m=0; m<4; m++)
+			{
+				uint8 candidate[3];
+				candidate[0]=CLAMP(0, avg_color[0]+compressParams[table][m],255);
+				candidate[1]=CLAMP(0, avg_color[1]+compressParams[table][m],255);
+				candidate[2]=CLAMP(0, avg_color[2]+compressParams[table][m],255);
+
+				// The squared difference of each channel is scaled by that channel's
+				// PERCEPTUAL_WEIGHT_*_SQUARED_TIMES1000 constant before the three are added.
+				const unsigned int error = PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(candidate[0]-pixel[0]) 
+					+ PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*SQUARE(candidate[1]-pixel[1]) 
+					+ PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*SQUARE(candidate[2]-pixel[2]);
+				if(error<bestError)
+				{
+					bestError=error;
+					bestModifier=m;
+				}
+			}
+
+			// In order to simplify hardware, the table {-12, -4, 4, 12} is indexed {11, 10, 00, 01}
+			// so that first bit is sign bit and the other bit is size bit (4 or 12). 
+			// This means that we have to scramble the bits before storing them. 
+			unsigned int code = scramble[bestModifier];
+			PUTBITS( msbBits, (code >> 1), 1, bitPos);
+			PUTBITS( lsbBits, (code & 1) , 1, bitPos);
+			bitPos++;
+
+			totalError+=bestError;
+		}
+		bitPos+=2;	// skip the two positions this function does not fill for the column
+	}
+
+	// Publish the collected code bits through the two output pointers.
+	*pixel_indices_MSBp = msbBits;
+	*pixel_indices_LSBp = lsbBits;
+	return totalError;
+}
+
+// Finds all pixel indices for a 4x2 block using perceptual weighting of error (floating point variant). Returns the summed error.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+float compressBlockWithTable4x2percep(uint8 *img,int width,int height,int startx,int starty,uint8 *avg_color,int table,unsigned int *pixel_indices_MSBp, unsigned int *pixel_indices_LSBp)
+{
+	// The two halves of every pixel code are collected in separate words via PUTBITS.
+	// Each column uses two consecutive positions and then leaves a gap of two.
+	unsigned int msbBits = 0;
+	unsigned int lsbBits = 0;
+	float totalError = 0;
+	int bitPos = 0;
+	float weightR = (float) PERCEPTUAL_WEIGHT_R_SQUARED;
+	float weightG = (float) PERCEPTUAL_WEIGHT_G_SQUARED;
+	float weightB = (float) PERCEPTUAL_WEIGHT_B_SQUARED;
+
+	for(int px=startx; px<startx+4; px++)
+	{
+		for(int py=starty; py<starty+2; py++)
+		{
+			uint8 pixel[3];
+			pixel[0]=RED(img,width,px,py);
+			pixel[1]=GREEN(img,width,px,py);
+			pixel[2]=BLUE(img,width,px,py);
+
+			// Try the four modifiers of the table; on a tie the earliest one is kept.
+			int bestModifier = 0;
+			float bestError = 255*255*3*16;
+			for(int m=0; m<4; m++)
+			{
+				uint8 candidate[3];
+				candidate[0]=CLAMP(0, avg_color[0]+compressParams[table][m],255);
+				candidate[1]=CLAMP(0, avg_color[1]+compressParams[table][m],255);
+				candidate[2]=CLAMP(0, avg_color[2]+compressParams[table][m],255);
+
+				// The squared difference of each channel is scaled by its own weight
+				// before the three are added, so R, G and B do not contribute equally.
+				const float error = (float) weightR*SQUARE(candidate[0]-pixel[0]) + (float)weightG*SQUARE(candidate[1]-pixel[1]) + (float)weightB*SQUARE(candidate[2]-pixel[2]);
+				if(error<bestError)
+				{
+					bestError=error;
+					bestModifier=m;
+				}
+			}
+
+			// In order to simplify hardware, the table {-12, -4, 4, 12} is indexed {11, 10, 00, 01}
+			// so that first bit is sign bit and the other bit is size bit (4 or 12). 
+			// This means that we have to scramble the bits before storing them. 
+			unsigned int code = scramble[bestModifier];
+			PUTBITS( msbBits, (code >> 1), 1, bitPos);
+			PUTBITS( lsbBits, (code & 1) , 1, bitPos);
+			bitPos++;
+
+			totalError+=bestError;
+		}
+		bitPos+=2;	// skip the two positions this function does not fill for the column
+	}
+
+	*pixel_indices_MSBp = msbBits;
+	*pixel_indices_LSBp = lsbBits;
+	return totalError;
+}
+
+// Table for fast implementation of clamping to the interval [0,255] followed by addition of 255.
+const int clamp_table_plus_255[768] = {0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 0+255, 
+                        0+255, 1+255, 2+255, 3+255, 4+255, 5+255, 6+255, 7+255, 8+255, 9+255, 10+255, 11+255, 12+255, 13+255, 14+255, 15+255, 16+255, 17+255, 18+255, 19+255, 20+255, 21+255, 22+255, 23+255, 24+255, 25+255, 26+255, 27+255, 28+255, 29+255, 30+255, 31+255, 32+255, 33+255, 34+255, 35+255, 36+255, 37+255, 38+255, 39+255, 40+255, 41+255, 42+255, 43+255, 44+255, 45+255, 46+255, 47+255, 48+255, 49+255, 50+255, 51+255, 52+255, 53+255, 54+255, 55+255, 56+255, 57+255, 58+255, 59+255, 60+255, 61+255, 62+255, 63+255, 64+255, 65+255, 66+255, 67+255, 68+255, 69+255, 70+255, 71+255, 72+255, 73+255, 74+255, 75+255, 76+255, 77+255, 78+255, 79+255, 80+255, 81+255, 82+255, 83+255, 84+255, 85+255, 86+255, 87+255, 88+255, 89+255, 90+255, 91+255, 92+255, 93+255, 94+255, 95+255, 96+255, 97+255, 98+255, 99+255, 100+255, 101+255, 102+255, 103+255, 104+255, 105+255, 106+255, 107+255, 108+255, 109+255, 110+255, 111+255, 112+255, 113+255, 114+255, 115+255, 116+255, 117+255, 118+255, 119+255, 120+255, 121+255, 122+255, 123+255, 124+255, 125+255, 126+255, 127+255, 128+255, 129+255, 130+255, 131+255, 132+255, 133+255, 134+255, 135+255, 136+255, 137+255, 138+255, 139+255, 140+255, 141+255, 142+255, 143+255, 144+255, 145+255, 146+255, 147+255, 148+255, 149+255, 150+255, 151+255, 152+255, 153+255, 154+255, 155+255, 156+255, 157+255, 158+255, 159+255, 160+255, 161+255, 162+255, 163+255, 164+255, 165+255, 166+255, 167+255, 168+255, 169+255, 170+255, 171+255, 172+255, 173+255, 174+255, 175+255, 176+255, 177+255, 178+255, 179+255, 180+255, 181+255, 182+255, 183+255, 184+255, 185+255, 186+255, 187+255, 188+255, 189+255, 190+255, 191+255, 192+255, 193+255, 194+255, 195+255, 196+255, 197+255, 198+255, 199+255, 200+255, 201+255, 202+255, 203+255, 204+255, 205+255, 206+255, 207+255, 208+255, 209+255, 210+255, 211+255, 
+						212+255, 213+255, 214+255, 215+255, 216+255, 217+255, 218+255, 219+255, 220+255, 221+255, 222+255, 223+255, 224+255, 225+255, 226+255, 227+255, 228+255, 229+255, 230+255, 231+255, 232+255, 233+255, 234+255, 235+255, 236+255, 237+255, 238+255, 239+255, 240+255, 241+255, 242+255, 243+255, 244+255, 245+255, 246+255, 247+255, 248+255, 249+255, 250+255, 251+255, 252+255, 253+255, 254+255, 255+255,
+						255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 
+						255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255, 255+255};
+
+// Table for fast implementation of clamping to the interval [0,255]
+const int clamp_table[768] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+                        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
+						255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255};
+
+// Table for fast implementation of squaring for numbers in the interval [-255, 255]; entry i holds (i-255)*(i-255), so index it with value+255.
+const unsigned int square_table[511] = {65025, 64516, 64009, 63504, 63001, 62500, 62001, 61504, 61009, 60516, 60025, 59536, 59049, 58564, 58081, 57600, 
+						 57121, 56644, 56169, 55696, 55225, 54756, 54289, 53824, 53361, 52900, 52441, 51984, 51529, 51076, 50625, 50176, 
+						 49729, 49284, 48841, 48400, 47961, 47524, 47089, 46656, 46225, 45796, 45369, 44944, 44521, 44100, 43681, 43264, 
+						 42849, 42436, 42025, 41616, 41209, 40804, 40401, 40000, 39601, 39204, 38809, 38416, 38025, 37636, 37249, 36864, 
+						 36481, 36100, 35721, 35344, 34969, 34596, 34225, 33856, 33489, 33124, 32761, 32400, 32041, 31684, 31329, 30976, 
+						 30625, 30276, 29929, 29584, 29241, 28900, 28561, 28224, 27889, 27556, 27225, 26896, 26569, 26244, 25921, 25600, 
+						 25281, 24964, 24649, 24336, 24025, 23716, 23409, 23104, 22801, 22500, 22201, 21904, 21609, 21316, 21025, 20736, 
+						 20449, 20164, 19881, 19600, 19321, 19044, 18769, 18496, 18225, 17956, 17689, 17424, 17161, 16900, 16641, 16384, 
+						 16129, 15876, 15625, 15376, 15129, 14884, 14641, 14400, 14161, 13924, 13689, 13456, 13225, 12996, 12769, 12544, 
+						 12321, 12100, 11881, 11664, 11449, 11236, 11025, 10816, 10609, 10404, 10201, 10000, 9801, 9604, 9409, 9216, 
+						 9025, 8836, 8649, 8464, 8281, 8100, 7921, 7744, 7569, 7396, 7225, 7056, 6889, 6724, 6561, 6400, 
+						 6241, 6084, 5929, 5776, 5625, 5476, 5329, 5184, 5041, 4900, 4761, 4624, 4489, 4356, 4225, 4096, 
+						 3969, 3844, 3721, 3600, 3481, 3364, 3249, 3136, 3025, 2916, 2809, 2704, 2601, 2500, 2401, 2304, 
+						 2209, 2116, 2025, 1936, 1849, 1764, 1681, 1600, 1521, 1444, 1369, 1296, 1225, 1156, 1089, 1024, 
+						 961, 900, 841, 784, 729, 676, 625, 576, 529, 484, 441, 400, 361, 324, 289, 256,
+						 225, 196, 169, 144, 121, 100, 81, 64, 49, 36, 25, 16, 9, 4, 1, 
+					     0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 
+                         256, 289, 324, 361, 400, 441, 484, 529, 576, 625, 676, 729, 784, 841, 900, 961, 
+						 1024, 1089, 1156, 1225, 1296, 1369, 1444, 1521, 1600, 1681, 1764, 1849, 1936, 2025, 2116, 2209, 
+						 2304, 2401, 2500, 2601, 2704, 2809, 2916, 3025, 3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969, 
+						 4096, 4225, 4356, 4489, 4624, 4761, 4900, 5041, 5184, 5329, 5476, 5625, 5776, 5929, 6084, 6241, 
+						 6400, 6561, 6724, 6889, 7056, 7225, 7396, 7569, 7744, 7921, 8100, 8281, 8464, 8649, 8836, 9025, 
+						 9216, 9409, 9604, 9801, 10000, 10201, 10404, 10609, 10816, 11025, 11236, 11449, 11664, 11881, 12100, 12321,
+						 12544, 12769, 12996, 13225, 13456, 13689, 13924, 14161, 14400, 14641, 14884, 15129, 15376, 15625, 15876, 16129,
+						 16384, 16641, 16900, 17161, 17424, 17689, 17956, 18225, 18496, 18769, 19044, 19321, 19600, 19881, 20164, 20449, 
+						 20736, 21025, 21316, 21609, 21904, 22201, 22500, 22801, 23104, 23409, 23716, 24025, 24336, 24649, 24964, 25281, 
+						 25600, 25921, 26244, 26569, 26896, 27225, 27556, 27889, 28224, 28561, 28900, 29241, 29584, 29929, 30276, 30625, 
+						 30976, 31329, 31684, 32041, 32400, 32761, 33124, 33489, 33856, 34225, 34596, 34969, 35344, 35721, 36100, 36481, 
+						 36864, 37249, 37636, 38025, 38416, 38809, 39204, 39601, 40000, 40401, 40804, 41209, 41616, 42025, 42436, 42849, 
+						 43264, 43681, 44100, 44521, 44944, 45369, 45796, 46225, 46656, 47089, 47524, 47961, 48400, 48841, 49284, 49729, 
+						 50176, 50625, 51076, 51529, 51984, 52441, 52900, 53361, 53824, 54289, 54756, 55225, 55696, 56169, 56644, 57121, 
+						 57600, 58081, 58564, 59049, 59536, 60025, 60516, 61009, 61504, 62001, 62500, 63001, 63504, 64009, 64516, 65025}; 
+
+// Short aliases for the PERCEPTUAL_WEIGHT_*_SQUARED_TIMES1000 constants, so that the square_table_percep_* tables below stay compact in source.
+#define KR PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000
+#define KG PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000
+#define KB PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000
+
+// Table for fast implementation of squaring for numbers in the interval [-255, 255] multiplied by the perceptual weight for red.
+const unsigned int square_table_percep_red[511] = {
+                         65025*KR, 64516*KR, 64009*KR, 63504*KR, 63001*KR, 62500*KR, 62001*KR, 61504*KR, 61009*KR, 60516*KR, 60025*KR, 59536*KR, 59049*KR, 58564*KR, 58081*KR, 57600*KR, 
+						 57121*KR, 56644*KR, 56169*KR, 55696*KR, 55225*KR, 54756*KR, 54289*KR, 53824*KR, 53361*KR, 52900*KR, 52441*KR, 51984*KR, 51529*KR, 51076*KR, 50625*KR, 50176*KR, 
+						 49729*KR, 49284*KR, 48841*KR, 48400*KR, 47961*KR, 47524*KR, 47089*KR, 46656*KR, 46225*KR, 45796*KR, 45369*KR, 44944*KR, 44521*KR, 44100*KR, 43681*KR, 43264*KR, 
+						 42849*KR, 42436*KR, 42025*KR, 41616*KR, 41209*KR, 40804*KR, 40401*KR, 40000*KR, 39601*KR, 39204*KR, 38809*KR, 38416*KR, 38025*KR, 37636*KR, 37249*KR, 36864*KR, 
+						 36481*KR, 36100*KR, 35721*KR, 35344*KR, 34969*KR, 34596*KR, 34225*KR, 33856*KR, 33489*KR, 33124*KR, 32761*KR, 32400*KR, 32041*KR, 31684*KR, 31329*KR, 30976*KR, 
+						 30625*KR, 30276*KR, 29929*KR, 29584*KR, 29241*KR, 28900*KR, 28561*KR, 28224*KR, 27889*KR, 27556*KR, 27225*KR, 26896*KR, 26569*KR, 26244*KR, 25921*KR, 25600*KR, 
+						 25281*KR, 24964*KR, 24649*KR, 24336*KR, 24025*KR, 23716*KR, 23409*KR, 23104*KR, 22801*KR, 22500*KR, 22201*KR, 21904*KR, 21609*KR, 21316*KR, 21025*KR, 20736*KR, 
+						 20449*KR, 20164*KR, 19881*KR, 19600*KR, 19321*KR, 19044*KR, 18769*KR, 18496*KR, 18225*KR, 17956*KR, 17689*KR, 17424*KR, 17161*KR, 16900*KR, 16641*KR, 16384*KR, 
+						 16129*KR, 15876*KR, 15625*KR, 15376*KR, 15129*KR, 14884*KR, 14641*KR, 14400*KR, 14161*KR, 13924*KR, 13689*KR, 13456*KR, 13225*KR, 12996*KR, 12769*KR, 12544*KR, 
+						 12321*KR, 12100*KR, 11881*KR, 11664*KR, 11449*KR, 11236*KR, 11025*KR, 10816*KR, 10609*KR, 10404*KR, 10201*KR, 10000*KR, 9801*KR, 9604*KR, 9409*KR, 9216*KR, 
+						 9025*KR, 8836*KR, 8649*KR, 8464*KR, 8281*KR, 8100*KR, 7921*KR, 7744*KR, 7569*KR, 7396*KR, 7225*KR, 7056*KR, 6889*KR, 6724*KR, 6561*KR, 6400*KR, 
+						 6241*KR, 6084*KR, 5929*KR, 5776*KR, 5625*KR, 5476*KR, 5329*KR, 5184*KR, 5041*KR, 4900*KR, 4761*KR, 4624*KR, 4489*KR, 4356*KR, 4225*KR, 4096*KR, 
+						 3969*KR, 3844*KR, 3721*KR, 3600*KR, 3481*KR, 3364*KR, 3249*KR, 3136*KR, 3025*KR, 2916*KR, 2809*KR, 2704*KR, 2601*KR, 2500*KR, 2401*KR, 2304*KR, 
+						 2209*KR, 2116*KR, 2025*KR, 1936*KR, 1849*KR, 1764*KR, 1681*KR, 1600*KR, 1521*KR, 1444*KR, 1369*KR, 1296*KR, 1225*KR, 1156*KR, 1089*KR, 1024*KR, 
+						 961*KR, 900*KR, 841*KR, 784*KR, 729*KR, 676*KR, 625*KR, 576*KR, 529*KR, 484*KR, 441*KR, 400*KR, 361*KR, 324*KR, 289*KR, 256*KR,
+						 225*KR, 196*KR, 169*KR, 144*KR, 121*KR, 100*KR, 81*KR, 64*KR, 49*KR, 36*KR, 25*KR, 16*KR, 9*KR, 4*KR, 1*KR, 
+						 0*KR, 1*KR, 4*KR, 9*KR, 16*KR, 25*KR, 36*KR, 49*KR, 64*KR, 81*KR, 100*KR, 121*KR, 144*KR, 169*KR, 196*KR, 225*KR, 
+						 256*KR, 289*KR, 324*KR, 361*KR, 400*KR, 441*KR, 484*KR, 529*KR, 576*KR, 625*KR, 676*KR, 729*KR, 784*KR, 841*KR, 900*KR, 961*KR, 
+						 1024*KR, 1089*KR, 1156*KR, 1225*KR, 1296*KR, 1369*KR, 1444*KR, 1521*KR, 1600*KR, 1681*KR, 1764*KR, 1849*KR, 1936*KR, 2025*KR, 2116*KR, 2209*KR, 
+						 2304*KR, 2401*KR, 2500*KR, 2601*KR, 2704*KR, 2809*KR, 2916*KR, 3025*KR, 3136*KR, 3249*KR, 3364*KR, 3481*KR, 3600*KR, 3721*KR, 3844*KR, 3969*KR, 
+						 4096*KR, 4225*KR, 4356*KR, 4489*KR, 4624*KR, 4761*KR, 4900*KR, 5041*KR, 5184*KR, 5329*KR, 5476*KR, 5625*KR, 5776*KR, 5929*KR, 6084*KR, 6241*KR, 
+						 6400*KR, 6561*KR, 6724*KR, 6889*KR, 7056*KR, 7225*KR, 7396*KR, 7569*KR, 7744*KR, 7921*KR, 8100*KR, 8281*KR, 8464*KR, 8649*KR, 8836*KR, 9025*KR, 
+						 9216*KR, 9409*KR, 9604*KR, 9801*KR, 10000*KR, 10201*KR, 10404*KR, 10609*KR, 10816*KR, 11025*KR, 11236*KR, 11449*KR, 11664*KR, 11881*KR, 12100*KR, 12321*KR,
+						 12544*KR, 12769*KR, 12996*KR, 13225*KR, 13456*KR, 13689*KR, 13924*KR, 14161*KR, 14400*KR, 14641*KR, 14884*KR, 15129*KR, 15376*KR, 15625*KR, 15876*KR, 16129*KR,
+						 16384*KR, 16641*KR, 16900*KR, 17161*KR, 17424*KR, 17689*KR, 17956*KR, 18225*KR, 18496*KR, 18769*KR, 19044*KR, 19321*KR, 19600*KR, 19881*KR, 20164*KR, 20449*KR, 
+						 20736*KR, 21025*KR, 21316*KR, 21609*KR, 21904*KR, 22201*KR, 22500*KR, 22801*KR, 23104*KR, 23409*KR, 23716*KR, 24025*KR, 24336*KR, 24649*KR, 24964*KR, 25281*KR, 
+						 25600*KR, 25921*KR, 26244*KR, 26569*KR, 26896*KR, 27225*KR, 27556*KR, 27889*KR, 28224*KR, 28561*KR, 28900*KR, 29241*KR, 29584*KR, 29929*KR, 30276*KR, 30625*KR, 
+						 30976*KR, 31329*KR, 31684*KR, 32041*KR, 32400*KR, 32761*KR, 33124*KR, 33489*KR, 33856*KR, 34225*KR, 34596*KR, 34969*KR, 35344*KR, 35721*KR, 36100*KR, 36481*KR, 
+						 36864*KR, 37249*KR, 37636*KR, 38025*KR, 38416*KR, 38809*KR, 39204*KR, 39601*KR, 40000*KR, 40401*KR, 40804*KR, 41209*KR, 41616*KR, 42025*KR, 42436*KR, 42849*KR, 
+						 43264*KR, 43681*KR, 44100*KR, 44521*KR, 44944*KR, 45369*KR, 45796*KR, 46225*KR, 46656*KR, 47089*KR, 47524*KR, 47961*KR, 48400*KR, 48841*KR, 49284*KR, 49729*KR, 
+						 50176*KR, 50625*KR, 51076*KR, 51529*KR, 51984*KR, 52441*KR, 52900*KR, 53361*KR, 53824*KR, 54289*KR, 54756*KR, 55225*KR, 55696*KR, 56169*KR, 56644*KR, 57121*KR, 
+						 57600*KR, 58081*KR, 58564*KR, 59049*KR, 59536*KR, 60025*KR, 60516*KR, 61009*KR, 61504*KR, 62001*KR, 62500*KR, 63001*KR, 63504*KR, 64009*KR, 64516*KR, 65025*KR}; 
+
+// Table for fast implementation of squaring for numbers in the interval [-255, 255] multiplied by the perceptual weight for green.
+const unsigned int square_table_percep_green[511] = {
+                         65025*KG, 64516*KG, 64009*KG, 63504*KG, 63001*KG, 62500*KG, 62001*KG, 61504*KG, 61009*KG, 60516*KG, 60025*KG, 59536*KG, 59049*KG, 58564*KG, 58081*KG, 57600*KG, 
+						 57121*KG, 56644*KG, 56169*KG, 55696*KG, 55225*KG, 54756*KG, 54289*KG, 53824*KG, 53361*KG, 52900*KG, 52441*KG, 51984*KG, 51529*KG, 51076*KG, 50625*KG, 50176*KG, 
+						 49729*KG, 49284*KG, 48841*KG, 48400*KG, 47961*KG, 47524*KG, 47089*KG, 46656*KG, 46225*KG, 45796*KG, 45369*KG, 44944*KG, 44521*KG, 44100*KG, 43681*KG, 43264*KG, 
+						 42849*KG, 42436*KG, 42025*KG, 41616*KG, 41209*KG, 40804*KG, 40401*KG, 40000*KG, 39601*KG, 39204*KG, 38809*KG, 38416*KG, 38025*KG, 37636*KG, 37249*KG, 36864*KG, 
+						 36481*KG, 36100*KG, 35721*KG, 35344*KG, 34969*KG, 34596*KG, 34225*KG, 33856*KG, 33489*KG, 33124*KG, 32761*KG, 32400*KG, 32041*KG, 31684*KG, 31329*KG, 30976*KG, 
+						 30625*KG, 30276*KG, 29929*KG, 29584*KG, 29241*KG, 28900*KG, 28561*KG, 28224*KG, 27889*KG, 27556*KG, 27225*KG, 26896*KG, 26569*KG, 26244*KG, 25921*KG, 25600*KG, 
+						 25281*KG, 24964*KG, 24649*KG, 24336*KG, 24025*KG, 23716*KG, 23409*KG, 23104*KG, 22801*KG, 22500*KG, 22201*KG, 21904*KG, 21609*KG, 21316*KG, 21025*KG, 20736*KG, 
+						 20449*KG, 20164*KG, 19881*KG, 19600*KG, 19321*KG, 19044*KG, 18769*KG, 18496*KG, 18225*KG, 17956*KG, 17689*KG, 17424*KG, 17161*KG, 16900*KG, 16641*KG, 16384*KG, 
+						 16129*KG, 15876*KG, 15625*KG, 15376*KG, 15129*KG, 14884*KG, 14641*KG, 14400*KG, 14161*KG, 13924*KG, 13689*KG, 13456*KG, 13225*KG, 12996*KG, 12769*KG, 12544*KG, 
+						 12321*KG, 12100*KG, 11881*KG, 11664*KG, 11449*KG, 11236*KG, 11025*KG, 10816*KG, 10609*KG, 10404*KG, 10201*KG, 10000*KG, 9801*KG, 9604*KG, 9409*KG, 9216*KG, 
+						 9025*KG, 8836*KG, 8649*KG, 8464*KG, 8281*KG, 8100*KG, 7921*KG, 7744*KG, 7569*KG, 7396*KG, 7225*KG, 7056*KG, 6889*KG, 6724*KG, 6561*KG, 6400*KG, 
+						 6241*KG, 6084*KG, 5929*KG, 5776*KG, 5625*KG, 5476*KG, 5329*KG, 5184*KG, 5041*KG, 4900*KG, 4761*KG, 4624*KG, 4489*KG, 4356*KG, 4225*KG, 4096*KG, 
+						 3969*KG, 3844*KG, 3721*KG, 3600*KG, 3481*KG, 3364*KG, 3249*KG, 3136*KG, 3025*KG, 2916*KG, 2809*KG, 2704*KG, 2601*KG, 2500*KG, 2401*KG, 2304*KG, 
+						 2209*KG, 2116*KG, 2025*KG, 1936*KG, 1849*KG, 1764*KG, 1681*KG, 1600*KG, 1521*KG, 1444*KG, 1369*KG, 1296*KG, 1225*KG, 1156*KG, 1089*KG, 1024*KG, 
+						 961*KG, 900*KG, 841*KG, 784*KG, 729*KG, 676*KG, 625*KG, 576*KG, 529*KG, 484*KG, 441*KG, 400*KG, 361*KG, 324*KG, 289*KG, 256*KG,
+						 225*KG, 196*KG, 169*KG, 144*KG, 121*KG, 100*KG, 81*KG, 64*KG, 49*KG, 36*KG, 25*KG, 16*KG, 9*KG, 4*KG, 1*KG, 
+						 0*KG, 1*KG, 4*KG, 9*KG, 16*KG, 25*KG, 36*KG, 49*KG, 64*KG, 81*KG, 100*KG, 121*KG, 144*KG, 169*KG, 196*KG, 225*KG, 
+						 256*KG, 289*KG, 324*KG, 361*KG, 400*KG, 441*KG, 484*KG, 529*KG, 576*KG, 625*KG, 676*KG, 729*KG, 784*KG, 841*KG, 900*KG, 961*KG, 
+						 1024*KG, 1089*KG, 1156*KG, 1225*KG, 1296*KG, 1369*KG, 1444*KG, 1521*KG, 1600*KG, 1681*KG, 1764*KG, 1849*KG, 1936*KG, 2025*KG, 2116*KG, 2209*KG, 
+						 2304*KG, 2401*KG, 2500*KG, 2601*KG, 2704*KG, 2809*KG, 2916*KG, 3025*KG, 3136*KG, 3249*KG, 3364*KG, 3481*KG, 3600*KG, 3721*KG, 3844*KG, 3969*KG, 
+						 4096*KG, 4225*KG, 4356*KG, 4489*KG, 4624*KG, 4761*KG, 4900*KG, 5041*KG, 5184*KG, 5329*KG, 5476*KG, 5625*KG, 5776*KG, 5929*KG, 6084*KG, 6241*KG, 
+						 6400*KG, 6561*KG, 6724*KG, 6889*KG, 7056*KG, 7225*KG, 7396*KG, 7569*KG, 7744*KG, 7921*KG, 8100*KG, 8281*KG, 8464*KG, 8649*KG, 8836*KG, 9025*KG, 
+						 9216*KG, 9409*KG, 9604*KG, 9801*KG, 10000*KG, 10201*KG, 10404*KG, 10609*KG, 10816*KG, 11025*KG, 11236*KG, 11449*KG, 11664*KG, 11881*KG, 12100*KG, 12321*KG,
+						 12544*KG, 12769*KG, 12996*KG, 13225*KG, 13456*KG, 13689*KG, 13924*KG, 14161*KG, 14400*KG, 14641*KG, 14884*KG, 15129*KG, 15376*KG, 15625*KG, 15876*KG, 16129*KG,
+						 16384*KG, 16641*KG, 16900*KG, 17161*KG, 17424*KG, 17689*KG, 17956*KG, 18225*KG, 18496*KG, 18769*KG, 19044*KG, 19321*KG, 19600*KG, 19881*KG, 20164*KG, 20449*KG, 
+						 20736*KG, 21025*KG, 21316*KG, 21609*KG, 21904*KG, 22201*KG, 22500*KG, 22801*KG, 23104*KG, 23409*KG, 23716*KG, 24025*KG, 24336*KG, 24649*KG, 24964*KG, 25281*KG, 
+						 25600*KG, 25921*KG, 26244*KG, 26569*KG, 26896*KG, 27225*KG, 27556*KG, 27889*KG, 28224*KG, 28561*KG, 28900*KG, 29241*KG, 29584*KG, 29929*KG, 30276*KG, 30625*KG, 
+						 30976*KG, 31329*KG, 31684*KG, 32041*KG, 32400*KG, 32761*KG, 33124*KG, 33489*KG, 33856*KG, 34225*KG, 34596*KG, 34969*KG, 35344*KG, 35721*KG, 36100*KG, 36481*KG, 
+						 36864*KG, 37249*KG, 37636*KG, 38025*KG, 38416*KG, 38809*KG, 39204*KG, 39601*KG, 40000*KG, 40401*KG, 40804*KG, 41209*KG, 41616*KG, 42025*KG, 42436*KG, 42849*KG, 
+						 43264*KG, 43681*KG, 44100*KG, 44521*KG, 44944*KG, 45369*KG, 45796*KG, 46225*KG, 46656*KG, 47089*KG, 47524*KG, 47961*KG, 48400*KG, 48841*KG, 49284*KG, 49729*KG, 
+						 50176*KG, 50625*KG, 51076*KG, 51529*KG, 51984*KG, 52441*KG, 52900*KG, 53361*KG, 53824*KG, 54289*KG, 54756*KG, 55225*KG, 55696*KG, 56169*KG, 56644*KG, 57121*KG, 
+						 57600*KG, 58081*KG, 58564*KG, 59049*KG, 59536*KG, 60025*KG, 60516*KG, 61009*KG, 61504*KG, 62001*KG, 62500*KG, 63001*KG, 63504*KG, 64009*KG, 64516*KG, 65025*KG}; 
+
+// Table for fast implementation of squaring for numbers in the interval [-255, 255] multiplied by the perceptual weight for blue.
+const unsigned int square_table_percep_blue[511] = {
+                         65025*KB, 64516*KB, 64009*KB, 63504*KB, 63001*KB, 62500*KB, 62001*KB, 61504*KB, 61009*KB, 60516*KB, 60025*KB, 59536*KB, 59049*KB, 58564*KB, 58081*KB, 57600*KB, 
+						 57121*KB, 56644*KB, 56169*KB, 55696*KB, 55225*KB, 54756*KB, 54289*KB, 53824*KB, 53361*KB, 52900*KB, 52441*KB, 51984*KB, 51529*KB, 51076*KB, 50625*KB, 50176*KB, 
+						 49729*KB, 49284*KB, 48841*KB, 48400*KB, 47961*KB, 47524*KB, 47089*KB, 46656*KB, 46225*KB, 45796*KB, 45369*KB, 44944*KB, 44521*KB, 44100*KB, 43681*KB, 43264*KB, 
+						 42849*KB, 42436*KB, 42025*KB, 41616*KB, 41209*KB, 40804*KB, 40401*KB, 40000*KB, 39601*KB, 39204*KB, 38809*KB, 38416*KB, 38025*KB, 37636*KB, 37249*KB, 36864*KB, 
+						 36481*KB, 36100*KB, 35721*KB, 35344*KB, 34969*KB, 34596*KB, 34225*KB, 33856*KB, 33489*KB, 33124*KB, 32761*KB, 32400*KB, 32041*KB, 31684*KB, 31329*KB, 30976*KB, 
+						 30625*KB, 30276*KB, 29929*KB, 29584*KB, 29241*KB, 28900*KB, 28561*KB, 28224*KB, 27889*KB, 27556*KB, 27225*KB, 26896*KB, 26569*KB, 26244*KB, 25921*KB, 25600*KB, 
+						 25281*KB, 24964*KB, 24649*KB, 24336*KB, 24025*KB, 23716*KB, 23409*KB, 23104*KB, 22801*KB, 22500*KB, 22201*KB, 21904*KB, 21609*KB, 21316*KB, 21025*KB, 20736*KB, 
+						 20449*KB, 20164*KB, 19881*KB, 19600*KB, 19321*KB, 19044*KB, 18769*KB, 18496*KB, 18225*KB, 17956*KB, 17689*KB, 17424*KB, 17161*KB, 16900*KB, 16641*KB, 16384*KB, 
+						 16129*KB, 15876*KB, 15625*KB, 15376*KB, 15129*KB, 14884*KB, 14641*KB, 14400*KB, 14161*KB, 13924*KB, 13689*KB, 13456*KB, 13225*KB, 12996*KB, 12769*KB, 12544*KB, 
+						 12321*KB, 12100*KB, 11881*KB, 11664*KB, 11449*KB, 11236*KB, 11025*KB, 10816*KB, 10609*KB, 10404*KB, 10201*KB, 10000*KB, 9801*KB, 9604*KB, 9409*KB, 9216*KB, 
+						 9025*KB, 8836*KB, 8649*KB, 8464*KB, 8281*KB, 8100*KB, 7921*KB, 7744*KB, 7569*KB, 7396*KB, 7225*KB, 7056*KB, 6889*KB, 6724*KB, 6561*KB, 6400*KB, 
+						 6241*KB, 6084*KB, 5929*KB, 5776*KB, 5625*KB, 5476*KB, 5329*KB, 5184*KB, 5041*KB, 4900*KB, 4761*KB, 4624*KB, 4489*KB, 4356*KB, 4225*KB, 4096*KB, 
+						 3969*KB, 3844*KB, 3721*KB, 3600*KB, 3481*KB, 3364*KB, 3249*KB, 3136*KB, 3025*KB, 2916*KB, 2809*KB, 2704*KB, 2601*KB, 2500*KB, 2401*KB, 2304*KB, 
+						 2209*KB, 2116*KB, 2025*KB, 1936*KB, 1849*KB, 1764*KB, 1681*KB, 1600*KB, 1521*KB, 1444*KB, 1369*KB, 1296*KB, 1225*KB, 1156*KB, 1089*KB, 1024*KB, 
+						 961*KB, 900*KB, 841*KB, 784*KB, 729*KB, 676*KB, 625*KB, 576*KB, 529*KB, 484*KB, 441*KB, 400*KB, 361*KB, 324*KB, 289*KB, 256*KB,
+						 225*KB, 196*KB, 169*KB, 144*KB, 121*KB, 100*KB, 81*KB, 64*KB, 49*KB, 36*KB, 25*KB, 16*KB, 9*KB, 4*KB, 1*KB, 
+						 0*KB, 1*KB, 4*KB, 9*KB, 16*KB, 25*KB, 36*KB, 49*KB, 64*KB, 81*KB, 100*KB, 121*KB, 144*KB, 169*KB, 196*KB, 225*KB, 
+						 256*KB, 289*KB, 324*KB, 361*KB, 400*KB, 441*KB, 484*KB, 529*KB, 576*KB, 625*KB, 676*KB, 729*KB, 784*KB, 841*KB, 900*KB, 961*KB, 
+						 1024*KB, 1089*KB, 1156*KB, 1225*KB, 1296*KB, 1369*KB, 1444*KB, 1521*KB, 1600*KB, 1681*KB, 1764*KB, 1849*KB, 1936*KB, 2025*KB, 2116*KB, 2209*KB, 
+						 2304*KB, 2401*KB, 2500*KB, 2601*KB, 2704*KB, 2809*KB, 2916*KB, 3025*KB, 3136*KB, 3249*KB, 3364*KB, 3481*KB, 3600*KB, 3721*KB, 3844*KB, 3969*KB, 
+						 4096*KB, 4225*KB, 4356*KB, 4489*KB, 4624*KB, 4761*KB, 4900*KB, 5041*KB, 5184*KB, 5329*KB, 5476*KB, 5625*KB, 5776*KB, 5929*KB, 6084*KB, 6241*KB, 
+						 6400*KB, 6561*KB, 6724*KB, 6889*KB, 7056*KB, 7225*KB, 7396*KB, 7569*KB, 7744*KB, 7921*KB, 8100*KB, 8281*KB, 8464*KB, 8649*KB, 8836*KB, 9025*KB, 
+						 9216*KB, 9409*KB, 9604*KB, 9801*KB, 10000*KB, 10201*KB, 10404*KB, 10609*KB, 10816*KB, 11025*KB, 11236*KB, 11449*KB, 11664*KB, 11881*KB, 12100*KB, 12321*KB,
+						 12544*KB, 12769*KB, 12996*KB, 13225*KB, 13456*KB, 13689*KB, 13924*KB, 14161*KB, 14400*KB, 14641*KB, 14884*KB, 15129*KB, 15376*KB, 15625*KB, 15876*KB, 16129*KB,
+						 16384*KB, 16641*KB, 16900*KB, 17161*KB, 17424*KB, 17689*KB, 17956*KB, 18225*KB, 18496*KB, 18769*KB, 19044*KB, 19321*KB, 19600*KB, 19881*KB, 20164*KB, 20449*KB, 
+						 20736*KB, 21025*KB, 21316*KB, 21609*KB, 21904*KB, 22201*KB, 22500*KB, 22801*KB, 23104*KB, 23409*KB, 23716*KB, 24025*KB, 24336*KB, 24649*KB, 24964*KB, 25281*KB, 
+						 25600*KB, 25921*KB, 26244*KB, 26569*KB, 26896*KB, 27225*KB, 27556*KB, 27889*KB, 28224*KB, 28561*KB, 28900*KB, 29241*KB, 29584*KB, 29929*KB, 30276*KB, 30625*KB, 
+						 30976*KB, 31329*KB, 31684*KB, 32041*KB, 32400*KB, 32761*KB, 33124*KB, 33489*KB, 33856*KB, 34225*KB, 34596*KB, 34969*KB, 35344*KB, 35721*KB, 36100*KB, 36481*KB, 
+						 36864*KB, 37249*KB, 37636*KB, 38025*KB, 38416*KB, 38809*KB, 39204*KB, 39601*KB, 40000*KB, 40401*KB, 40804*KB, 41209*KB, 41616*KB, 42025*KB, 42436*KB, 42849*KB, 
+						 43264*KB, 43681*KB, 44100*KB, 44521*KB, 44944*KB, 45369*KB, 45796*KB, 46225*KB, 46656*KB, 47089*KB, 47524*KB, 47961*KB, 48400*KB, 48841*KB, 49284*KB, 49729*KB, 
+						 50176*KB, 50625*KB, 51076*KB, 51529*KB, 51984*KB, 52441*KB, 52900*KB, 53361*KB, 53824*KB, 54289*KB, 54756*KB, 55225*KB, 55696*KB, 56169*KB, 56644*KB, 57121*KB, 
+						 57600*KB, 58081*KB, 58564*KB, 59049*KB, 59536*KB, 60025*KB, 60516*KB, 61009*KB, 61504*KB, 62001*KB, 62500*KB, 63001*KB, 63504*KB, 64009*KB, 64516*KB, 65025*KB}; 
+
+// Picks, for one 2x4 area, the table that compressBlockWithTable2x4() scores lowest.
+// All 8 tables are tried; img, width, height, startx, starty and avg_color are forwarded unchanged.
+// best_table (0..7) and best_pixel_indices_MSB/LSB are only written when a candidate error is
+// strictly below the running minimum, which starts at 3*255*255*16. Returns that minimum error.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int tryalltables_3bittable2x4(uint8 *img,int width,int height,int startx,int starty,uint8 *avg_color, unsigned int &best_table,unsigned int &best_pixel_indices_MSB, unsigned int &best_pixel_indices_LSB)
+{
+	int lowest_error = 3*255*255*16;
+	unsigned int msb_bits, lsb_bits;
+
+	// The callee is handed the doubled table number (0,2,..,14); the plain number is reported back.
+	for(int table = 0; table < 8; table++)
+	{
+		const int candidate_error = compressBlockWithTable2x4(img,width,height,startx,starty,avg_color,table * 2,&msb_bits,&lsb_bits);
+		if(candidate_error >= lowest_error)
+			continue;	// not an improvement; ties keep the earlier table
+		lowest_error = candidate_error;
+		best_pixel_indices_MSB = msb_bits;
+		best_pixel_indices_LSB = lsb_bits;
+		best_table = table;
+	}
+	return lowest_error;
+}
+
+// Picks, for one 2x4 area, the table that compressBlockWithTable2x4percep1000() scores lowest.
+// Uses perceptual weighting, in a fixed point implementation where 1000 equals 1.0.
+//
+// All 8 tables are tried; img, width, height, startx, starty and avg_color are forwarded unchanged.
+// best_table (0..7) and best_pixel_indices_MSB/LSB are only written when a candidate error is
+// strictly below the running minimum, which starts at MAXERR1000. Returns that minimum error.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int tryalltables_3bittable2x4percep1000(uint8 *img,int width,int height,int startx,int starty,uint8 *avg_color, unsigned int &best_table,unsigned int &best_pixel_indices_MSB, unsigned int &best_pixel_indices_LSB)
+{
+	unsigned int lowest_error = MAXERR1000;
+	unsigned int msb_bits, lsb_bits;
+
+	// The callee is handed the doubled table number (0,2,..,14); the plain number is reported back.
+	for(int table = 0; table < 8; table++)
+	{
+		const unsigned int candidate_error =
+			compressBlockWithTable2x4percep1000(img,width,height,startx,starty,avg_color,table * 2,&msb_bits,&lsb_bits);
+		// Not an improvement: skip. Ties keep the earlier table.
+		if(candidate_error >= lowest_error)
+			continue;
+
+		lowest_error = candidate_error;
+		best_pixel_indices_MSB = msb_bits;
+		best_pixel_indices_LSB = lsb_bits;
+		best_table = table;
+	}
+	return lowest_error;
+}
+
+// Picks, for one 2x4 area, the table that compressBlockWithTable2x4percep() scores lowest.
+// Uses perceptual weighting; the error is tracked as a float and converted to int on return.
+// All 8 tables are tried. best_table (0..7) and best_pixel_indices_MSB/LSB are only written when a
+// candidate error is strictly below the running minimum, which starts at 3*255*255*16.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int tryalltables_3bittable2x4percep(uint8 *img,int width,int height,int startx,int starty,uint8 *avg_color, unsigned int &best_table,unsigned int &best_pixel_indices_MSB, unsigned int &best_pixel_indices_LSB)
+{
+	float lowest_error = 3*255*255*16;
+	unsigned int msb_bits, lsb_bits;
+
+	// The callee is handed the doubled table number (0,2,..,14); the plain number is reported back.
+	for(int table = 0; table < 8; table++)
+	{
+		const float candidate_error = compressBlockWithTable2x4percep(img,width,height,startx,starty,avg_color,table * 2,&msb_bits,&lsb_bits);
+		// Strict comparison: ties keep the earlier table.
+		if(candidate_error < lowest_error)
+		{
+			lowest_error = candidate_error;
+			best_pixel_indices_MSB = msb_bits;
+			best_pixel_indices_LSB = lsb_bits;
+			best_table = table;
+		}
+	}
+	return (int) lowest_error;
+}
+
+// Picks, for one 4x2 area, the table that compressBlockWithTable4x2() scores lowest.
+// All 8 tables are tried; img, width, height, startx, starty and avg_color are forwarded unchanged.
+// best_table (0..7) and best_pixel_indices_MSB/LSB are only written when a candidate error is
+// strictly below the running minimum, which starts at 3*255*255*16. Returns that minimum error.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int tryalltables_3bittable4x2(uint8 *img,int width,int height,int startx,int starty,uint8 *avg_color, unsigned int &best_table,unsigned int &best_pixel_indices_MSB, unsigned int &best_pixel_indices_LSB)
+{
+	int lowest_error = 3*255*255*16;
+	unsigned int msb_bits, lsb_bits;
+
+	// The callee is handed the doubled table number (0,2,..,14); the plain number is reported back.
+	for(int table = 0; table < 8; table++)
+	{
+		const int candidate_error = compressBlockWithTable4x2(img,width,height,startx,starty,avg_color,table * 2,&msb_bits,&lsb_bits);
+		if(candidate_error >= lowest_error)
+			continue;	// not an improvement; ties keep the earlier table
+
+		lowest_error = candidate_error;
+		best_pixel_indices_MSB = msb_bits;
+		best_pixel_indices_LSB = lsb_bits;
+		best_table = table;
+	}
+	return lowest_error;
+}
+
+// Picks, for one 4x2 area, the table that compressBlockWithTable4x2percep1000() scores lowest.
+// Uses perceptual weighting, in a fixed point implementation where 1000 equals 1.0.
+// All 8 tables are tried; img, width, height, startx, starty and avg_color are forwarded unchanged.
+// best_table (0..7) and best_pixel_indices_MSB/LSB are only written when a candidate error is
+// strictly below the running minimum, which starts at MAXERR1000. Returns that minimum error.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int tryalltables_3bittable4x2percep1000(uint8 *img,int width,int height,int startx,int starty,uint8 *avg_color, unsigned int &best_table,unsigned int &best_pixel_indices_MSB, unsigned int &best_pixel_indices_LSB)
+{
+	unsigned int lowest_error = MAXERR1000;
+	unsigned int msb_bits, lsb_bits;
+
+	// The callee is handed the doubled table number (0,2,..,14); the plain number is reported back.
+	for(int table = 0; table < 8; table++)
+	{
+		const unsigned int candidate_error = compressBlockWithTable4x2percep1000(img,width,height,startx,starty,avg_color,table * 2,&msb_bits,&lsb_bits);
+		if(candidate_error >= lowest_error)
+			continue;	// not an improvement; ties keep the earlier table
+
+		lowest_error = candidate_error;
+		best_pixel_indices_MSB = msb_bits;
+		best_pixel_indices_LSB = lsb_bits;
+		best_table = table;
+	}
+	return lowest_error;
+}
+
+// Find the best table to use for a 4x2 area by testing all.
+// Uses perceptual weighting. 
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int tryalltables_3bittable4x2percep(uint8 *img,int width,int height,int startx,int starty,uint8 *avg_color, unsigned int &best_table,unsigned int &best_pixel_indices_MSB, unsigned int &best_pixel_indices_LSB)
+{
+	// Tries all eight intensity tables via compressBlockWithTable4x2percep()
+	// and keeps the one reporting the lowest error.
+	//
+	// best_table is set to the winning table number (0..7) and
+	// best_pixel_indices_MSB/LSB to the index bits returned for it. The
+	// outputs stay unmodified if no table gets below the initial bound.
+	// The float error of the winner is truncated to int for the return value.
+	float winning_error = 3*255*255*16;
+	unsigned int indices_hi, indices_lo;
+
+	for(int table_no = 0; table_no != 8; ++table_no)
+	{
+		// The helper receives the table number shifted left by one bit.
+		const float this_error = compressBlockWithTable4x2percep(img,width,height,startx,starty,avg_color,table_no << 1,&indices_hi, &indices_lo);
+
+		// Only a strictly smaller error replaces the current winner.
+		if(this_error < winning_error)
+		{
+			best_table = table_no;
+			best_pixel_indices_LSB = indices_lo;
+			best_pixel_indices_MSB = indices_hi;
+			winning_error = this_error;
+		}
+	}
+	return (int) winning_error;
+}
+
+// The below code quantizes a float RGB value to RGB444. 
+//
+// The format often allows a pixel to completely compensate an intensity error of the base
+// color. Hence the closest RGB444 point may not be the best, and the code below uses
+// this fact to find a better RGB444 color as the base color.
+//
+// (See the presentation http://www.jacobstrom.com/publications/PACKMAN.ppt for more info.) 
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void quantize444ColorCombined(float *avg_col_in, int *enc_color, uint8 *avg_color)
+{
+	// Picks an RGB444 base color for the float RGB color in avg_col_in.
+	//
+	// avg_col_in : target color, three floats in R, G, B order (presumably on
+	//              a 0..255 scale, since one 4-bit step is taken as 255/15 --
+	//              confirm against callers).
+	// enc_color  : receives the chosen 4-bit level of each channel.
+	// avg_color  : receives the chosen levels expanded to 8 bits.
+	//
+	// Each channel can use either the 4-bit level obtained by truncating the
+	// target (low) or the next level up (high), which gives eight candidate
+	// colors. A candidate is scored by the sum of squared pairwise differences
+	// between its per-channel errors, so a candidate whose three channel
+	// errors are equal (a pure intensity offset) scores zero.
+	const float kval = (float) (255.0/15.0);
+
+	// Candidate q raises the channels whose bit is set in raise_mask[q]
+	// (bit 0 = R, bit 1 = G, bit 2 = B): none, R, G, B, RG, RB, GB, RGB.
+	static const int raise_mask[8] = { 0, 1, 2, 4, 3, 5, 6, 7 };
+
+	uint8 low_color[3];
+	uint8 high_color[3];
+	float k[3];		// 8-bit distance from the low level up to the high level
+	float d[3];		// expanded low level minus the target value
+	float lowhightable[8];
+	int c, q;
+
+	// Find the 4-bit reconstruction levels low and high of every channel
+	// so that the target lies in the interval [low, high].
+	for(c = 0; c < 3; c++)
+	{
+		int level_low = (int) (avg_col_in[c]/kval);
+		int level_high = CLAMP(0, level_low + 1, 15);
+
+		// Expand both 4-bit levels to 8 bits by replicating the nibble.
+		int expanded_low = (level_low << 4) | (level_low >> 0);
+		int expanded_high = (level_high << 4) | (level_high >> 0);
+
+		low_color[c] = level_low;
+		high_color[c] = level_high;
+		k[c] = (float)expanded_high - (float)expanded_low;
+		d[c] = expanded_low - avg_col_in[c];
+	}
+
+	// Use straight (nonperceptive) weights.
+	const float wR2 = (float) 1.0;
+	const float wG2 = (float) 1.0;
+	const float wB2 = (float) 1.0;
+
+	for(q = 0; q < 8; q++)
+	{
+		// Channel error of candidate q: d for a low channel, d + k for a raised one.
+		float er = d[0] + ((raise_mask[q] & 1) ? k[0] : 0);
+		float eg = d[1] + ((raise_mask[q] & 2) ? k[1] : 0);
+		float eb = d[2] + ((raise_mask[q] & 4) ? k[2] : 0);
+
+		lowhightable[q] = wR2*wG2*SQUARE( er - eg ) + wR2*wB2*SQUARE( er - eb ) + wG2*wB2*SQUARE( eg - eb );
+	}
+
+	// First candidate with the smallest score wins.
+	int min_index = 0;
+	for(q = 1; q < 8; q++)
+	{
+		if(lowhightable[q] < lowhightable[min_index])
+			min_index = q;
+	}
+
+	int mask = raise_mask[min_index];
+	if(min_index == 0 || min_index == 7)
+	{
+		// Since the step size is always 17 in RGB444 format (15*17=255),
+		// kr = kg = kb = 17, which means that case 0 and case 7 will
+		// always have equal projected error. Choose the one that is
+		// closer to the desired color.
+		mask = (d[0]*d[0] + d[1]*d[1] + d[2]*d[2] > 3*8*8) ? 7 : 0;
+	}
+
+	for(c = 0; c < 3; c++)
+		enc_color[c] = ((mask >> c) & 1) ? high_color[c] : low_color[c];
+
+	// Expand the 4-bit encoded color to 8 bits by replicating the nibble
+	// (15 -> 255). The previous (x << 3) | (x >> 2) form is the 5-bit
+	// expansion and produced at most 123 for a 4-bit level.
+	for(c = 0; c < 3; c++)
+		avg_color[c] = (enc_color[c] << 4) | enc_color[c];
+}
+
+// The below code quantizes a float RGB value to RGB555. 
+//
+// The format often allows a pixel to completely compensate an intensity error of the base
+// color. Hence the closest RGB555 point may not be the best, and the code below uses
+// this fact to find a better RGB555 color as the base color.
+//
+// (See the presentation http://www.jacobstrom.com/publications/PACKMAN.ppt for more info.) 
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void quantize555ColorCombined(float *avg_col_in, int *enc_color, uint8 *avg_color)
+{
+	// Selects an RGB555 base color for the float color avg_col_in[0..2]
+	// (R, G, B order).
+	//
+	// enc_color receives the chosen 5-bit level per channel and avg_color the
+	// same levels expanded to 8 bits.
+	//
+	// For every channel two neighbouring 5-bit levels are considered: the one
+	// found by truncating the target (lower) and the next one up (upper,
+	// clamped to 31). The eight lower/upper combinations are scored by the sum
+	// of squared pairwise differences between the per-channel errors, and the
+	// first combination with the smallest score is used.
+	const float level_step = (float) (255.0/31.0);
+
+	// Channels each candidate lifts to its upper level
+	// (bit 0 = R, bit 1 = G, bit 2 = B): none, R, G, B, RG, RB, GB, RGB.
+	static const int lifted[8] = { 0, 1, 2, 4, 3, 5, 6, 7 };
+
+	uint8 floor5[3], ceil5[3];
+	float gap[3];		// 8-bit distance between the two levels
+	float shortfall[3];	// expanded lower level minus the target
+	float score[8];
+	int ch, cand;
+
+	for(ch = 0; ch < 3; ch++)
+	{
+		const float wanted = avg_col_in[ch];
+		const int below = (int) (wanted/level_step);
+		const int above = CLAMP(0, below + 1, 31);
+
+		// 5-bit to 8-bit expansion: top three bits are repeated at the bottom.
+		const int below8 = (below << 3) | (below >> 2);
+		const int above8 = (above << 3) | (above >> 2);
+
+		floor5[ch] = below;
+		ceil5[ch] = above;
+		gap[ch] = (float)above8 - (float)below8;
+		shortfall[ch] = below8 - wanted;
+	}
+
+	// Use straight (nonperceptive) weights.
+	const float wR2 = (float) 1.0;
+	const float wG2 = (float) 1.0;
+	const float wB2 = (float) 1.0;
+
+	for(cand = 0; cand < 8; cand++)
+	{
+		const int bits = lifted[cand];
+		const float er = shortfall[0] + ((bits & 1) ? gap[0] : 0);
+		const float eg = shortfall[1] + ((bits & 2) ? gap[1] : 0);
+		const float eb = shortfall[2] + ((bits & 4) ? gap[2] : 0);
+
+		score[cand] = wR2*wG2*SQUARE( er - eg ) + wR2*wB2*SQUARE( er - eb ) + wG2*wB2*SQUARE( eg - eb );
+	}
+
+	int winner = 0;
+	for(cand = 1; cand < 8; cand++)
+	{
+		if(score[cand] < score[winner])
+			winner = cand;
+	}
+
+	for(ch = 0; ch < 3; ch++)
+		enc_color[ch] = ((lifted[winner] >> ch) & 1) ? ceil5[ch] : floor5[ch];
+
+	// Expand 5-bit encoded color to 8-bit color
+	for(ch = 0; ch < 3; ch++)
+		avg_color[ch] = (enc_color[ch] << 3) | (enc_color[ch] >> 2);
+}
+
+// The below code quantizes a float RGB value to RGB444. 
+//
+// The format often allows a pixel to completely compensate an intensity error of the base
+// color. Hence the closest RGB444 point may not be the best, and the code below uses
+// this fact to find a better RGB444 color as the base color.
+//
+// (See the presentation http://www.jacobstrom.com/publications/PACKMAN.ppt for more info.) 
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void quantize444ColorCombinedPerceptual(float *avg_col_in, int *enc_color, uint8 *avg_color)
+{
+	// Selects an RGB444 base color for the float color avg_col_in[0..2]
+	// (R, G, B order), weighting the channels with the
+	// PERCEPTUAL_WEIGHT_*_SQUARED constants.
+	//
+	// enc_color receives the chosen 4-bit level per channel and avg_color the
+	// same levels expanded to 8 bits.
+	//
+	// For every channel two neighbouring 4-bit levels are considered: the one
+	// found by truncating the target (lower) and the next one up (upper,
+	// clamped to 15). The eight lower/upper combinations are scored by the
+	// weighted sum of squared pairwise differences between the per-channel
+	// errors, and the first combination with the smallest score is used.
+	const float level_step = (float) (255.0/15.0);
+
+	// Channels each candidate lifts to its upper level
+	// (bit 0 = R, bit 1 = G, bit 2 = B): none, R, G, B, RG, RB, GB, RGB.
+	static const int lifted[8] = { 0, 1, 2, 4, 3, 5, 6, 7 };
+
+	uint8 floor4[3], ceil4[3];
+	float gap[3];		// 8-bit distance between the two levels
+	float shortfall[3];	// expanded lower level minus the target
+	float score[8];
+	int ch, cand;
+
+	for(ch = 0; ch < 3; ch++)
+	{
+		const float wanted = avg_col_in[ch];
+		const int below = (int) (wanted/level_step);
+		const int above = CLAMP(0, below + 1, 15);
+
+		// 4-bit to 8-bit expansion: the nibble is repeated.
+		const int below8 = (below << 4) | (below >> 0);
+		const int above8 = (above << 4) | (above >> 0);
+
+		floor4[ch] = below;
+		ceil4[ch] = above;
+		gap[ch] = (float)above8 - (float)below8;
+		shortfall[ch] = below8 - wanted;
+	}
+
+	// Perceptual weights to use
+	const float wR2 = (float) PERCEPTUAL_WEIGHT_R_SQUARED;
+	const float wG2 = (float) PERCEPTUAL_WEIGHT_G_SQUARED;
+	const float wB2 = (float) PERCEPTUAL_WEIGHT_B_SQUARED;
+
+	for(cand = 0; cand < 8; cand++)
+	{
+		const int bits = lifted[cand];
+		const float er = shortfall[0] + ((bits & 1) ? gap[0] : 0);
+		const float eg = shortfall[1] + ((bits & 2) ? gap[1] : 0);
+		const float eb = shortfall[2] + ((bits & 4) ? gap[2] : 0);
+
+		score[cand] = wR2*wG2*SQUARE( er - eg ) + wR2*wB2*SQUARE( er - eb ) + wG2*wB2*SQUARE( eg - eb );
+	}
+
+	int winner = 0;
+	for(cand = 1; cand < 8; cand++)
+	{
+		if(score[cand] < score[winner])
+			winner = cand;
+	}
+
+	for(ch = 0; ch < 3; ch++)
+		enc_color[ch] = ((lifted[winner] >> ch) & 1) ? ceil4[ch] : floor4[ch];
+
+	// Expand encoded color to eight bits
+	for(ch = 0; ch < 3; ch++)
+		avg_color[ch] = (enc_color[ch] << 4) | enc_color[ch];
+}
+
+// The below code quantizes a float RGB value to RGB555. 
+//
+// The format often allows a pixel to completely compensate an intensity error of the base
+// color. Hence the closest RGB555 point may not be the best, and the code below uses
+// this fact to find a better RGB555 color as the base color.
+//
+// (See the presentation http://www.jacobstrom.com/publications/PACKMAN.ppt for more info.) 
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void quantize555ColorCombinedPerceptual(float *avg_col_in, int *enc_color, uint8 *avg_color)
+{
+	// Selects an RGB555 base color for the float color avg_col_in[0..2]
+	// (R, G, B order), weighting the channels with the
+	// PERCEPTUAL_WEIGHT_*_SQUARED constants.
+	//
+	// enc_color receives the chosen 5-bit level per channel and avg_color the
+	// same levels expanded to 8 bits.
+	//
+	// For every channel two neighbouring 5-bit levels are considered: the one
+	// found by truncating the target (lower) and the next one up (upper,
+	// clamped to 31). The eight lower/upper combinations are scored by the
+	// weighted sum of squared pairwise differences between the per-channel
+	// errors, and the first combination with the smallest score is used.
+	const float level_step = (float) (255.0/31.0);
+
+	// Channels each candidate lifts to its upper level
+	// (bit 0 = R, bit 1 = G, bit 2 = B): none, R, G, B, RG, RB, GB, RGB.
+	static const int lifted[8] = { 0, 1, 2, 4, 3, 5, 6, 7 };
+
+	uint8 floor5[3], ceil5[3];
+	float gap[3];		// 8-bit distance between the two levels
+	float shortfall[3];	// expanded lower level minus the target
+	float score[8];
+	int ch, cand;
+
+	for(ch = 0; ch < 3; ch++)
+	{
+		const float wanted = avg_col_in[ch];
+		const int below = (int) (wanted/level_step);
+		const int above = CLAMP(0, below + 1, 31);
+
+		// 5-bit to 8-bit expansion: top three bits are repeated at the bottom.
+		const int below8 = (below << 3) | (below >> 2);
+		const int above8 = (above << 3) | (above >> 2);
+
+		floor5[ch] = below;
+		ceil5[ch] = above;
+		gap[ch] = (float)above8 - (float)below8;
+		shortfall[ch] = below8 - wanted;
+	}
+
+	// Perceptual weights to use
+	const float wR2 = (float) PERCEPTUAL_WEIGHT_R_SQUARED;
+	const float wG2 = (float) PERCEPTUAL_WEIGHT_G_SQUARED;
+	const float wB2 = (float) PERCEPTUAL_WEIGHT_B_SQUARED;
+
+	for(cand = 0; cand < 8; cand++)
+	{
+		const int bits = lifted[cand];
+		const float er = shortfall[0] + ((bits & 1) ? gap[0] : 0);
+		const float eg = shortfall[1] + ((bits & 2) ? gap[1] : 0);
+		const float eb = shortfall[2] + ((bits & 4) ? gap[2] : 0);
+
+		score[cand] = wR2*wG2*SQUARE( er - eg ) + wR2*wB2*SQUARE( er - eb ) + wG2*wB2*SQUARE( eg - eb );
+	}
+
+	int winner = 0;
+	for(cand = 1; cand < 8; cand++)
+	{
+		if(score[cand] < score[winner])
+			winner = cand;
+	}
+
+	for(ch = 0; ch < 3; ch++)
+		enc_color[ch] = ((lifted[winner] >> ch) & 1) ? ceil5[ch] : floor5[ch];
+
+	// Expand 5-bit encoded color to 8-bit color
+	for(ch = 0; ch < 3; ch++)
+		avg_color[ch] = (enc_color[ch] << 3) | (enc_color[ch] >> 2);
+}
+
+// Compresses the block using only the individual mode in ETC1/ETC2 using the average color as the base color.
+// Uses a perceptual error metric.
+// Uses fixed point arithmetics where 1000 equals 1.0
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockOnlyIndividualAveragePerceptual1000(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2, int *best_enc_color1, int*best_enc_color2, int &best_flip, unsigned int &best_err_upper, unsigned int &best_err_lower, unsigned int &best_err_left, unsigned int &best_err_right, int *best_color_upper, int *best_color_lower, int *best_color_left, int *best_color_right)
+{
+	// Encodes the block at (startx, starty) in individual mode (diffbit = 0),
+	// using the average color of each half-block, rounded to 4 bits per
+	// channel, as that half's base color. Both the left/right split and the
+	// upper/lower (flipped) split are encoded and the one with the smaller
+	// total error is returned.
+	//
+	// img, width, height, startx, starty : passed on to the averaging and
+	//                    table-search helpers; the second half is addressed
+	//                    by adding 2 to startx or starty.
+	// compressed1/2    : receive the two 32-bit words of the winning split.
+	// best_enc_color1/2: receive the 4-bit base colors of the first and
+	//                    second half of the winning split.
+	// best_flip        : set to 0 for the left/right split, 1 for upper/lower.
+	// best_err_left/right/upper/lower     : error reported by the table search
+	//                    for each half; all four are always written.
+	// best_color_left/right/upper/lower   : 4-bit base color of each half;
+	//                    all four are always written.
+	//
+	// Returns the summed error of the winning split. On a tie the left/right
+	// split is chosen.
+	unsigned int compressed1_norm, compressed2_norm;
+	unsigned int compressed1_flip, compressed2_flip;
+	uint8 avg_color_quant1[3], avg_color_quant2[3];
+
+	float avg_color_float1[3],avg_color_float2[3];
+	int enc_color1[3], enc_color2[3];
+	unsigned int best_table1=0, best_table2=0;
+	int diffbit;
+	int c;
+
+	unsigned int norm_err=0;
+	unsigned int flip_err=0;
+	unsigned int best_err;
+
+	unsigned int best_pixel_indices1_MSB;
+	unsigned int best_pixel_indices1_LSB;
+	unsigned int best_pixel_indices2_MSB;
+	unsigned int best_pixel_indices2_LSB;
+
+	// First try normal blocks 2x4:
+
+	computeAverageColor2x4noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor2x4noQuantFloat(img,width,height,startx+2,starty,avg_color_float2);
+
+	// Round each average to the nearest of the 16 levels of a 4-bit channel.
+	for(c = 0; c < 3; c++)
+	{
+		enc_color1[c] = int( JAS_ROUND(15.0*avg_color_float1[c]/255.0) );
+		enc_color2[c] = int( JAS_ROUND(15.0*avg_color_float2[c]/255.0) );
+	}
+
+	diffbit = 0;
+
+	// Expand the 4-bit colors to 8 bits (nibble replication) for the table search.
+	for(c = 0; c < 3; c++)
+	{
+		avg_color_quant1[c] = enc_color1[c] << 4 | (enc_color1[c] );
+		avg_color_quant2[c] = enc_color2[c] << 4 | (enc_color2[c] );
+	}
+
+	// Pack bits into the first word.
+
+	//     ETC1_RGB8_OES:
+	//
+	//     a) bit layout in bits 63 through 32 if diffbit = 0
+	//
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+	//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	//
+	//     b) bit layout in bits 63 through 32 if diffbit = 1
+	//
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	//
+	//     c) bit layout in bits 31 through 0 (in both cases)
+	//
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
+	//      --------------------------------------------------------------------------------------------------
+	//     |       most significant pixel index bits       |         least significant pixel index bits       |
+	//     | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
+	//      --------------------------------------------------------------------------------------------------
+
+	compressed1_norm = 0;
+	PUTBITSHIGH( compressed1_norm, diffbit,       1, 33);
+	PUTBITSHIGH( compressed1_norm, enc_color1[0], 4, 63);
+	PUTBITSHIGH( compressed1_norm, enc_color1[1], 4, 55);
+	PUTBITSHIGH( compressed1_norm, enc_color1[2], 4, 47);
+	PUTBITSHIGH( compressed1_norm, enc_color2[0], 4, 59);
+	PUTBITSHIGH( compressed1_norm, enc_color2[1], 4, 51);
+	// B2 lives in bits 43..40 and must carry the blue component; the green
+	// component was written here by mistake before.
+	PUTBITSHIGH( compressed1_norm, enc_color2[2], 4, 43);
+
+	// Report the left/right colors; they also serve as the provisional best
+	// colors until the flipped split proves better.
+	for(c = 0; c < 3; c++)
+	{
+		best_enc_color1[c] = enc_color1[c];
+		best_enc_color2[c] = enc_color2[c];
+		best_color_left[c] = enc_color1[c];
+		best_color_right[c] = enc_color2[c];
+	}
+
+	// left part of block
+	best_err_left = tryalltables_3bittable2x4percep1000(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+	norm_err = best_err_left;
+
+	// right part of block
+	best_err_right = tryalltables_3bittable2x4percep1000(img,width,height,startx+2,starty,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+	norm_err += best_err_right;
+
+	PUTBITSHIGH( compressed1_norm, best_table1,   3, 39);
+	PUTBITSHIGH( compressed1_norm, best_table2,   3, 36);
+	PUTBITSHIGH( compressed1_norm,           0,   1, 32);
+
+	compressed2_norm = 0;
+	PUTBITS( compressed2_norm, (best_pixel_indices1_MSB     ), 8, 23);
+	PUTBITS( compressed2_norm, (best_pixel_indices2_MSB     ), 8, 31);
+	PUTBITS( compressed2_norm, (best_pixel_indices1_LSB     ), 8, 7);
+	PUTBITS( compressed2_norm, (best_pixel_indices2_LSB     ), 8, 15);
+
+	// Now try flipped blocks 4x2:
+
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty+2,avg_color_float2);
+
+	// Individual mode only: both halves are again rounded to RGB444, no
+	// differential coding is attempted.
+	for(c = 0; c < 3; c++)
+	{
+		enc_color1[c] = int( JAS_ROUND(15.0*avg_color_float1[c]/255.0) );
+		enc_color2[c] = int( JAS_ROUND(15.0*avg_color_float2[c]/255.0) );
+	}
+
+	for(c = 0; c < 3; c++)
+	{
+		best_color_upper[c] = enc_color1[c];
+		best_color_lower[c] = enc_color2[c];
+	}
+
+	diffbit = 0;
+
+	for(c = 0; c < 3; c++)
+	{
+		avg_color_quant1[c] = enc_color1[c] << 4 | (enc_color1[c] );
+		avg_color_quant2[c] = enc_color2[c] << 4 | (enc_color2[c] );
+	}
+
+	// Pack bits into the first word.
+
+	compressed1_flip = 0;
+	PUTBITSHIGH( compressed1_flip, diffbit,       1, 33);
+	PUTBITSHIGH( compressed1_flip, enc_color1[0], 4, 63);
+	PUTBITSHIGH( compressed1_flip, enc_color1[1], 4, 55);
+	PUTBITSHIGH( compressed1_flip, enc_color1[2], 4, 47);
+	// R2 occupies bits 59..56 (layout a above). It used to be written at
+	// start bit 49, which left R2 empty and overlapped the G2/B1 fields.
+	PUTBITSHIGH( compressed1_flip, enc_color2[0], 4, 59);
+	PUTBITSHIGH( compressed1_flip, enc_color2[1], 4, 51);
+	PUTBITSHIGH( compressed1_flip, enc_color2[2], 4, 43);
+
+	// upper part of block
+	best_err_upper = tryalltables_3bittable4x2percep1000(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+	flip_err = best_err_upper;
+	// lower part of block
+	best_err_lower = tryalltables_3bittable4x2percep1000(img,width,height,startx,starty+2,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+	flip_err += best_err_lower;
+
+	PUTBITSHIGH( compressed1_flip, best_table1,   3, 39);
+	PUTBITSHIGH( compressed1_flip, best_table2,   3, 36);
+	PUTBITSHIGH( compressed1_flip,           1,   1, 32);
+
+	// Merge the index bits of the two halves: the lower half's bits are
+	// shifted up by two before being OR-ed into the upper half's.
+	best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+	best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);
+
+	compressed2_flip = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+
+	// Keep whichever split gave the smaller total error; the left/right
+	// split wins ties.
+
+	if(norm_err <= flip_err)
+	{
+		compressed1 = compressed1_norm;
+		compressed2 = compressed2_norm;
+		best_err = norm_err;
+		best_flip = 0;
+	}
+	else
+	{
+		compressed1 = compressed1_flip | 1;
+		compressed2 = compressed2_flip;
+		best_err = flip_err;
+		for(c = 0; c < 3; c++)
+		{
+			best_enc_color1[c] = enc_color1[c];
+			best_enc_color2[c] = enc_color2[c];
+		}
+		best_flip = 1;
+	}
+	return best_err;
+}
+
+// Compresses one 4x4 block using only the individual mode of ETC1/ETC2 (two independent RGB444 base colors),
+// taking the quantized average color of each half-block as that half's base color.
+// Both layouts are encoded: non-flipped (2x4 halves, left/right) and flipped (4x2 halves, upper/lower).
+// The layout with the smaller summed error is returned; on a tie the non-flipped layout wins.
+//   img, width, height, startx, starty: image and block origin, forwarded unchanged to the averaging and table-search helpers.
+//   compressed1, compressed2: out; bits 63..32 and bits 31..0 of the chosen encoding (see the layout diagram below).
+//   best_enc_color1, best_enc_color2: out; 4-bit-per-channel base colors of the chosen layout.
+//   best_flip: out; 0 if the non-flipped layout was chosen, 1 if the flipped layout was chosen.
+//   best_err_left/right/upper/lower, best_color_left/right/upper/lower: out; error and 4-bit base color of every
+//     half-block, written for both layouts no matter which one is chosen.
+// Returns the summed error of the chosen layout.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int compressBlockOnlyIndividualAverage(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2, int *best_enc_color1, int*best_enc_color2, int &best_flip, unsigned int &best_err_upper, unsigned int &best_err_lower, unsigned int &best_err_left, unsigned int &best_err_right, int *best_color_upper, int *best_color_lower, int *best_color_left, int *best_color_right)
+{
+	unsigned int compressed1_norm, compressed2_norm;
+	unsigned int compressed1_flip, compressed2_flip;
+	uint8 avg_color_quant1[3], avg_color_quant2[3];
+
+	float avg_color_float1[3],avg_color_float2[3];
+	int enc_color1[3], enc_color2[3];
+	unsigned int best_table1=0, best_table2=0;
+	int diffbit;
+
+	int norm_err=0;
+	int flip_err=0;
+	int best_err;
+
+	// First try normal blocks 2x4 (left half at startx, right half at startx+2):
+	computeAverageColor2x4noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor2x4noQuantFloat(img,width,height,startx+2,starty,avg_color_float2);
+	// Quantize each average to 4 bits per channel.
+	enc_color1[0] = int( JAS_ROUND(15.0*avg_color_float1[0]/255.0) );
+	enc_color1[1] = int( JAS_ROUND(15.0*avg_color_float1[1]/255.0) );
+	enc_color1[2] = int( JAS_ROUND(15.0*avg_color_float1[2]/255.0) );
+	enc_color2[0] = int( JAS_ROUND(15.0*avg_color_float2[0]/255.0) );
+	enc_color2[1] = int( JAS_ROUND(15.0*avg_color_float2[1]/255.0) );
+	enc_color2[2] = int( JAS_ROUND(15.0*avg_color_float2[2]/255.0) );
+	// Individual mode only, so the diff bit is always 0.
+	diffbit = 0;
+	// Expand 4 bits to 8 by replicating the nibble; this is the color handed to the table search.
+	avg_color_quant1[0] = enc_color1[0] << 4 | (enc_color1[0] );
+	avg_color_quant1[1] = enc_color1[1] << 4 | (enc_color1[1] );
+	avg_color_quant1[2] = enc_color1[2] << 4 | (enc_color1[2] );
+	avg_color_quant2[0] = enc_color2[0] << 4 | (enc_color2[0] );
+	avg_color_quant2[1] = enc_color2[1] << 4 | (enc_color2[1] );
+	avg_color_quant2[2] = enc_color2[2] << 4 | (enc_color2[2] );
+
+	// Pack bits into the first word. 
+
+	//     ETC1_RGB8_OES:
+	// 
+	//     a) bit layout in bits 63 through 32 if diffbit = 0
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+	//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	//     
+	//     b) bit layout in bits 63 through 32 if diffbit = 1
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	// 
+	//     c) bit layout in bits 31 through 0 (in both cases)
+	// 
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
+	//      --------------------------------------------------------------------------------------------------
+	//     |       most significant pixel index bits       |         least significant pixel index bits       |  
+	//     | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
+	//      --------------------------------------------------------------------------------------------------      
+
+	compressed1_norm = 0;
+	PUTBITSHIGH( compressed1_norm, diffbit,       1, 33);
+	PUTBITSHIGH( compressed1_norm, enc_color1[0], 4, 63);
+	PUTBITSHIGH( compressed1_norm, enc_color1[1], 4, 55);
+	PUTBITSHIGH( compressed1_norm, enc_color1[2], 4, 47);
+	PUTBITSHIGH( compressed1_norm, enc_color2[0], 4, 59);
+	PUTBITSHIGH( compressed1_norm, enc_color2[1], 4, 51);
+	PUTBITSHIGH( compressed1_norm, enc_color2[2], 4, 43); // B2 field (bits 43..40) takes the blue channel, not green
+
+	unsigned int best_pixel_indices1_MSB;
+	unsigned int best_pixel_indices1_LSB;
+	unsigned int best_pixel_indices2_MSB;
+	unsigned int best_pixel_indices2_LSB;
+	// Report the non-flipped colors for now; best_enc_color1/2 are overwritten below if the flipped layout wins.
+	best_enc_color1[0] = enc_color1[0];
+	best_enc_color1[1] = enc_color1[1];
+	best_enc_color1[2] = enc_color1[2];
+	best_enc_color2[0] = enc_color2[0];
+	best_enc_color2[1] = enc_color2[1];
+	best_enc_color2[2] = enc_color2[2];
+	best_color_left[0] = enc_color1[0];
+	best_color_left[1] = enc_color1[1];
+	best_color_left[2] = enc_color1[2];
+	best_color_right[0] = enc_color2[0];
+	best_color_right[1] = enc_color2[1];
+	best_color_right[2] = enc_color2[2];
+
+	// left part of block
+	best_err_left = tryalltables_3bittable2x4(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+	norm_err = best_err_left;
+
+	// right part of block
+	best_err_right = tryalltables_3bittable2x4(img,width,height,startx+2,starty,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+	norm_err += best_err_right;
+
+	PUTBITSHIGH( compressed1_norm, best_table1,   3, 39);
+	PUTBITSHIGH( compressed1_norm, best_table2,   3, 36);
+	PUTBITSHIGH( compressed1_norm,           0,   1, 32);
+
+	compressed2_norm = 0;
+	PUTBITS( compressed2_norm, (best_pixel_indices1_MSB     ), 8, 23);
+	PUTBITS( compressed2_norm, (best_pixel_indices2_MSB     ), 8, 31);
+	PUTBITS( compressed2_norm, (best_pixel_indices1_LSB     ), 8, 7);
+	PUTBITS( compressed2_norm, (best_pixel_indices2_LSB     ), 8, 15);
+
+	// Now try flipped blocks 4x2 (upper half at starty, lower half at starty+2):
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty+2,avg_color_float2);
+	// Quantize each average to 4 bits per channel (no differential test here: this function is individual-mode only).
+	enc_color1[0] = int( JAS_ROUND(15.0*avg_color_float1[0]/255.0) );
+	enc_color1[1] = int( JAS_ROUND(15.0*avg_color_float1[1]/255.0) );
+	enc_color1[2] = int( JAS_ROUND(15.0*avg_color_float1[2]/255.0) );
+	enc_color2[0] = int( JAS_ROUND(15.0*avg_color_float2[0]/255.0) );
+	enc_color2[1] = int( JAS_ROUND(15.0*avg_color_float2[1]/255.0) );
+	enc_color2[2] = int( JAS_ROUND(15.0*avg_color_float2[2]/255.0) );
+
+	best_color_upper[0] = enc_color1[0];
+	best_color_upper[1] = enc_color1[1];
+	best_color_upper[2] = enc_color1[2];
+	best_color_lower[0] = enc_color2[0];
+	best_color_lower[1] = enc_color2[1];
+	best_color_lower[2] = enc_color2[2];
+
+	diffbit = 0;
+
+	avg_color_quant1[0] = enc_color1[0] << 4 | (enc_color1[0] );
+	avg_color_quant1[1] = enc_color1[1] << 4 | (enc_color1[1] );
+	avg_color_quant1[2] = enc_color1[2] << 4 | (enc_color1[2] );
+	avg_color_quant2[0] = enc_color2[0] << 4 | (enc_color2[0] );
+	avg_color_quant2[1] = enc_color2[1] << 4 | (enc_color2[1] );
+	avg_color_quant2[2] = enc_color2[2] << 4 | (enc_color2[2] );
+
+	// Pack bits into the first word (same field positions as layout a) above).
+
+	compressed1_flip = 0;
+	PUTBITSHIGH( compressed1_flip, diffbit,       1, 33);
+	PUTBITSHIGH( compressed1_flip, enc_color1[0], 4, 63);
+	PUTBITSHIGH( compressed1_flip, enc_color1[1], 4, 55);
+	PUTBITSHIGH( compressed1_flip, enc_color1[2], 4, 47);
+	PUTBITSHIGH( compressed1_flip, enc_color2[0], 4, 59); // R2 field is bits 59..56, same as in the non-flipped word
+	PUTBITSHIGH( compressed1_flip, enc_color2[1], 4, 51);
+	PUTBITSHIGH( compressed1_flip, enc_color2[2], 4, 43);
+
+	// upper part of block
+	best_err_upper = tryalltables_3bittable4x2(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+	flip_err = best_err_upper;
+	// lower part of block
+	best_err_lower = tryalltables_3bittable4x2(img,width,height,startx,starty+2,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+	flip_err += best_err_lower;
+
+	PUTBITSHIGH( compressed1_flip, best_table1,   3, 39);
+	PUTBITSHIGH( compressed1_flip, best_table2,   3, 36);
+	PUTBITSHIGH( compressed1_flip,           1,   1, 32);
+	// NOTE(review): the << 2 merge presumably interleaves the lower half's index bits with the upper half's; it depends on the bit layout tryalltables_3bittable4x2 produces -- confirm.
+	best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+	best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);
+
+	compressed2_flip = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+
+	// Keep whichever layout has the smaller summed error; a tie keeps the non-flipped layout.
+
+	if(norm_err <= flip_err)
+	{
+		compressed1 = compressed1_norm | 0;
+		compressed2 = compressed2_norm;
+		best_err = norm_err;
+		best_flip = 0;
+	}
+	else
+	{
+		compressed1 = compressed1_flip | 1;
+		compressed2 = compressed2_flip;
+		best_err = flip_err;
+		best_enc_color1[0] = enc_color1[0];
+		best_enc_color1[1] = enc_color1[1];
+		best_enc_color1[2] = enc_color1[2];
+		best_enc_color2[0] = enc_color2[0];
+		best_enc_color2[1] = enc_color2[1];
+		best_enc_color2[2] = enc_color2[2];
+		best_flip = 1;
+	}
+	return best_err;
+}
+
+// Compresses one 4x4 block with ETC1/ETC2, per layout using the differential mode (RGB555 base + 3-bit deltas) when the two half-block colors are close enough and the individual mode (2 x RGB444) otherwise.
+// The base color of each half-block is its quantized average color. Both the non-flipped (2x4 halves) and the flipped (4x2 halves) layouts are encoded; the one with the lower summed error is kept (a tie keeps non-flipped).
+// img, width, height, startx, starty are forwarded unchanged to the averaging and table-search helpers; compressed1 and compressed2 receive bits 63..32 and bits 31..0 of the chosen encoding.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockDiffFlipAverage(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2)
+{
+	unsigned int compressed1_norm, compressed2_norm;
+	unsigned int compressed1_flip, compressed2_flip;
+	uint8 avg_color_quant1[3], avg_color_quant2[3];
+
+	float avg_color_float1[3],avg_color_float2[3];
+	int enc_color1[3], enc_color2[3], diff[3];
+	int min_error=255*255*8*3; // NOTE(review): not read anywhere in this function
+	unsigned int best_table_indices1=0, best_table_indices2=0; // NOTE(review): not read anywhere in this function
+	unsigned int best_table1=0, best_table2=0;
+	int diffbit;
+
+	int norm_err=0;
+	int flip_err=0;
+
+	// First try normal blocks 2x4:
+	computeAverageColor2x4noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor2x4noQuantFloat(img,width,height,startx+2,starty,avg_color_float2);
+
+	// First test if avg_color1 is similar enough to avg_color2 so that
+	// we can use differential coding of colors. 
+
+	float eps; // small bias added before truncation; only used by the 444 fallback branches
+	// Quantize both averages to 5 bits per channel.
+	enc_color1[0] = int( JAS_ROUND(31.0*avg_color_float1[0]/255.0) );
+	enc_color1[1] = int( JAS_ROUND(31.0*avg_color_float1[1]/255.0) );
+	enc_color1[2] = int( JAS_ROUND(31.0*avg_color_float1[2]/255.0) );
+	enc_color2[0] = int( JAS_ROUND(31.0*avg_color_float2[0]/255.0) );
+	enc_color2[1] = int( JAS_ROUND(31.0*avg_color_float2[1]/255.0) );
+	enc_color2[2] = int( JAS_ROUND(31.0*avg_color_float2[2]/255.0) );
+
+	diff[0] = enc_color2[0]-enc_color1[0];	
+	diff[1] = enc_color2[1]-enc_color1[1];	
+	diff[2] = enc_color2[2]-enc_color1[2];
+	// Differential coding is used only if every channel delta fits the signed 3-bit range [-4, 3].
+	if( (diff[0] >= -4) && (diff[0] <= 3) && (diff[1] >= -4) && (diff[1] <= 3) && (diff[2] >= -4) && (diff[2] <= 3) )
+	{
+		diffbit = 1;
+
+		// The difference to be coded (same values as computed before the range test):
+
+		diff[0] = enc_color2[0]-enc_color1[0];	
+		diff[1] = enc_color2[1]-enc_color1[1];	
+		diff[2] = enc_color2[2]-enc_color1[2];
+		// Expand 5 bits to 8 by copying the top three bits into the low bits.
+		avg_color_quant1[0] = enc_color1[0] << 3 | (enc_color1[0] >> 2);
+		avg_color_quant1[1] = enc_color1[1] << 3 | (enc_color1[1] >> 2);
+		avg_color_quant1[2] = enc_color1[2] << 3 | (enc_color1[2] >> 2);
+		avg_color_quant2[0] = enc_color2[0] << 3 | (enc_color2[0] >> 2);
+		avg_color_quant2[1] = enc_color2[1] << 3 | (enc_color2[1] >> 2);
+		avg_color_quant2[2] = enc_color2[2] << 3 | (enc_color2[2] >> 2);
+
+		// Pack bits into the first word. 
+
+		//     ETC1_RGB8_OES:
+		// 
+		//     a) bit layout in bits 63 through 32 if diffbit = 0
+		// 
+		//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+		//      ---------------------------------------------------------------------------------------------------
+		//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+		//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+		//      ---------------------------------------------------------------------------------------------------
+		//     
+		//     b) bit layout in bits 63 through 32 if diffbit = 1
+		// 
+		//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+		//      ---------------------------------------------------------------------------------------------------
+		//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+		//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+		//      ---------------------------------------------------------------------------------------------------
+		// 
+		//     c) bit layout in bits 31 through 0 (in both cases)
+		// 
+		//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
+		//      --------------------------------------------------------------------------------------------------
+		//     |       most significant pixel index bits       |         least significant pixel index bits       |  
+		//     | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
+		//      --------------------------------------------------------------------------------------------------      
+
+		compressed1_norm = 0;
+		PUTBITSHIGH( compressed1_norm, diffbit,       1, 33);
+ 		PUTBITSHIGH( compressed1_norm, enc_color1[0], 5, 63);
+ 		PUTBITSHIGH( compressed1_norm, enc_color1[1], 5, 55);
+ 		PUTBITSHIGH( compressed1_norm, enc_color1[2], 5, 47);
+ 		PUTBITSHIGH( compressed1_norm, diff[0],       3, 58); // NOTE(review): diff may be negative; PUTBITSHIGH is defined outside this chunk -- confirm it masks to 3 bits
+ 		PUTBITSHIGH( compressed1_norm, diff[1],       3, 50);
+ 		PUTBITSHIGH( compressed1_norm, diff[2],       3, 42);
+
+		unsigned int best_pixel_indices1_MSB;
+		unsigned int best_pixel_indices1_LSB;
+		unsigned int best_pixel_indices2_MSB;
+		unsigned int best_pixel_indices2_LSB;
+
+		norm_err = 0;
+
+		// left part of block
+		norm_err = tryalltables_3bittable2x4(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+
+		// right part of block
+		norm_err += tryalltables_3bittable2x4(img,width,height,startx+2,starty,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+ 		PUTBITSHIGH( compressed1_norm, best_table1,   3, 39);
+ 		PUTBITSHIGH( compressed1_norm, best_table2,   3, 36);
+ 		PUTBITSHIGH( compressed1_norm,           0,   1, 32);
+
+		compressed2_norm = 0;
+		PUTBITS( compressed2_norm, (best_pixel_indices1_MSB     ), 8, 23);
+		PUTBITS( compressed2_norm, (best_pixel_indices2_MSB     ), 8, 31);
+		PUTBITS( compressed2_norm, (best_pixel_indices1_LSB     ), 8, 7);
+		PUTBITS( compressed2_norm, (best_pixel_indices2_LSB     ), 8, 15);
+	}
+	else
+	{
+		diffbit = 0;
+		// The difference is bigger than what fits in 555 plus delta-333, so we will have
+		// to deal with 444 444.
+
+		eps = (float) 0.0001;
+		// Re-quantize to 4 bits per channel: avg/17 rounded to nearest (17 = 255/15).
+		enc_color1[0] = int( ((float) avg_color_float1[0] / (17.0)) +0.5 + eps);
+		enc_color1[1] = int( ((float) avg_color_float1[1] / (17.0)) +0.5 + eps);
+		enc_color1[2] = int( ((float) avg_color_float1[2] / (17.0)) +0.5 + eps);
+		enc_color2[0] = int( ((float) avg_color_float2[0] / (17.0)) +0.5 + eps);
+		enc_color2[1] = int( ((float) avg_color_float2[1] / (17.0)) +0.5 + eps);
+		enc_color2[2] = int( ((float) avg_color_float2[2] / (17.0)) +0.5 + eps);
+		avg_color_quant1[0] = enc_color1[0] << 4 | enc_color1[0]; 
+		avg_color_quant1[1] = enc_color1[1] << 4 | enc_color1[1]; 
+		avg_color_quant1[2] = enc_color1[2] << 4 | enc_color1[2];
+		avg_color_quant2[0] = enc_color2[0] << 4 | enc_color2[0]; 
+		avg_color_quant2[1] = enc_color2[1] << 4 | enc_color2[1]; 
+		avg_color_quant2[2] = enc_color2[2] << 4 | enc_color2[2];
+
+		// Pack bits into the first word. 
+
+		//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+		//      ---------------------------------------------------------------------------------------------------
+		//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+		//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+		//      ---------------------------------------------------------------------------------------------------
+
+		compressed1_norm = 0;
+		PUTBITSHIGH( compressed1_norm, diffbit,       1, 33);
+ 		PUTBITSHIGH( compressed1_norm, enc_color1[0], 4, 63);
+ 		PUTBITSHIGH( compressed1_norm, enc_color1[1], 4, 55);
+ 		PUTBITSHIGH( compressed1_norm, enc_color1[2], 4, 47);
+ 		PUTBITSHIGH( compressed1_norm, enc_color2[0], 4, 59);
+ 		PUTBITSHIGH( compressed1_norm, enc_color2[1], 4, 51);
+ 		PUTBITSHIGH( compressed1_norm, enc_color2[2], 4, 43);
+
+		unsigned int best_pixel_indices1_MSB;
+		unsigned int best_pixel_indices1_LSB;
+		unsigned int best_pixel_indices2_MSB;
+		unsigned int best_pixel_indices2_LSB;
+		
+		// left part of block
+		norm_err = tryalltables_3bittable2x4(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+
+		// right part of block
+		norm_err += tryalltables_3bittable2x4(img,width,height,startx+2,starty,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+ 		PUTBITSHIGH( compressed1_norm, best_table1,   3, 39);
+ 		PUTBITSHIGH( compressed1_norm, best_table2,   3, 36);
+ 		PUTBITSHIGH( compressed1_norm,           0,   1, 32);
+
+		compressed2_norm = 0;
+		PUTBITS( compressed2_norm, (best_pixel_indices1_MSB     ), 8, 23);
+		PUTBITS( compressed2_norm, (best_pixel_indices2_MSB     ), 8, 31);
+		PUTBITS( compressed2_norm, (best_pixel_indices1_LSB     ), 8, 7);
+		PUTBITS( compressed2_norm, (best_pixel_indices2_LSB     ), 8, 15);
+	}
+
+	// Now try flipped blocks 4x2:
+
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty+2,avg_color_float2);
+
+	// First test if avg_color1 is similar enough to avg_color2 so that
+	// we can use differential coding of colors. 
+
+	enc_color1[0] = int( JAS_ROUND(31.0*avg_color_float1[0]/255.0) );
+	enc_color1[1] = int( JAS_ROUND(31.0*avg_color_float1[1]/255.0) );
+	enc_color1[2] = int( JAS_ROUND(31.0*avg_color_float1[2]/255.0) );
+	enc_color2[0] = int( JAS_ROUND(31.0*avg_color_float2[0]/255.0) );
+	enc_color2[1] = int( JAS_ROUND(31.0*avg_color_float2[1]/255.0) );
+	enc_color2[2] = int( JAS_ROUND(31.0*avg_color_float2[2]/255.0) );
+
+	diff[0] = enc_color2[0]-enc_color1[0];	
+	diff[1] = enc_color2[1]-enc_color1[1];	
+	diff[2] = enc_color2[2]-enc_color1[2];
+	// Same range test as for the non-flipped layout: every delta must fit in [-4, 3].
+	if( (diff[0] >= -4) && (diff[0] <= 3) && (diff[1] >= -4) && (diff[1] <= 3) && (diff[2] >= -4) && (diff[2] <= 3) )
+	{
+		diffbit = 1;
+
+		// The difference to be coded (same values as computed before the range test):
+
+		diff[0] = enc_color2[0]-enc_color1[0];	
+		diff[1] = enc_color2[1]-enc_color1[1];	
+		diff[2] = enc_color2[2]-enc_color1[2];
+		// Expand 5 bits to 8 by copying the top three bits into the low bits.
+		avg_color_quant1[0] = enc_color1[0] << 3 | (enc_color1[0] >> 2);
+		avg_color_quant1[1] = enc_color1[1] << 3 | (enc_color1[1] >> 2);
+		avg_color_quant1[2] = enc_color1[2] << 3 | (enc_color1[2] >> 2);
+		avg_color_quant2[0] = enc_color2[0] << 3 | (enc_color2[0] >> 2);
+		avg_color_quant2[1] = enc_color2[1] << 3 | (enc_color2[1] >> 2);
+		avg_color_quant2[2] = enc_color2[2] << 3 | (enc_color2[2] >> 2);
+
+		// Pack bits into the first word. 
+
+		compressed1_flip = 0;
+		PUTBITSHIGH( compressed1_flip, diffbit,       1, 33);
+ 		PUTBITSHIGH( compressed1_flip, enc_color1[0], 5, 63);
+ 		PUTBITSHIGH( compressed1_flip, enc_color1[1], 5, 55);
+ 		PUTBITSHIGH( compressed1_flip, enc_color1[2], 5, 47);
+ 		PUTBITSHIGH( compressed1_flip, diff[0],       3, 58);
+ 		PUTBITSHIGH( compressed1_flip, diff[1],       3, 50);
+ 		PUTBITSHIGH( compressed1_flip, diff[2],       3, 42);
+
+		unsigned int best_pixel_indices1_MSB;
+		unsigned int best_pixel_indices1_LSB;
+		unsigned int best_pixel_indices2_MSB;
+		unsigned int best_pixel_indices2_LSB;
+
+		// upper part of block
+		flip_err = tryalltables_3bittable4x2(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+		// lower part of block
+		flip_err += tryalltables_3bittable4x2(img,width,height,startx,starty+2,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+ 		PUTBITSHIGH( compressed1_flip, best_table1,   3, 39);
+ 		PUTBITSHIGH( compressed1_flip, best_table2,   3, 36);
+ 		PUTBITSHIGH( compressed1_flip,           1,   1, 32);
+		// NOTE(review): the << 2 merge depends on the index bit layout produced by tryalltables_3bittable4x2 (not visible here) -- confirm.
+		best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+		best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);
+		
+		compressed2_flip = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+	}
+	else
+	{
+		diffbit = 0;
+		// The difference is bigger than what fits in 555 plus delta-333, so we will have
+		// to deal with 444 444.
+		eps = (float) 0.0001;
+		// Re-quantize to 4 bits per channel: avg/17 rounded to nearest (17 = 255/15).
+		enc_color1[0] = int( ((float) avg_color_float1[0] / (17.0)) +0.5 + eps);
+		enc_color1[1] = int( ((float) avg_color_float1[1] / (17.0)) +0.5 + eps);
+		enc_color1[2] = int( ((float) avg_color_float1[2] / (17.0)) +0.5 + eps);
+		enc_color2[0] = int( ((float) avg_color_float2[0] / (17.0)) +0.5 + eps);
+		enc_color2[1] = int( ((float) avg_color_float2[1] / (17.0)) +0.5 + eps);
+		enc_color2[2] = int( ((float) avg_color_float2[2] / (17.0)) +0.5 + eps);
+
+		avg_color_quant1[0] = enc_color1[0] << 4 | enc_color1[0]; 
+		avg_color_quant1[1] = enc_color1[1] << 4 | enc_color1[1]; 
+		avg_color_quant1[2] = enc_color1[2] << 4 | enc_color1[2];
+		avg_color_quant2[0] = enc_color2[0] << 4 | enc_color2[0]; 
+		avg_color_quant2[1] = enc_color2[1] << 4 | enc_color2[1]; 
+		avg_color_quant2[2] = enc_color2[2] << 4 | enc_color2[2];
+
+		//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+		//      ---------------------------------------------------------------------------------------------------
+		//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+		//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+		//      ---------------------------------------------------------------------------------------------------
+
+
+		// Pack bits into the first word. 
+
+		compressed1_flip = 0;
+		PUTBITSHIGH( compressed1_flip, diffbit,       1, 33);
+ 		PUTBITSHIGH( compressed1_flip, enc_color1[0], 4, 63);
+ 		PUTBITSHIGH( compressed1_flip, enc_color1[1], 4, 55);
+ 		PUTBITSHIGH( compressed1_flip, enc_color1[2], 4, 47);
+ 		PUTBITSHIGH( compressed1_flip, enc_color2[0], 4, 59);
+ 		PUTBITSHIGH( compressed1_flip, enc_color2[1], 4, 51);
+ 		PUTBITSHIGH( compressed1_flip, enc_color2[2], 4, 43);
+
+		unsigned int best_pixel_indices1_MSB;
+		unsigned int best_pixel_indices1_LSB;
+		unsigned int best_pixel_indices2_MSB;
+		unsigned int best_pixel_indices2_LSB;
+
+		// upper part of block
+		flip_err = tryalltables_3bittable4x2(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+		// lower part of block
+		flip_err += tryalltables_3bittable4x2(img,width,height,startx,starty+2,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+ 		PUTBITSHIGH( compressed1_flip, best_table1,   3, 39);
+ 		PUTBITSHIGH( compressed1_flip, best_table2,   3, 36);
+ 		PUTBITSHIGH( compressed1_flip,           1,   1, 32);
+
+		best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+		best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);
+		
+		compressed2_flip = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+	}
+
+	// Keep whichever layout has the lower summed error; a tie keeps the non-flipped layout.
+
+	if(norm_err <= flip_err)
+	{
+		compressed1 = compressed1_norm | 0;
+		compressed2 = compressed2_norm;
+	}
+	else
+	{
+		compressed1 = compressed1_flip | 1; // NOTE(review): presumably re-asserts the flip bit already written at position 32 -- confirm against PUTBITSHIGH
+		compressed2 = compressed2_flip;
+	}
+}
+
+// Compresses the block using only the differential mode in ETC1/ETC2
+// Uses the average color as the base color in each half-block.
+// If average colors are too different, use the average color of the entire block in both half-blocks.
+// Tries both flipped and unflipped.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int compressBlockOnlyDiffFlipAverage(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2, int *best_enc_color1, int*best_enc_color2, int &best_flip)
+{
+	unsigned int compressed1_norm, compressed2_norm;
+	unsigned int compressed1_flip, compressed2_flip;
+	uint8 avg_color_quant1[3], avg_color_quant2[3];
+
+	float avg_color_float1[3],avg_color_float2[3];
+	int enc_color1[3], enc_color2[3], diff[3];
+	int min_error=255*255*8*3;
+	unsigned int best_table_indices1=0, best_table_indices2=0;
+	unsigned int best_table1=0, best_table2=0;
+	int diffbit;
+
+	int norm_err=0;
+	int flip_err=0;
+	int best_err;
+
+	// First try normal blocks 2x4:
+
+	computeAverageColor2x4noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor2x4noQuantFloat(img,width,height,startx+2,starty,avg_color_float2);
+
+	// First test if avg_color1 is similar enough to avg_color2 so that
+	// we can use differential coding of colors. 
+
+	enc_color1[0] = int( JAS_ROUND(31.0*avg_color_float1[0]/255.0) );
+	enc_color1[1] = int( JAS_ROUND(31.0*avg_color_float1[1]/255.0) );
+	enc_color1[2] = int( JAS_ROUND(31.0*avg_color_float1[2]/255.0) );
+	enc_color2[0] = int( JAS_ROUND(31.0*avg_color_float2[0]/255.0) );
+	enc_color2[1] = int( JAS_ROUND(31.0*avg_color_float2[1]/255.0) );
+	enc_color2[2] = int( JAS_ROUND(31.0*avg_color_float2[2]/255.0) );
+
+	diff[0] = enc_color2[0]-enc_color1[0];	
+	diff[1] = enc_color2[1]-enc_color1[1];	
+	diff[2] = enc_color2[2]-enc_color1[2];
+
+	if( !((diff[0] >= -4) && (diff[0] <= 3) && (diff[1] >= -4) && (diff[1] <= 3) && (diff[2] >= -4) && (diff[2] <= 3)) )
+	{
+		// The colors are too different. Use the same color in both blocks.
+		enc_color1[0] = int( JAS_ROUND(31.0*((avg_color_float1[0]+avg_color_float2[0])/2.0)/255.0) );
+		enc_color1[1] = int( JAS_ROUND(31.0*((avg_color_float1[1]+avg_color_float2[1])/2.0)/255.0) );
+		enc_color1[2] = int( JAS_ROUND(31.0*((avg_color_float1[2]+avg_color_float2[2])/2.0)/255.0) );
+		enc_color2[0] = enc_color1[0];
+		enc_color2[1] = enc_color1[1];
+		enc_color2[2] = enc_color1[2];
+		diff[0] = enc_color2[0]-enc_color1[0];	
+		diff[1] = enc_color2[1]-enc_color1[1];	
+		diff[2] = enc_color2[2]-enc_color1[2];
+	}
+
+	diffbit = 1;
+
+	// The difference to be coded:
+
+	diff[0] = enc_color2[0]-enc_color1[0];	
+	diff[1] = enc_color2[1]-enc_color1[1];	
+	diff[2] = enc_color2[2]-enc_color1[2];
+
+	avg_color_quant1[0] = enc_color1[0] << 3 | (enc_color1[0] >> 2);
+	avg_color_quant1[1] = enc_color1[1] << 3 | (enc_color1[1] >> 2);
+	avg_color_quant1[2] = enc_color1[2] << 3 | (enc_color1[2] >> 2);
+	avg_color_quant2[0] = enc_color2[0] << 3 | (enc_color2[0] >> 2);
+	avg_color_quant2[1] = enc_color2[1] << 3 | (enc_color2[1] >> 2);
+	avg_color_quant2[2] = enc_color2[2] << 3 | (enc_color2[2] >> 2);
+
+	// Pack bits into the first word. 
+
+	//     ETC1_RGB8_OES:
+	// 
+	//     a) bit layout in bits 63 through 32 if diffbit = 0
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+	//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	//     
+	//     b) bit layout in bits 63 through 32 if diffbit = 1
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	// 
+	//     c) bit layout in bits 31 through 0 (in both cases)
+	// 
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
+	//      --------------------------------------------------------------------------------------------------
+	//     |       most significant pixel index bits       |         least significant pixel index bits       |  
+	//     | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
+	//      --------------------------------------------------------------------------------------------------      
+
+	compressed1_norm = 0;
+	PUTBITSHIGH( compressed1_norm, diffbit,       1, 33);
+ 	PUTBITSHIGH( compressed1_norm, enc_color1[0], 5, 63);
+ 	PUTBITSHIGH( compressed1_norm, enc_color1[1], 5, 55);
+ 	PUTBITSHIGH( compressed1_norm, enc_color1[2], 5, 47);
+ 	PUTBITSHIGH( compressed1_norm, diff[0],       3, 58);
+ 	PUTBITSHIGH( compressed1_norm, diff[1],       3, 50);
+ 	PUTBITSHIGH( compressed1_norm, diff[2],       3, 42);
+
+	unsigned int best_pixel_indices1_MSB;
+	unsigned int best_pixel_indices1_LSB;
+	unsigned int best_pixel_indices2_MSB;
+	unsigned int best_pixel_indices2_LSB;
+
+	best_enc_color1[0] = enc_color1[0];
+	best_enc_color1[1] = enc_color1[1];
+	best_enc_color1[2] = enc_color1[2];
+	best_enc_color2[0] = enc_color2[0];
+	best_enc_color2[1] = enc_color2[1];
+	best_enc_color2[2] = enc_color2[2];
+
+	norm_err = 0;
+
+	// left part of block
+	norm_err = tryalltables_3bittable2x4(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+
+	// right part of block
+	norm_err += tryalltables_3bittable2x4(img,width,height,startx+2,starty,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+ 	PUTBITSHIGH( compressed1_norm, best_table1,   3, 39);
+ 	PUTBITSHIGH( compressed1_norm, best_table2,   3, 36);
+ 	PUTBITSHIGH( compressed1_norm,           0,   1, 32);
+
+	compressed2_norm = 0;
+	PUTBITS( compressed2_norm, (best_pixel_indices1_MSB     ), 8, 23);
+	PUTBITS( compressed2_norm, (best_pixel_indices2_MSB     ), 8, 31);
+	PUTBITS( compressed2_norm, (best_pixel_indices1_LSB     ), 8, 7);
+	PUTBITS( compressed2_norm, (best_pixel_indices2_LSB     ), 8, 15);
+
+	// Now try flipped blocks 4x2:
+
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty+2,avg_color_float2);
+
+	// First test if avg_color1 is similar enough to avg_color2 so that
+	// we can use differential coding of colors. 
+
+	enc_color1[0] = int( JAS_ROUND(31.0*avg_color_float1[0]/255.0) );
+	enc_color1[1] = int( JAS_ROUND(31.0*avg_color_float1[1]/255.0) );
+	enc_color1[2] = int( JAS_ROUND(31.0*avg_color_float1[2]/255.0) );
+	enc_color2[0] = int( JAS_ROUND(31.0*avg_color_float2[0]/255.0) );
+	enc_color2[1] = int( JAS_ROUND(31.0*avg_color_float2[1]/255.0) );
+	enc_color2[2] = int( JAS_ROUND(31.0*avg_color_float2[2]/255.0) );
+
+	diff[0] = enc_color2[0]-enc_color1[0];	
+	diff[1] = enc_color2[1]-enc_color1[1];	
+	diff[2] = enc_color2[2]-enc_color1[2];
+
+	if( !((diff[0] >= -4) && (diff[0] <= 3) && (diff[1] >= -4) && (diff[1] <= 3) && (diff[2] >= -4) && (diff[2] <= 3)) )
+	{
+		// The colors are too different. Use the same color in both blocks.
+		enc_color1[0] = int( JAS_ROUND(31.0*((avg_color_float1[0]+avg_color_float2[0])/2.0)/255.0) );
+		enc_color1[1] = int( JAS_ROUND(31.0*((avg_color_float1[1]+avg_color_float2[1])/2.0)/255.0) );
+		enc_color1[2] = int( JAS_ROUND(31.0*((avg_color_float1[2]+avg_color_float2[2])/2.0)/255.0) );
+		enc_color2[0] = enc_color1[0];
+		enc_color2[1] = enc_color1[1];
+		enc_color2[2] = enc_color1[2];
+		diff[0] = enc_color2[0]-enc_color1[0];	
+		diff[1] = enc_color2[1]-enc_color1[1];	
+		diff[2] = enc_color2[2]-enc_color1[2];
+	}
+	diffbit = 1;
+
+	// The difference to be coded:
+
+	diff[0] = enc_color2[0]-enc_color1[0];	
+	diff[1] = enc_color2[1]-enc_color1[1];	
+	diff[2] = enc_color2[2]-enc_color1[2];
+
+	avg_color_quant1[0] = enc_color1[0] << 3 | (enc_color1[0] >> 2);
+	avg_color_quant1[1] = enc_color1[1] << 3 | (enc_color1[1] >> 2);
+	avg_color_quant1[2] = enc_color1[2] << 3 | (enc_color1[2] >> 2);
+	avg_color_quant2[0] = enc_color2[0] << 3 | (enc_color2[0] >> 2);
+	avg_color_quant2[1] = enc_color2[1] << 3 | (enc_color2[1] >> 2);
+	avg_color_quant2[2] = enc_color2[2] << 3 | (enc_color2[2] >> 2);
+
+	// Pack bits into the first word. 
+
+	compressed1_flip = 0;
+	PUTBITSHIGH( compressed1_flip, diffbit,       1, 33);
+ 	PUTBITSHIGH( compressed1_flip, enc_color1[0], 5, 63);
+ 	PUTBITSHIGH( compressed1_flip, enc_color1[1], 5, 55);
+ 	PUTBITSHIGH( compressed1_flip, enc_color1[2], 5, 47);
+ 	PUTBITSHIGH( compressed1_flip, diff[0],       3, 58);
+ 	PUTBITSHIGH( compressed1_flip, diff[1],       3, 50);
+ 	PUTBITSHIGH( compressed1_flip, diff[2],       3, 42);
+
+	// upper part of block
+	flip_err = tryalltables_3bittable4x2(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+	// lower part of block
+	flip_err += tryalltables_3bittable4x2(img,width,height,startx,starty+2,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+ 	PUTBITSHIGH( compressed1_flip, best_table1,   3, 39);
+ 	PUTBITSHIGH( compressed1_flip, best_table2,   3, 36);
+ 	PUTBITSHIGH( compressed1_flip,           1,   1, 32);
+
+	best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+	best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);
+	
+	compressed2_flip = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+
+	// Now lets see which is the best table to use. Only 8 tables are possible. 
+
+	if(norm_err <= flip_err)
+	{
+		compressed1 = compressed1_norm | 0;
+		compressed2 = compressed2_norm;
+		best_err = norm_err;
+		best_flip = 0;
+	}
+	else
+	{
+		compressed1 = compressed1_flip | 1;
+		compressed2 = compressed2_flip;
+		best_err = flip_err;
+		best_enc_color1[0] = enc_color1[0];
+		best_enc_color1[1] = enc_color1[1];
+		best_enc_color1[2] = enc_color1[2];
+		best_enc_color2[0] = enc_color2[0];
+		best_enc_color2[1] = enc_color2[1];
+		best_enc_color2[2] = enc_color2[2];
+		best_flip = 1;
+	}
+	return best_err;
+}
+
+// Compresses one 4x4 block using only the differential mode in ETC1/ETC2.
+// Uses the average color of each half-block, quantized to 5 bits per channel, as its base color.
+// If the two quantized base colors differ by more than the signed 3-bit delta can express
+// (any channel outside [-4, 3]), both half-blocks share the midpoint of the two quantized
+// colors instead, so the coded delta is zero.
+// Tries both unflipped (two 2x4 halves) and flipped (two 4x2 halves); a tie keeps the unflipped result.
+// Uses fixed point arithmetics where 1000 represents 1.0.
+//
+// img, width, height: source image and its dimensions, passed through to the helper routines.
+// startx, starty:     origin of the block; the second half-block starts at startx+2 (unflipped)
+//                     or at starty+2 (flipped).
+// compressed1:        receives the word holding bits 63..32 (base colors, table codewords, diff bit, flip bit).
+// compressed2:        receives the word holding bits 31..0 (pixel indices).
+// Returns the summed error of the two half-blocks for the layout that was kept.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockOnlyDiffFlipAveragePerceptual1000(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2)
+{
+	unsigned int compressed1_norm, compressed2_norm;
+	unsigned int compressed1_flip, compressed2_flip;
+	uint8 avg_color_quant1[3], avg_color_quant2[3];
+
+	float avg_color_float1[3],avg_color_float2[3];
+	int enc_color1[3], enc_color2[3], diff[3];
+	unsigned int best_table1=0, best_table2=0;
+
+	// Scratch outputs of the table search, reused for both layouts.
+	unsigned int best_pixel_indices1_MSB, best_pixel_indices1_LSB;
+	unsigned int best_pixel_indices2_MSB, best_pixel_indices2_LSB;
+
+	const int diffbit = 1;		// only differential blocks are produced here
+	int c;						// color channel index
+
+	int norm_err;
+	int flip_err;
+
+	// First try normal blocks 2x4:
+
+	computeAverageColor2x4noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor2x4noQuantFloat(img,width,height,startx+2,starty,avg_color_float2);
+
+	// Quantize both averages to 5 bits per channel and compute the delta that
+	// differential coding would have to store.
+	for(c = 0; c < 3; c++)
+	{
+		enc_color1[c] = int( JAS_ROUND(31.0*avg_color_float1[c]/255.0) );
+		enc_color2[c] = int( JAS_ROUND(31.0*avg_color_float2[c]/255.0) );
+		diff[c] = enc_color2[c]-enc_color1[c];
+	}
+
+	if( !((diff[0] >= -4) && (diff[0] <= 3) && (diff[1] >= -4) && (diff[1] <= 3) && (diff[2] >= -4) && (diff[2] <= 3)) )
+	{
+		// The delta does not fit in 3 signed bits: let both half-blocks share the
+		// midpoint of the two quantized colors, which makes the delta zero.
+		for(c = 0; c < 3; c++)
+		{
+			enc_color1[c] = (enc_color1[c] + enc_color2[c]) >> 1;
+			enc_color2[c] = enc_color1[c];
+			diff[c] = 0;
+		}
+	}
+
+	// Expand the 5-bit base colors to 8 bits by replicating their top three bits.
+	for(c = 0; c < 3; c++)
+	{
+		avg_color_quant1[c] = enc_color1[c] << 3 | (enc_color1[c] >> 2);
+		avg_color_quant2[c] = enc_color2[c] << 3 | (enc_color2[c] >> 2);
+	}
+
+	// Pack bits into the first word.
+
+	//     ETC1_RGB8_OES, bit layout in bits 63 through 32 if diffbit = 1
+	//
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	//
+	//     bit layout in bits 31 through 0
+	//
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
+	//      --------------------------------------------------------------------------------------------------
+	//     |       most significant pixel index bits       |         least significant pixel index bits       |
+	//     | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
+	//      --------------------------------------------------------------------------------------------------
+
+	compressed1_norm = 0;
+	PUTBITSHIGH( compressed1_norm, diffbit,       1, 33);
+	PUTBITSHIGH( compressed1_norm, enc_color1[0], 5, 63);
+	PUTBITSHIGH( compressed1_norm, enc_color1[1], 5, 55);
+	PUTBITSHIGH( compressed1_norm, enc_color1[2], 5, 47);
+	PUTBITSHIGH( compressed1_norm, diff[0],       3, 58);
+	PUTBITSHIGH( compressed1_norm, diff[1],       3, 50);
+	PUTBITSHIGH( compressed1_norm, diff[2],       3, 42);
+
+	// left part of block
+	norm_err = tryalltables_3bittable2x4percep1000(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+
+	// right part of block
+	norm_err += tryalltables_3bittable2x4percep1000(img,width,height,startx+2,starty,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+	PUTBITSHIGH( compressed1_norm, best_table1,   3, 39);
+	PUTBITSHIGH( compressed1_norm, best_table2,   3, 36);
+	PUTBITSHIGH( compressed1_norm,           0,   1, 32);	// flip bit cleared
+
+	// Each half contributes 8 index bits to the MSB plane and 8 to the LSB plane.
+	compressed2_norm = 0;
+	PUTBITS( compressed2_norm, (best_pixel_indices1_MSB     ), 8, 23);
+	PUTBITS( compressed2_norm, (best_pixel_indices2_MSB     ), 8, 31);
+	PUTBITS( compressed2_norm, (best_pixel_indices1_LSB     ), 8, 7);
+	PUTBITS( compressed2_norm, (best_pixel_indices2_LSB     ), 8, 15);
+
+	// Now try flipped blocks 4x2:
+
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty+2,avg_color_float2);
+
+	// Same quantization and range test as for the unflipped layout.
+	for(c = 0; c < 3; c++)
+	{
+		enc_color1[c] = int( JAS_ROUND(31.0*avg_color_float1[c]/255.0) );
+		enc_color2[c] = int( JAS_ROUND(31.0*avg_color_float2[c]/255.0) );
+		diff[c] = enc_color2[c]-enc_color1[c];
+	}
+
+	if( !((diff[0] >= -4) && (diff[0] <= 3) && (diff[1] >= -4) && (diff[1] <= 3) && (diff[2] >= -4) && (diff[2] <= 3)) )
+	{
+		for(c = 0; c < 3; c++)
+		{
+			enc_color1[c] = (enc_color1[c] + enc_color2[c]) >> 1;
+			enc_color2[c] = enc_color1[c];
+			diff[c] = 0;
+		}
+	}
+
+	for(c = 0; c < 3; c++)
+	{
+		avg_color_quant1[c] = enc_color1[c] << 3 | (enc_color1[c] >> 2);
+		avg_color_quant2[c] = enc_color2[c] << 3 | (enc_color2[c] >> 2);
+	}
+
+	// Pack bits into the first word.
+
+	compressed1_flip = 0;
+	PUTBITSHIGH( compressed1_flip, diffbit,       1, 33);
+	PUTBITSHIGH( compressed1_flip, enc_color1[0], 5, 63);
+	PUTBITSHIGH( compressed1_flip, enc_color1[1], 5, 55);
+	PUTBITSHIGH( compressed1_flip, enc_color1[2], 5, 47);
+	PUTBITSHIGH( compressed1_flip, diff[0],       3, 58);
+	PUTBITSHIGH( compressed1_flip, diff[1],       3, 50);
+	PUTBITSHIGH( compressed1_flip, diff[2],       3, 42);
+
+	// upper part of block
+	flip_err = tryalltables_3bittable4x2percep1000(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+	// lower part of block
+	flip_err += tryalltables_3bittable4x2percep1000(img,width,height,startx,starty+2,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+	PUTBITSHIGH( compressed1_flip, best_table1,   3, 39);
+	PUTBITSHIGH( compressed1_flip, best_table2,   3, 36);
+	PUTBITSHIGH( compressed1_flip,           1,   1, 32);	// flip bit set
+
+	// Merge the lower half's index bits, shifted up by two, into the upper half's
+	// and keep 16 bits of each plane.
+	best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+	best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);
+
+	compressed2_flip = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+
+	// Keep whichever layout has the smaller error; a tie keeps the unflipped one.
+	if(norm_err <= flip_err)
+	{
+		compressed1 = compressed1_norm | 0;
+		compressed2 = compressed2_norm;
+		return norm_err;
+	}
+
+	compressed1 = compressed1_flip | 1;
+	compressed2 = compressed2_flip;
+	return flip_err;
+}
+
+// Compresses one 4x4 block using both the individual and the differential mode in ETC1/ETC2.
+// Uses the average color as the base color in each half-block.
+// For each layout the differential mode (5-bit base color plus a signed 3-bit delta) is used
+// when the delta between the two quantized averages lies within [-4, 3] in every channel;
+// otherwise the individual mode (two independent 4-bit base colors) is used.
+// Uses a perceptual error metric.
+// Tries both flipped and unflipped; a tie keeps the unflipped result.
+//
+// img, width, height: source image and its dimensions, passed through to the helper routines.
+// startx, starty:     origin of the block; the second half-block starts at startx+2 (unflipped)
+//                     or at starty+2 (flipped).
+// compressed1:        receives the word holding bits 63..32 (base colors, table codewords, diff bit, flip bit).
+// compressed2:        receives the word holding bits 31..0 (pixel indices).
+// Returns the summed error of the two half-blocks for the layout that was kept.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double compressBlockDiffFlipAveragePerceptual(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2)
+{
+	unsigned int compressed1_norm, compressed2_norm;
+	unsigned int compressed1_flip, compressed2_flip;
+	uint8 avg_color_quant1[3], avg_color_quant2[3];
+
+	float avg_color_float1[3],avg_color_float2[3];
+	int enc_color1[3], enc_color2[3], diff[3];
+	unsigned int best_table1=0, best_table2=0;
+
+	// Scratch outputs of the table search, reused for both layouts.
+	unsigned int best_pixel_indices1_MSB, best_pixel_indices1_LSB;
+	unsigned int best_pixel_indices2_MSB, best_pixel_indices2_LSB;
+
+	int diffbit;
+	int c;						// color channel index
+
+	// Small bias added before truncating to the 4-bit scale in individual mode.
+	const float eps = (float) 0.0001;
+
+	// NOTE(review): the errors are accumulated in ints exactly as before. If the
+	// tryalltables_*percep routines return a fractional value, every assignment
+	// below truncates it -- confirm that this is intended.
+	int norm_err;
+	int flip_err;
+
+	//     ETC1_RGB8_OES:
+	//
+	//     a) bit layout in bits 63 through 32 if diffbit = 0
+	//
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+	//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	//
+	//     b) bit layout in bits 63 through 32 if diffbit = 1
+	//
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	//
+	//     c) bit layout in bits 31 through 0 (in both cases)
+	//
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
+	//      --------------------------------------------------------------------------------------------------
+	//     |       most significant pixel index bits       |         least significant pixel index bits       |
+	//     | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
+	//      --------------------------------------------------------------------------------------------------
+
+	// First try normal blocks 2x4:
+
+	computeAverageColor2x4noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor2x4noQuantFloat(img,width,height,startx+2,starty,avg_color_float2);
+
+	// First test if avg_color1 is similar enough to avg_color2 so that
+	// we can use differential coding of colors.
+	for(c = 0; c < 3; c++)
+	{
+		enc_color1[c] = int( JAS_ROUND(31.0*avg_color_float1[c]/255.0) );
+		enc_color2[c] = int( JAS_ROUND(31.0*avg_color_float2[c]/255.0) );
+		diff[c] = enc_color2[c]-enc_color1[c];
+	}
+
+	compressed1_norm = 0;
+
+	if( (diff[0] >= -4) && (diff[0] <= 3) && (diff[1] >= -4) && (diff[1] <= 3) && (diff[2] >= -4) && (diff[2] <= 3) )
+	{
+		diffbit = 1;
+
+		// Expand the 5-bit base colors to 8 bits by replicating their top three bits.
+		for(c = 0; c < 3; c++)
+		{
+			avg_color_quant1[c] = enc_color1[c] << 3 | (enc_color1[c] >> 2);
+			avg_color_quant2[c] = enc_color2[c] << 3 | (enc_color2[c] >> 2);
+		}
+
+		// Pack bits into the first word (layout b).
+		PUTBITSHIGH( compressed1_norm, diffbit,       1, 33);
+		PUTBITSHIGH( compressed1_norm, enc_color1[0], 5, 63);
+		PUTBITSHIGH( compressed1_norm, enc_color1[1], 5, 55);
+		PUTBITSHIGH( compressed1_norm, enc_color1[2], 5, 47);
+		PUTBITSHIGH( compressed1_norm, diff[0],       3, 58);
+		PUTBITSHIGH( compressed1_norm, diff[1],       3, 50);
+		PUTBITSHIGH( compressed1_norm, diff[2],       3, 42);
+	}
+	else
+	{
+		diffbit = 0;
+		// The difference is bigger than what fits in 555 plus delta-333, so we will have
+		// to deal with 444 444.
+		for(c = 0; c < 3; c++)
+		{
+			enc_color1[c] = int( ((float) avg_color_float1[c] / (17.0)) +0.5 + eps);
+			enc_color2[c] = int( ((float) avg_color_float2[c] / (17.0)) +0.5 + eps);
+			// Expand 4 bits to 8 by repeating the nibble.
+			avg_color_quant1[c] = enc_color1[c] << 4 | enc_color1[c];
+			avg_color_quant2[c] = enc_color2[c] << 4 | enc_color2[c];
+		}
+
+		// Pack bits into the first word (layout a).
+		PUTBITSHIGH( compressed1_norm, diffbit,       1, 33);
+		PUTBITSHIGH( compressed1_norm, enc_color1[0], 4, 63);
+		PUTBITSHIGH( compressed1_norm, enc_color1[1], 4, 55);
+		PUTBITSHIGH( compressed1_norm, enc_color1[2], 4, 47);
+		PUTBITSHIGH( compressed1_norm, enc_color2[0], 4, 59);
+		PUTBITSHIGH( compressed1_norm, enc_color2[1], 4, 51);
+		PUTBITSHIGH( compressed1_norm, enc_color2[2], 4, 43);
+	}
+
+	// The table search and the remaining packing are the same for both modes.
+
+	// left part of block
+	norm_err = tryalltables_3bittable2x4percep(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+
+	// right part of block
+	norm_err += tryalltables_3bittable2x4percep(img,width,height,startx+2,starty,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+	PUTBITSHIGH( compressed1_norm, best_table1,   3, 39);
+	PUTBITSHIGH( compressed1_norm, best_table2,   3, 36);
+	PUTBITSHIGH( compressed1_norm,           0,   1, 32);	// flip bit cleared
+
+	compressed2_norm = 0;
+	PUTBITS( compressed2_norm, (best_pixel_indices1_MSB     ), 8, 23);
+	PUTBITS( compressed2_norm, (best_pixel_indices2_MSB     ), 8, 31);
+	PUTBITS( compressed2_norm, (best_pixel_indices1_LSB     ), 8, 7);
+	PUTBITS( compressed2_norm, (best_pixel_indices2_LSB     ), 8, 15);
+
+	// Now try flipped blocks 4x2:
+
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty+2,avg_color_float2);
+
+	// First test if avg_color1 is similar enough to avg_color2 so that
+	// we can use differential coding of colors.
+	for(c = 0; c < 3; c++)
+	{
+		enc_color1[c] = int( JAS_ROUND(31.0*avg_color_float1[c]/255.0) );
+		enc_color2[c] = int( JAS_ROUND(31.0*avg_color_float2[c]/255.0) );
+		diff[c] = enc_color2[c]-enc_color1[c];
+	}
+
+	compressed1_flip = 0;
+
+	if( (diff[0] >= -4) && (diff[0] <= 3) && (diff[1] >= -4) && (diff[1] <= 3) && (diff[2] >= -4) && (diff[2] <= 3) )
+	{
+		diffbit = 1;
+
+		for(c = 0; c < 3; c++)
+		{
+			avg_color_quant1[c] = enc_color1[c] << 3 | (enc_color1[c] >> 2);
+			avg_color_quant2[c] = enc_color2[c] << 3 | (enc_color2[c] >> 2);
+		}
+
+		// Pack bits into the first word (layout b).
+		PUTBITSHIGH( compressed1_flip, diffbit,       1, 33);
+		PUTBITSHIGH( compressed1_flip, enc_color1[0], 5, 63);
+		PUTBITSHIGH( compressed1_flip, enc_color1[1], 5, 55);
+		PUTBITSHIGH( compressed1_flip, enc_color1[2], 5, 47);
+		PUTBITSHIGH( compressed1_flip, diff[0],       3, 58);
+		PUTBITSHIGH( compressed1_flip, diff[1],       3, 50);
+		PUTBITSHIGH( compressed1_flip, diff[2],       3, 42);
+	}
+	else
+	{
+		diffbit = 0;
+		// The difference is bigger than what fits in 555 plus delta-333, so we will have
+		// to deal with 444 444.
+		for(c = 0; c < 3; c++)
+		{
+			enc_color1[c] = int( ((float) avg_color_float1[c] / (17.0)) +0.5 + eps);
+			enc_color2[c] = int( ((float) avg_color_float2[c] / (17.0)) +0.5 + eps);
+			avg_color_quant1[c] = enc_color1[c] << 4 | enc_color1[c];
+			avg_color_quant2[c] = enc_color2[c] << 4 | enc_color2[c];
+		}
+
+		// Pack bits into the first word (layout a).
+		PUTBITSHIGH( compressed1_flip, diffbit,       1, 33);
+		PUTBITSHIGH( compressed1_flip, enc_color1[0], 4, 63);
+		PUTBITSHIGH( compressed1_flip, enc_color1[1], 4, 55);
+		PUTBITSHIGH( compressed1_flip, enc_color1[2], 4, 47);
+		PUTBITSHIGH( compressed1_flip, enc_color2[0], 4, 59);
+		PUTBITSHIGH( compressed1_flip, enc_color2[1], 4, 51);
+		PUTBITSHIGH( compressed1_flip, enc_color2[2], 4, 43);
+	}
+
+	// upper part of block
+	flip_err = tryalltables_3bittable4x2percep(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+	// lower part of block
+	flip_err += tryalltables_3bittable4x2percep(img,width,height,startx,starty+2,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+	PUTBITSHIGH( compressed1_flip, best_table1,   3, 39);
+	PUTBITSHIGH( compressed1_flip, best_table2,   3, 36);
+	PUTBITSHIGH( compressed1_flip,           1,   1, 32);	// flip bit set
+
+	// Merge the lower half's index bits, shifted up by two, into the upper half's
+	// and keep 16 bits of each plane.
+	best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+	best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);
+
+	compressed2_flip = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+
+	// Keep whichever layout has the smaller error; a tie keeps the unflipped one.
+	if(norm_err <= flip_err)
+	{
+		compressed1 = compressed1_norm | 0;
+		compressed2 = compressed2_norm;
+		return norm_err;
+	}
+
+	compressed1 = compressed1_flip | 1;
+	compressed2 = compressed2_flip;
+	return flip_err;
+}
+
+// This is our structure for matrix data.
+// Elements are addressed as data[row*width + column] by the matrix routines in this file.
+struct dMatrix
+{
+	int width;			// The number of columns in the matrix
+	int height;			// The number of rows in the matrix
+	double *data;		// The matrix data in row order (width*height values)
+};
+
+// Multiplies two matrices and returns the product Amat * Bmat.
+//
+// Amat: left operand; its width must equal the height of Bmat.
+// Bmat: right operand.
+// Returns a matrix allocated with malloc (both the dMatrix and its data buffer), with
+// Bmat->width columns and Amat->height rows. Neither input is modified.
+// Prints a message and terminates the program with exit(1) if the dimensions do not
+// agree or if memory cannot be allocated.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+dMatrix *multiplyMatrices( dMatrix *Amat, dMatrix *Bmat)
+{
+	int xx,yy, q;
+	dMatrix *resmatrix;
+	size_t bytes;
+
+	if(Amat->width != Bmat->height)
+	{
+		printf("Cannot multiply matrices -- dimensions do not agree.\n");
+		exit(1);
+	}
+
+	// Allocate space for result
+	resmatrix = (dMatrix*) malloc(sizeof(dMatrix));
+	if(resmatrix == NULL)
+	{
+		printf("Cannot multiply matrices -- out of memory.\n");
+		exit(1);
+	}
+	resmatrix->width = Bmat->width;
+	resmatrix->height = Amat->height;
+
+	bytes = sizeof(double)*(resmatrix->width)*(resmatrix->height);
+	resmatrix->data = (double*) malloc(bytes);
+	// malloc(0) is allowed to return NULL, so only a non-empty request counts as a failure.
+	if(resmatrix->data == NULL && bytes != 0)
+	{
+		free(resmatrix);
+		printf("Cannot multiply matrices -- out of memory.\n");
+		exit(1);
+	}
+
+	// result(yy,xx) = sum over q of A(yy,q) * B(q,xx)
+	for(yy = 0; yy<resmatrix->height; yy++)
+	{
+		for(xx = 0; xx<resmatrix->width; xx++)
+		{
+			resmatrix->data[yy*resmatrix->width+xx] = 0.0;
+			for(q = 0; q<Amat->width; q++)
+				resmatrix->data[yy*resmatrix->width+xx] += Amat->data[yy*Amat->width + q] * Bmat->data[q*Bmat->width+xx];
+		}
+	}
+
+	return(resmatrix);
+}
+
+// Transposes a matrix
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void transposeMatrix( dMatrix *mat)
+{
+	// Transposes mat in place: the data buffer is reused (same element count) and width/height are swapped.
+	int xx, yy, zz;
+	int count = (mat->width)*(mat->height);
+	int newwidth = mat->height, newheight = mat->width;
+	// Work from a snapshot of the original contents so no element is overwritten before it has been read.
+	double *temp = (double*) malloc (sizeof(double)*(mat->width)*(mat->height));
+	if(temp == NULL && count > 0)	// malloc(0) may legitimately return NULL, so only a non-empty matrix is an error
+	{
+		printf("Cannot transpose matrix -- out of memory.\n");
+		exit(1);
+	}
+	for(zz = 0; zz<count; zz++)
+		temp[zz] = mat->data[zz];
+
+	for(yy = 0; yy<newheight; yy++)
+		for(xx = 0; xx<newwidth; xx++)
+			mat->data[yy*newwidth+xx] = temp[xx*(mat->width)+yy];	// new (yy,xx) = old (xx,yy); mat->width is still the old width
+	mat->height = newheight; mat->width = newwidth;
+	free(temp);
+}
+
+// In the planar mode in ETC2, the block can be partitioned as follows:
+// 
+// O A  A  A  H
+// B D1 D3 C3
+// B D2 C2 D5
+// B C1 D4 D6
+// V
+// Here A-pixels, B-pixels and C-pixels only depend on two values. For instance, B-pixels only depend on O and V.
+// This can be used to quickly rule out combinations of colors.
+// Here we calculate the minimum error for the block if we know the red component for O and V.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcBBBred(uint8 *block, int colorO, int colorV)
+{
+	// Widen both 6-bit endpoints to 8 bits (the top two bits are copied into the low bits).
+	colorO = (colorO << 2) | (colorO >> 4);
+	colorV = (colorV << 2) | (colorV >> 4);
+
+	const int rowStride = 4*4;				// block offset between vertically adjacent pixels
+	const int stepV = colorV-colorO;		// four times the per-row change from O towards V
+	unsigned int sum = 0;
+
+	// Left column below O (the three B pixels), rows 1, 2 and 3; red is at channel offset 0.
+	sum += square_table[(block[rowStride + 0] - clamp_table[ (((stepV + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table[(block[rowStride*2 + 0] - clamp_table[ ((((stepV<<1) + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table[(block[rowStride*3 + 0] - clamp_table[ (((3*stepV + 4*colorO)+2)>>2) + 255])+255];
+
+	return sum;
+}
+
+// Calculating the minimum error for the block if we know the red component for H and V.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcCCCred(uint8 *block, int colorH, int colorV)
+{
+	// Widen both 6-bit endpoints to 8 bits (the top two bits are copied into the low bits).
+	colorH = (colorH << 2) | (colorH >> 4);
+	colorV = (colorV << 2) | (colorV >> 4);
+	const int rowStride = 4*4, pixStride = 4;
+	unsigned int sum = 0;
+	// Anti-diagonal pixels C1 (row 3, col 1), C2 (row 2, col 2), C3 (row 1, col 3) depend only on H and V; red is channel offset 0.
+	sum += square_table[(block[rowStride*3 + pixStride + 0] - clamp_table[ (((colorH + 3*colorV)+2)>>2) + 255])+255];
+	sum += square_table[(block[rowStride*2 + pixStride*2 + 0] - clamp_table[ (((2*colorH + 2*colorV)+2)>>2) + 255])+255];
+	sum += square_table[(block[rowStride + pixStride*3 + 0] - clamp_table[ (((3*colorH + colorV)+2)>>2) + 255])+255];
+	return sum;
+}
+
+// Calculating the minimum error for the block if we know the red component for O and H.
+// Uses perceptual error metric.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcLowestPossibleRedOHperceptual(uint8 *block, int colorO, int colorH, unsigned int best_error_sofar)
+{
+	// Widen both 6-bit endpoints to 8 bits (the top two bits are copied into the low bits).
+	colorO = (colorO << 2) | (colorO >> 4);
+	colorH = (colorH << 2) | (colorH >> 4);
+	const int stepH = colorH-colorO;		// four times the per-column change from O towards H
+
+	// Top row, red channel (offset 0): O itself and the first A pixel are always scored.
+	unsigned int sum = square_table_percep_red[(block[0] - colorO) + 255];
+	sum += square_table_percep_red[(block[4] - clamp_table[ (((stepH + 4*colorO)+2)>>2) + 255])+255];
+	if(sum > best_error_sofar)
+		return sum;		// already above the best known error: leave the last two A pixels unscored
+
+	sum += square_table_percep_red[(block[4*2] - clamp_table[ ((((stepH<<1) + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table_percep_red[(block[4*3] - clamp_table[ (((3*stepH + 4*colorO)+2)>>2) + 255])+255];
+	return sum;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the red component for O and H.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcLowestPossibleRedOH(uint8 *block, int colorO, int colorH, unsigned int best_error_sofar)
+{
+	// Widen both 6-bit endpoints to 8 bits (the top two bits are copied into the low bits).
+	colorO = (colorO << 2) | (colorO >> 4);
+	colorH = (colorH << 2) | (colorH >> 4);
+	const int stepH = colorH-colorO;		// four times the per-column change from O towards H
+
+	// Top row, red channel (offset 0): O itself and the first A pixel are always scored.
+	unsigned int sum = square_table[(block[0] - colorO) + 255];
+	sum += square_table[(block[4] - clamp_table[ (((stepH + 4*colorO)+2)>>2) + 255])+255];
+	if(sum > best_error_sofar)
+		return sum;		// already above the best known error: leave the last two A pixels unscored
+
+	sum += square_table[(block[4*2] - clamp_table[ ((((stepH<<1) + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table[(block[4*3] - clamp_table[ (((3*stepH + 4*colorO)+2)>>2) + 255])+255];
+	return sum;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the red component for O and H and V.
+// Uses perceptual error metric. 
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcErrorPlanarOnlyRedPerceptual(uint8 *block, int colorO, int colorH, int colorV, unsigned int lowest_possible_error, unsigned int BBBvalue, unsigned int CCCvalue, unsigned int best_error_sofar)
+{
+	colorO = (colorO << 2) | (colorO >> 4);	// widen to 8 bits: shift up by two and copy the top bits into the low bits
+	colorH = (colorH << 2) | (colorH >> 4);
+	colorV = (colorV << 2) | (colorV >> 4);
+	// NOTE(review): the shift amounts imply 6-bit inputs -- confirm against the callers.
+	unsigned int error;
+	// Result: the caller-supplied partial errors plus the D1..D6 terms; once a running total exceeds best_error_sofar the remaining terms are skipped.
+	// The block can be partitioned into: O A  A  A
+	//                                    B D1 D3 C3
+	//                                    B D2 C2 D5
+	//                                    B C1 D4 D6
+	int xpart_times_4;	// column index times (colorH-colorO), i.e. four times the horizontal part of the prediction
+
+	// The first part: O A A A. It equals lowest_possible_error previously calculated.
+	// lowest_possible_error is OAAA, BBBvalue is BBB and CCCvalue is C1C2C3.
+	error = lowest_possible_error + BBBvalue + CCCvalue;
+
+	// The remaining pixels to cover are D1 through D6 (red is channel offset 0; block is indexed with 4 per pixel and 4*4 per row).
+	if(error <= best_error_sofar)
+	{
+		// Second column: D1 D2  but not C1
+		xpart_times_4 = (colorH-colorO);
+		error = error + square_table_percep_red[(block[4*4 + 4 + 0] - clamp_table[ (((xpart_times_4 + (colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		error = error + square_table_percep_red[(block[4*4*2 + 4 + 0] - clamp_table[ (((xpart_times_4 + ((colorV-colorO)<<1) + 4*colorO)+2)>>2) + 255])+255];
+		// Third column: D3 notC2 D4
+		xpart_times_4 = (colorH-colorO) << 1;
+		error = error + square_table_percep_red[(block[4*4 + 4*2 + 0] - clamp_table[ (((xpart_times_4 + (colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		if(error <= best_error_sofar)	// second early-out check, taken after D1, D2 and D3
+		{
+			error = error + square_table_percep_red[(block[4*4*3 + 4*2 + 0] - clamp_table[ (((xpart_times_4 + 3*(colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+			// Fourth column: notC3 D5 D6
+			xpart_times_4 = 3*(colorH-colorO);
+			error = error + square_table_percep_red[(block[4*4*2 + 4*3 + 0] - clamp_table[ (((xpart_times_4 + ((colorV-colorO)<<1) + 4*colorO)+2)>>2) + 255])+255];
+			error = error + square_table_percep_red[(block[4*4*3 + 4*3 + 0] - clamp_table[ (((xpart_times_4 + 3*(colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		}
+	}
+	return error;
+} 
+
+// Calculating the minimum error for the block (in planar mode) if we know the red component for O and H and V.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcErrorPlanarOnlyRed(uint8 *block, int colorO, int colorH, int colorV, unsigned int lowest_possible_error, unsigned int BBBvalue, unsigned int CCCvalue, unsigned int best_error_sofar)
+{
+	colorO = (colorO << 2) | (colorO >> 4);	// widen to 8 bits: shift up by two and copy the top bits into the low bits
+	colorH = (colorH << 2) | (colorH >> 4);
+	colorV = (colorV << 2) | (colorV >> 4);
+	// NOTE(review): the shift amounts imply 6-bit inputs -- confirm against the callers.
+	unsigned int error;
+	// Result: the caller-supplied partial errors plus the D1..D6 terms; once a running total exceeds best_error_sofar the remaining terms are skipped.
+	// The block can be partitioned into: O A  A  A
+	//                                    B D1 D3 C3
+	//                                    B D2 C2 D5
+	//                                    B C1 D4 D6
+	int xpart_times_4;	// column index times (colorH-colorO), i.e. four times the horizontal part of the prediction
+
+	// The first part: O A A A. It equals lowest_possible_error previously calculated.
+	// lowest_possible_error is OAAA, BBBvalue is BBB and CCCvalue is C1C2C3.
+	error = lowest_possible_error + BBBvalue + CCCvalue;
+
+	// The remaining pixels to cover are D1 through D6 (red is channel offset 0; block is indexed with 4 per pixel and 4*4 per row).
+	if(error <= best_error_sofar)
+	{
+		// Second column: D1 D2  but not C1
+		xpart_times_4 = (colorH-colorO);
+		error = error + square_table[(block[4*4 + 4 + 0] - clamp_table[ (((xpart_times_4 + (colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		error = error + square_table[(block[4*4*2 + 4 + 0] - clamp_table[ (((xpart_times_4 + ((colorV-colorO)<<1) + 4*colorO)+2)>>2) + 255])+255];
+		// Third column: D3 notC2 D4
+		xpart_times_4 = (colorH-colorO) << 1;
+		error = error + square_table[(block[4*4 + 4*2 + 0] - clamp_table[ (((xpart_times_4 + (colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		if(error <= best_error_sofar)	// second early-out check, taken after D1, D2 and D3
+		{
+			error = error + square_table[(block[4*4*3 + 4*2 + 0] - clamp_table[ (((xpart_times_4 + 3*(colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+			// Fourth column: notC3 D5 D6
+			xpart_times_4 = 3*(colorH-colorO);
+			error = error + square_table[(block[4*4*2 + 4*3 + 0] - clamp_table[ (((xpart_times_4 + ((colorV-colorO)<<1) + 4*colorO)+2)>>2) + 255])+255];
+			error = error + square_table[(block[4*4*3 + 4*3 + 0] - clamp_table[ (((xpart_times_4 + 3*(colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		}
+	}
+	return error;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the green component for O and H.
+// Uses perceptual error metrics.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcLowestPossibleGreenOHperceptual(uint8 *block, int colorO, int colorH, unsigned int best_error_sofar)
+{
+	// Widen both 7-bit endpoints to 8 bits (the top bit is copied into the lowest bit).
+	colorO = (colorO << 1) | (colorO >> 6);
+	colorH = (colorH << 1) | (colorH >> 6);
+	const int stepH = colorH-colorO;		// four times the per-column change from O towards H
+
+	// Top row, green channel (offset 1): O itself and the first A pixel are always scored.
+	unsigned int sum = square_table_percep_green[(block[1] - colorO) + 255];
+	sum += square_table_percep_green[(block[4 + 1] - clamp_table[ (((stepH + 4*colorO)+2)>>2) + 255])+255];
+	if(sum > best_error_sofar)
+		return sum;		// already above the best known error: leave the last two A pixels unscored
+	sum += square_table_percep_green[(block[4*2 + 1] - clamp_table[ ((((stepH<<1) + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table_percep_green[(block[4*3 + 1] - clamp_table[ (((3*stepH + 4*colorO)+2)>>2) + 255])+255];
+	return sum;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the green component for O and H.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcLowestPossibleGreenOH(uint8 *block, int colorO, int colorH, unsigned int best_error_sofar)
+{
+	// Widen both 7-bit endpoints to 8 bits (the top bit is copied into the lowest bit).
+	colorO = (colorO << 1) | (colorO >> 6);
+	colorH = (colorH << 1) | (colorH >> 6);
+	const int stepH = colorH-colorO;		// four times the per-column change from O towards H
+
+	// Top row, green channel (offset 1): O itself and the first A pixel are always scored.
+	unsigned int sum = square_table[(block[1] - colorO) + 255];
+	sum += square_table[(block[4 + 1] - clamp_table[ (((stepH + 4*colorO)+2)>>2) + 255])+255];
+	if(sum > best_error_sofar)
+		return sum;		// already above the best known error: leave the last two A pixels unscored
+	sum += square_table[(block[4*2 + 1] - clamp_table[ ((((stepH<<1) + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table[(block[4*3 + 1] - clamp_table[ (((3*stepH + 4*colorO)+2)>>2) + 255])+255];
+	return sum;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the green component for O and V.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcBBBgreen(uint8 *block, int colorO, int colorV)
+{
+	// Widen both 7-bit endpoints to 8 bits (the top bit is copied into the lowest bit).
+	colorO = (colorO << 1) | (colorO >> 6);
+	colorV = (colorV << 1) | (colorV >> 6);
+
+	const int rowStride = 4*4;				// block offset between vertically adjacent pixels
+	const int stepV = colorV-colorO;		// four times the per-row change from O towards V
+	unsigned int sum = 0;
+
+	// Left column below O (the three B pixels), rows 1, 2 and 3; green is at channel offset 1.
+	// The prediction for row k is (k*stepV + 4*colorO + 2) >> 2, looked up through clamp_table.
+	sum += square_table[(block[rowStride + 1] - clamp_table[ (((stepV + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table[(block[rowStride*2 + 1] - clamp_table[ ((((stepV<<1) + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table[(block[rowStride*3 + 1] - clamp_table[ (((3*stepV + 4*colorO)+2)>>2) + 255])+255];
+
+	return sum;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the green component for H and V.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcCCCgreen(uint8 *block, int colorH, int colorV)
+{
+	// Widen both 7-bit endpoints to 8 bits (the top bit is copied into the lowest bit).
+	colorH = (colorH << 1) | (colorH >> 6);
+	colorV = (colorV << 1) | (colorV >> 6);
+	const int rowStride = 4*4, pixStride = 4;
+	unsigned int sum = 0;
+	// Anti-diagonal pixels C1 (row 3, col 1), C2 (row 2, col 2), C3 (row 1, col 3) depend only on H and V; green is channel offset 1.
+	sum += square_table[(block[rowStride*3 + pixStride + 1] - clamp_table[ (((colorH + 3*colorV)+2)>>2) + 255])+255];
+	sum += square_table[(block[rowStride*2 + pixStride*2 + 1] - clamp_table[ (((2*colorH + 2*colorV)+2)>>2) + 255])+255];
+	sum += square_table[(block[rowStride + pixStride*3 + 1] - clamp_table[ (((3*colorH + colorV)+2)>>2) + 255])+255];
+	return sum;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the green component for H V and O.
+// Uses perceptual error metric.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcErrorPlanarOnlyGreenPerceptual(uint8 *block, int colorO, int colorH, int colorV, unsigned int lowest_possible_error, unsigned int BBBvalue, unsigned int CCCvalue, unsigned int best_error_sofar)
+{
+	colorO = (colorO << 1) | (colorO >> 6);	// widen to 8 bits: shift up by one and copy the top bit into the lowest bit
+	colorH = (colorH << 1) | (colorH >> 6);
+	colorV = (colorV << 1) | (colorV >> 6);
+	// NOTE(review): the shift amounts imply 7-bit inputs -- confirm against the callers.
+	unsigned int error;
+	// Result: the caller-supplied partial errors plus the D1..D6 terms; once a running total exceeds best_error_sofar the remaining terms are skipped.
+	// The block can be partitioned into: O A  A  A
+	//                                    B D1 D3 C3
+	//                                    B D2 C2 D5
+	//                                    B C1 D4 D6
+
+	int xpart_times_4;	// column index times (colorH-colorO), i.e. four times the horizontal part of the prediction
+
+	// The first part: O A A A. It equals lowest_possible_error previously calculated.
+	// lowest_possible_error is OAAA, BBBvalue is BBB and CCCvalue is C1C2C3.
+	error = lowest_possible_error + BBBvalue + CCCvalue;
+
+	// The remaining pixels to cover are D1 through D6 (green is channel offset 1; block is indexed with 4 per pixel and 4*4 per row).
+	if(error <= best_error_sofar)
+	{
+		// Second column: D1 D2  but not C1
+		xpart_times_4 = (colorH-colorO);
+		error = error + square_table_percep_green[(block[4*4 + 4 + 1] - clamp_table[ (((xpart_times_4 + (colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		error = error + square_table_percep_green[(block[4*4*2 + 4 + 1] - clamp_table[ (((xpart_times_4 + ((colorV-colorO)<<1) + 4*colorO)+2)>>2) + 255])+255];
+		// Third column: D3 notC2 D4
+		xpart_times_4 = (colorH-colorO) << 1;
+		error = error + square_table_percep_green[(block[4*4 + 4*2 + 1] - clamp_table[ (((xpart_times_4 + (colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		if(error <= best_error_sofar)	// second early-out check, taken after D1, D2 and D3
+		{
+			error = error + square_table_percep_green[(block[4*4*3 + 4*2 + 1] - clamp_table[ (((xpart_times_4 + 3*(colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+			// Fourth column: notC3 D5 D6
+			xpart_times_4 = 3*(colorH-colorO);
+			error = error + square_table_percep_green[(block[4*4*2 + 4*3 + 1] - clamp_table[ (((xpart_times_4 + ((colorV-colorO)<<1) + 4*colorO)+2)>>2) + 255])+255];
+			error = error + square_table_percep_green[(block[4*4*3 + 4*3 + 1] - clamp_table[ (((xpart_times_4 + 3*(colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		}
+	}
+	return error;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the green component for H V and O.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcErrorPlanarOnlyGreen(uint8 *block, int colorO, int colorH, int colorV, unsigned int lowest_possible_error, unsigned int BBBvalue, unsigned int CCCvalue, unsigned int best_error_sofar)
+{
+	colorO = (colorO << 1) | (colorO >> 6);	// widen to 8 bits: shift up by one and copy the top bit into the lowest bit
+	colorH = (colorH << 1) | (colorH >> 6);
+	colorV = (colorV << 1) | (colorV >> 6);
+	// NOTE(review): the shift amounts imply 7-bit inputs -- confirm against the callers.
+	unsigned int error;
+	// Result: the caller-supplied partial errors plus the D1..D6 terms; once a running total exceeds best_error_sofar the remaining terms are skipped.
+	// The block can be partitioned into: O A  A  A
+	//                                    B D1 D3 C3
+	//                                    B D2 C2 D5
+	//                                    B C1 D4 D6
+	int xpart_times_4;	// column index times (colorH-colorO), i.e. four times the horizontal part of the prediction
+
+	// The first part: O A A A. It equals lowest_possible_error previously calculated.
+	// lowest_possible_error is OAAA, BBBvalue is BBB and CCCvalue is C1C2C3.
+	error = lowest_possible_error + BBBvalue + CCCvalue;
+
+	// The remaining pixels to cover are D1 through D6 (green is channel offset 1; block is indexed with 4 per pixel and 4*4 per row).
+	if(error <= best_error_sofar)
+	{
+		// Second column: D1 D2  but not C1
+		xpart_times_4 = (colorH-colorO);
+		error = error + square_table[(block[4*4 + 4 + 1] - clamp_table[ (((xpart_times_4 + (colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		error = error + square_table[(block[4*4*2 + 4 + 1] - clamp_table[ (((xpart_times_4 + ((colorV-colorO)<<1) + 4*colorO)+2)>>2) + 255])+255];
+		// Third column: D3 notC2 D4
+		xpart_times_4 = (colorH-colorO) << 1;
+		error = error + square_table[(block[4*4 + 4*2 + 1] - clamp_table[ (((xpart_times_4 + (colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		if(error <= best_error_sofar)	// second early-out check, taken after D1, D2 and D3
+		{
+			error = error + square_table[(block[4*4*3 + 4*2 + 1] - clamp_table[ (((xpart_times_4 + 3*(colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+			// Fourth column: notC3 D5 D6
+			xpart_times_4 = 3*(colorH-colorO);
+			error = error + square_table[(block[4*4*2 + 4*3 + 1] - clamp_table[ (((xpart_times_4 + ((colorV-colorO)<<1) + 4*colorO)+2)>>2) + 255])+255];
+			error = error + square_table[(block[4*4*3 + 4*3 + 1] - clamp_table[ (((xpart_times_4 + 3*(colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		}
+	}
+	return error;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the blue component for O and V.
+// Uses perceptual error metric.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcBBBbluePerceptual(uint8 *block, int colorO, int colorV)
+{
+	// Widen both 6-bit endpoints to 8 bits (the top two bits are copied into the low bits).
+	colorO = (colorO << 2) | (colorO >> 4);
+	colorV = (colorV << 2) | (colorV >> 4);
+
+	const int rowStride = 4*4;				// block offset between vertically adjacent pixels
+	const int stepV = colorV-colorO;		// four times the per-row change from O towards V
+	unsigned int sum = 0;
+
+	// Left column below O (the three B pixels), rows 1, 2 and 3; blue is at channel offset 2.
+	sum += square_table_percep_blue[(block[rowStride + 2] - clamp_table[ (((stepV + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table_percep_blue[(block[rowStride*2 + 2] - clamp_table[ ((((stepV<<1) + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table_percep_blue[(block[rowStride*3 + 2] - clamp_table[ (((3*stepV + 4*colorO)+2)>>2) + 255])+255];
+
+	return sum;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the blue component for O and V.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcBBBblue(uint8 *block, int colorO, int colorV)
+{
+	// Widen both 6-bit endpoints to 8 bits (the top two bits are copied into the low bits).
+	colorO = (colorO << 2) | (colorO >> 4);
+	colorV = (colorV << 2) | (colorV >> 4);
+
+	const int rowStride = 4*4;				// block offset between vertically adjacent pixels
+	const int stepV = colorV-colorO;		// four times the per-row change from O towards V
+	unsigned int sum = 0;
+
+	// Left column below O (the three B pixels), rows 1, 2 and 3; blue is at channel offset 2.
+	sum += square_table[(block[rowStride + 2] - clamp_table[ (((stepV + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table[(block[rowStride*2 + 2] - clamp_table[ ((((stepV<<1) + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table[(block[rowStride*3 + 2] - clamp_table[ (((3*stepV + 4*colorO)+2)>>2) + 255])+255];
+
+	return sum;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the blue component for H and V.
+// Uses perceptual error metric.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcCCCbluePerceptual(uint8 *block, int colorH, int colorV)
+{
+	// Widen both 6-bit endpoints to 8 bits (the top two bits are copied into the low bits).
+	colorH = (colorH << 2) | (colorH >> 4);
+	colorV = (colorV << 2) | (colorV >> 4);
+	const int rowStride = 4*4, pixStride = 4;
+	unsigned int sum = 0;
+	// Anti-diagonal pixels C1 (row 3, col 1), C2 (row 2, col 2), C3 (row 1, col 3) depend only on H and V; blue is channel offset 2.
+	sum += square_table_percep_blue[(block[rowStride*3 + pixStride + 2] - clamp_table[ (((colorH + 3*colorV)+2)>>2) + 255])+255];
+	sum += square_table_percep_blue[(block[rowStride*2 + pixStride*2 + 2] - clamp_table[ (((2*colorH + 2*colorV)+2)>>2) + 255])+255];
+	sum += square_table_percep_blue[(block[rowStride + pixStride*3 + 2] - clamp_table[ (((3*colorH + colorV)+2)>>2) + 255])+255];
+	return sum;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the blue component for H and V.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcCCCblue(uint8 *block, int colorH, int colorV)
+{
+	// Widen both 6-bit endpoints to 8 bits (the top two bits are copied into the low bits).
+	colorH = (colorH << 2) | (colorH >> 4);
+	colorV = (colorV << 2) | (colorV >> 4);
+	const int rowStride = 4*4, pixStride = 4;
+	unsigned int sum = 0;
+	// Anti-diagonal pixels C1 (row 3, col 1), C2 (row 2, col 2), C3 (row 1, col 3) depend only on H and V; blue is channel offset 2.
+	sum += square_table[(block[rowStride*3 + pixStride + 2] - clamp_table[ (((colorH + 3*colorV)+2)>>2) + 255])+255];
+	sum += square_table[(block[rowStride*2 + pixStride*2 + 2] - clamp_table[ (((2*colorH + 2*colorV)+2)>>2) + 255])+255];
+	sum += square_table[(block[rowStride + pixStride*3 + 2] - clamp_table[ (((3*colorH + colorV)+2)>>2) + 255])+255];
+	return sum;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the blue component for O and H.
+// Uses perceptual error metric.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcLowestPossibleBlueOHperceptual(uint8 *block, int colorO, int colorH, unsigned int best_error_sofar)
+{
+	// Widen both 6-bit endpoints to 8 bits (the top two bits are copied into the low bits).
+	colorO = (colorO << 2) | (colorO >> 4);
+	colorH = (colorH << 2) | (colorH >> 4);
+	const int stepH = colorH-colorO;		// four times the per-column change from O towards H
+
+	// Top row, blue channel (offset 2): O itself and the first A pixel are always scored.
+	unsigned int sum = square_table_percep_blue[(block[2] - colorO) + 255];
+	sum += square_table_percep_blue[(block[4+2] - clamp_table[ (((stepH + 4*colorO)+2)>>2) + 255])+255];
+	if(sum > best_error_sofar)
+		return sum;		// already above the best known error: leave the last two A pixels unscored
+
+	sum += square_table_percep_blue[(block[4*2+2] - clamp_table[ ((((stepH<<1) + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table_percep_blue[(block[4*3+2] - clamp_table[ (((3*stepH + 4*colorO)+2)>>2) + 255])+255];
+	return sum;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the blue component for O and H.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcLowestPossibleBlueOH(uint8 *block, int colorO, int colorH, unsigned int best_error_sofar)
+{
+	// Widen both 6-bit endpoints to 8 bits (the top two bits are copied into the low bits).
+	colorO = (colorO << 2) | (colorO >> 4);
+	colorH = (colorH << 2) | (colorH >> 4);
+	const int stepH = colorH-colorO;		// four times the per-column change from O towards H
+
+	// Top row, blue channel (offset 2): O itself and the first A pixel are always scored.
+	unsigned int sum = square_table[(block[2] - colorO) + 255];
+	sum += square_table[(block[4+2] - clamp_table[ (((stepH + 4*colorO)+2)>>2) + 255])+255];
+	if(sum > best_error_sofar)
+		return sum;		// already above the best known error: leave the last two A pixels unscored
+
+	sum += square_table[(block[4*2+2] - clamp_table[ ((((stepH<<1) + 4*colorO)+2)>>2) + 255])+255];
+	sum += square_table[(block[4*3+2] - clamp_table[ (((3*stepH + 4*colorO)+2)>>2) + 255])+255];
+	return sum;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the blue component for O, V and H.
+// Uses perceptual error metric.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcErrorPlanarOnlyBluePerceptual(uint8 *block, int colorO, int colorH, int colorV, unsigned int lowest_possible_error, unsigned int BBBvalue, unsigned int CCCvalue, unsigned int best_error_sofar)
+{
+	colorO = (colorO << 2) | (colorO >> 4);	// widen to 8 bits: shift up by two and copy the top bits into the low bits
+	colorH = (colorH << 2) | (colorH >> 4);
+	colorV = (colorV << 2) | (colorV >> 4);
+	// NOTE(review): the shift amounts imply 6-bit inputs -- confirm against the callers.
+	unsigned int error;
+	// Result: the caller-supplied partial errors plus the D1..D6 terms; once a running total exceeds best_error_sofar the remaining terms are skipped.
+	// The block can be partitioned into: O A  A  A
+	//                                    B D1 D3 C3
+	//                                    B D2 C2 D5
+	//                                    B C1 D4 D6
+	int xpart_times_4;	// column index times (colorH-colorO), i.e. four times the horizontal part of the prediction
+
+	// The first part: O A A A. It equals lowest_possible_error previously calculated.
+	// lowest_possible_error is OAAA, BBBvalue is BBB and CCCvalue is C1C2C3.
+	error = lowest_possible_error + BBBvalue + CCCvalue;
+
+	// The remaining pixels to cover are D1 through D6 (blue is channel offset 2; block is indexed with 4 per pixel and 4*4 per row).
+	if(error <= best_error_sofar)
+	{
+		// Second column: D1 D2  but not C1
+		xpart_times_4 = (colorH-colorO);
+		error = error + square_table_percep_blue[(block[4*4 + 4 + 2] - clamp_table[ (((xpart_times_4 + (colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		error = error + square_table_percep_blue[(block[4*4*2 + 4 + 2] - clamp_table[ (((xpart_times_4 + ((colorV-colorO)<<1) + 4*colorO)+2)>>2) + 255])+255];
+		// Third column: D3 notC2 D4
+		xpart_times_4 = (colorH-colorO) << 1;
+		error = error + square_table_percep_blue[(block[4*4 + 4*2 + 2] - clamp_table[ (((xpart_times_4 + (colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		if(error <= best_error_sofar)	// second early-out check, taken after D1, D2 and D3
+		{
+			error = error + square_table_percep_blue[(block[4*4*3 + 4*2 + 2] - clamp_table[ (((xpart_times_4 + 3*(colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+			// Fourth column: notC3 D5 D6
+			xpart_times_4 = 3*(colorH-colorO);
+			error = error + square_table_percep_blue[(block[4*4*2 + 4*3 + 2] - clamp_table[ (((xpart_times_4 + ((colorV-colorO)<<1) + 4*colorO)+2)>>2) + 255])+255];
+			error = error + square_table_percep_blue[(block[4*4*3 + 4*3 + 2] - clamp_table[ (((xpart_times_4 + 3*(colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		}
+	}
+	
+	return error;
+}
+
+// Calculating the minimum error for the block (in planar mode) if we know the blue component for O, V and H.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calcErrorPlanarOnlyBlue(uint8 *block, int colorO, int colorH, int colorV, unsigned int lowest_possible_error, unsigned int BBBvalue, unsigned int CCCvalue, unsigned int best_error_sofar)
+{
+	colorO = (colorO << 2) | (colorO >> 4);	// widen to 8 bits: shift up by two and copy the top bits into the low bits
+	colorH = (colorH << 2) | (colorH >> 4);
+	colorV = (colorV << 2) | (colorV >> 4);
+	// NOTE(review): the shift amounts imply 6-bit inputs -- confirm against the callers.
+	unsigned int error;
+	// Result: the caller-supplied partial errors plus the D1..D6 terms; once a running total exceeds best_error_sofar the remaining terms are skipped.
+	// The block can be partitioned into: O A  A  A
+	//                                    B D1 D3 C3
+	//                                    B D2 C2 D5
+	//                                    B C1 D4 D6
+	int xpart_times_4;	// column index times (colorH-colorO), i.e. four times the horizontal part of the prediction
+
+	// The first part: O A A A. It equals lowest_possible_error previously calculated.
+	// lowest_possible_error is OAAA, BBBvalue is BBB and CCCvalue is C1C2C3.
+	error = lowest_possible_error + BBBvalue + CCCvalue;
+
+	// The remaining pixels to cover are D1 through D6 (blue is channel offset 2; block is indexed with 4 per pixel and 4*4 per row).
+	if(error <= best_error_sofar)
+	{
+		// Second column: D1 D2  but not C1
+		xpart_times_4 = (colorH-colorO);
+		error = error + square_table[(block[4*4 + 4 + 2] - clamp_table[ (((xpart_times_4 + (colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		error = error + square_table[(block[4*4*2 + 4 + 2] - clamp_table[ (((xpart_times_4 + ((colorV-colorO)<<1) + 4*colorO)+2)>>2) + 255])+255];
+		// Third column: D3 notC2 D4
+		xpart_times_4 = (colorH-colorO) << 1;
+		error = error + square_table[(block[4*4 + 4*2 + 2] - clamp_table[ (((xpart_times_4 + (colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		if(error <= best_error_sofar)	// second early-out check, taken after D1, D2 and D3
+		{
+			error = error + square_table[(block[4*4*3 + 4*2 + 2] - clamp_table[ (((xpart_times_4 + 3*(colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+			// Fourth column: notC3 D5 D6
+			xpart_times_4 = 3*(colorH-colorO);
+			error = error + square_table[(block[4*4*2 + 4*3 + 2] - clamp_table[ (((xpart_times_4 + ((colorV-colorO)<<1) + 4*colorO)+2)>>2) + 255])+255];
+			error = error + square_table[(block[4*4*3 + 4*3 + 2] - clamp_table[ (((xpart_times_4 + 3*(colorV-colorO) + 4*colorO)+2)>>2) + 255])+255];
+		}
+	}
+
+	return error;
+}
+
+
+
+// This function uses least squares in order to determine the best values of the plane. 
+// This is close to optimal, but not quite, due to nonlinearities in the expansion from 6 and 7 bits to 8, and
+// in the clamping to a number between 0 and the maximum. 
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockPlanar57(uint8 *img, int width,int height,int startx,int starty, unsigned int &compressed57_1, unsigned int &compressed57_2)
+{
+	// Use least squares to find the solution with the smallest error.
+	// That is, find the vector x so that |Ax-b|^2 is minimized, where
+	// x = [Ro Rr Rv]';
+	// A = [1 3/4 2/4 1/4 3/4 2/4 1/4  0  2/4 1/4  0  -1/4  1/4  0  -1/4 -2/4 ; 
+    //      0 1/4 2/4 3/4  0  1/4 2/4 3/4  0  1/4 2/4  3/4   0  1/4  2/4  3/4 ;
+	//      0  0   0   0  1/4 1/4 1/4 1/4 2/4 2/4 2/4  2/4; 3/4 3/4  3/4  3/4]';
+	// b = [r11 r12 r13 r14 r21 r22 r23 r24 r31 r32 r33 r34 r41 r42 r43 r44];
+	//
+	// That is, find solution x = inv(A' * A) * A' * b
+	//                          = C * A' * b;
+	// C is always the same, so we have calculated it off-line here.
+	//                          = C * D
+	int xx,yy, cc;
+	double coeffsA[48]= { 1.00, 0.00, 0.00, 
+		                  0.75, 0.25, 0.00,
+						  0.50, 0.50, 0.00, 
+						  0.25, 0.75, 0.00, 
+		                  0.75, 0.00, 0.25, 
+		                  0.50, 0.25, 0.25,
+						  0.25, 0.50, 0.25, 
+						  0.00, 0.75, 0.25,
+						  0.50, 0.00, 0.50, 
+		                  0.25, 0.25, 0.50,
+						  0.00, 0.50, 0.50, 
+						 -0.25, 0.75, 0.50, 
+						  0.25, 0.00, 0.75, 
+		                  0.00, 0.25, 0.75,
+						 -0.25, 0.50, 0.75, 
+						 -0.50, 0.75, 0.75};
+
+	double coeffsC[9] = {0.2875, -0.0125, -0.0125, -0.0125, 0.4875, -0.3125, -0.0125, -0.3125, 0.4875};
+	double colorO[3], colorH[3], colorV[3];
+	uint8 colorO8[3], colorH8[3], colorV8[3];
+	
+	dMatrix *D_matrix;
+	dMatrix *x_vector;
+
+	dMatrix A_matrix; A_matrix.width = 3; A_matrix.height = 16; 
+	A_matrix.data = coeffsA;
+	dMatrix C_matrix; C_matrix.width = 3; C_matrix.height = 3; 
+	C_matrix.data = coeffsC;
+	dMatrix b_vector; b_vector.width = 1; b_vector.height = 16; 
+    b_vector.data = (double*) malloc(sizeof(double)*b_vector.width*b_vector.height);
+	transposeMatrix(&A_matrix);
+
+	// Red component
+
+	// Load color data into vector b:
+	for(cc = 0, yy = 0; yy<4; yy++)
+	   for(xx = 0; xx<4; xx++)
+		   b_vector.data[cc++] = img[3*width*(starty+yy) + 3*(startx+xx) + 0];
+
+	D_matrix = multiplyMatrices(&A_matrix, &b_vector);
+	x_vector = multiplyMatrices(&C_matrix, D_matrix);
+
+	colorO[0] = CLAMP(0.0, x_vector->data[0], 255.0);
+	colorH[0] = CLAMP(0.0, x_vector->data[1], 255.0);
+	colorV[0] = CLAMP(0.0, x_vector->data[2], 255.0);
+
+	free(D_matrix->data); free(D_matrix);
+	free(x_vector->data); free(x_vector);
+
+	// Green component
+
+	// Load color data into vector b:
+	for(cc = 0, yy = 0; yy<4; yy++)
+	   for(xx = 0; xx<4; xx++)
+		   b_vector.data[cc++] = img[3*width*(starty+yy) + 3*(startx+xx) + 1];
+
+	D_matrix = multiplyMatrices(&A_matrix, &b_vector);
+	x_vector = multiplyMatrices(&C_matrix, D_matrix);
+
+	colorO[1] = CLAMP(0.0, x_vector->data[0], 255.0);
+	colorH[1] = CLAMP(0.0, x_vector->data[1], 255.0);
+	colorV[1] = CLAMP(0.0, x_vector->data[2], 255.0);
+
+	free(D_matrix->data); free(D_matrix);
+	free(x_vector->data); free(x_vector);
+
+	// Blue component
+
+	// Load color data into vector b:
+	for(cc = 0, yy = 0; yy<4; yy++)
+	   for(xx = 0; xx<4; xx++)
+		   b_vector.data[cc++] = img[3*width*(starty+yy) + 3*(startx+xx) + 2];
+
+	D_matrix = multiplyMatrices(&A_matrix, &b_vector);
+	x_vector = multiplyMatrices(&C_matrix, D_matrix);
+
+	colorO[2] = CLAMP(0.0, x_vector->data[0], 255.0);
+	colorH[2] = CLAMP(0.0, x_vector->data[1], 255.0);
+	colorV[2] = CLAMP(0.0, x_vector->data[2], 255.0);
+
+	free(D_matrix->data); free(D_matrix);
+	free(x_vector->data); free(x_vector);
+
+	// Quantize to 6 bits
+	double D = 255*(1.0/((1<<6)-1.0) );
+	colorO8[0] = JAS_ROUND((1.0*colorO[0])/D);
+	colorO8[2] = JAS_ROUND((1.0*colorO[2])/D);
+	colorH8[0] = JAS_ROUND((1.0*colorH[0])/D);
+	colorH8[2] = JAS_ROUND((1.0*colorH[2])/D);
+	colorV8[0] = JAS_ROUND((1.0*colorV[0])/D);
+	colorV8[2] = JAS_ROUND((1.0*colorV[2])/D);
+
+	// Quantize to 7 bits
+	D = 255*(1.0/((1<<7)-1.0) );
+	colorO8[1] = JAS_ROUND((1.0*colorO[1])/D);
+	colorH8[1] = JAS_ROUND((1.0*colorH[1])/D);
+	colorV8[1] = JAS_ROUND((1.0*colorV[1])/D);
+
+	// Pack bits in 57 bits
+
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      ------------------------------------------------------------------------------------------------
+	//     | R0              | G0                 | B0              | RH              | GH                  |
+	//      ------------------------------------------------------------------------------------------------
+	//
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+	//      ------------------------------------------------------------------------------------------------
+	//     | BH              | RV              |  GV                | BV               | not used           |   
+	//      ------------------------------------------------------------------------------------------------
+
+	compressed57_1 = 0;
+	compressed57_2 = 0;
+	PUTBITSHIGH( compressed57_1, colorO8[0], 6, 63);
+	PUTBITSHIGH( compressed57_1, colorO8[1], 7, 57);
+	PUTBITSHIGH( compressed57_1, colorO8[2], 6, 50);
+	PUTBITSHIGH( compressed57_1, colorH8[0], 6, 44);
+	PUTBITSHIGH( compressed57_1, colorH8[1], 7, 38);
+	PUTBITS(     compressed57_2, colorH8[2], 6, 31);
+	PUTBITS(     compressed57_2, colorV8[0], 6, 25);
+	PUTBITS(     compressed57_2, colorV8[1], 7, 19);
+	PUTBITS(     compressed57_2, colorV8[2], 6, 12);
+}
+
+// Repack a planar block after the search: the 57 payload bits arrive densely packed in
+// planar57_word1/planar57_word2 and are spread out into the file-format words planar_word1/2.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void stuff57bits(unsigned int planar57_word1, unsigned int planar57_word2, unsigned int &planar_word1, unsigned int &planar_word2)
+{
+	// Put bits in twotimer configuration for 57 bits (red and green don't overflow, blue does)
+	// 
+	// Go from this bit layout:
+	//
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      -----------------------------------------------------------------------------------------------
+	//     |R0               |G01G02              |B01B02  ;B03     |RH1           |RH2|GH                 |
+	//      -----------------------------------------------------------------------------------------------
+	//
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+	//      -----------------------------------------------------------------------------------------------
+	//     |BH               |RV               |GV                  |BV                | not used          |   
+	//      -----------------------------------------------------------------------------------------------
+	//
+	//  To this:
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      ------------------------------------------------------------------------------------------------
+	//     |//|R0               |G01|/|G02              |B01|/ // //|B02  |//|B03     |RH1           |df|RH2|
+	//      ------------------------------------------------------------------------------------------------
+	//
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+	//      -----------------------------------------------------------------------------------------------
+	//     |GH                  |BH               |RV               |GV                   |BV              |
+	//      -----------------------------------------------------------------------------------------------
+	//
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+
+	uint8 redO, greenO_hi, greenO_lo, blueO_hi, blueO_mid, blueO_lo, redH_hi, redH_lo, greenH, blueH, redV, greenV, blueV;
+	uint8 flag, bA, bB, bC, bD, fill;
+
+	planar_word1 = 0;
+	planar_word2 = 0;
+	redO      = GETBITSHIGH( planar57_word1, 6, 63);
+	PUTBITSHIGH( planar_word1, redO,      6, 62);
+	greenO_hi = GETBITSHIGH( planar57_word1, 1, 57);
+	PUTBITSHIGH( planar_word1, greenO_hi, 1, 56);
+	greenO_lo = GETBITSHIGH( planar57_word1, 6, 56);
+	PUTBITSHIGH( planar_word1, greenO_lo, 6, 54);
+	blueO_hi  = GETBITSHIGH( planar57_word1, 1, 50);
+	PUTBITSHIGH( planar_word1, blueO_hi,  1, 48);
+	blueO_mid = GETBITSHIGH( planar57_word1, 2, 49);
+	PUTBITSHIGH( planar_word1, blueO_mid, 2, 44);
+	blueO_lo  = GETBITSHIGH( planar57_word1, 3, 47);
+	PUTBITSHIGH( planar_word1, blueO_lo,  3, 41);
+	redH_hi   = GETBITSHIGH( planar57_word1, 5, 44);
+	PUTBITSHIGH( planar_word1, redH_hi,   5, 38);
+	redH_lo   = GETBITSHIGH( planar57_word1, 1, 39);
+	PUTBITSHIGH( planar_word1, redH_lo,   1, 32);
+	greenH    = GETBITSHIGH( planar57_word1, 7, 38);
+	PUTBITS(     planar_word2, greenH,    7, 31);
+	blueH     = GETBITS(     planar57_word2, 6, 31);
+	PUTBITS(     planar_word2, blueH,     6, 24);
+	redV      = GETBITS(     planar57_word2, 6, 25);
+	PUTBITS(     planar_word2, redV,      6, 18);
+	greenV    = GETBITS(     planar57_word2, 7, 19);
+	PUTBITS(     planar_word2, greenV,    7, 12);
+	blueV     = GETBITS(     planar57_word2, 6, 12);
+	PUTBITS(     planar_word2, blueV,     6,  5);
+
+	// Red must not overflow: bit 63 becomes the complement of bit 62.
+	flag = GETBITSHIGH( planar_word1, 1, 62);
+	PUTBITSHIGH( planar_word1, !flag,  1, 63);
+
+	// Green must not overflow: bit 55 becomes the complement of bit 54.
+	flag = GETBITSHIGH( planar_word1, 1, 54);
+	PUTBITSHIGH( planar_word1, !flag,  1, 55);
+
+	// Blue must overflow; the padding is decided by bits 44, 43, 41 and 40.
+	bD = GETBITSHIGH( planar_word1, 1, 40);
+	bC = GETBITSHIGH( planar_word1, 1, 41);
+	bB = GETBITSHIGH( planar_word1, 1, 43);
+	bA = GETBITSHIGH( planar_word1, 1, 44);
+	// Pad with ones when (bA bB bC bD) is one of 0111, 1010, 1011, 1101, 1110, 1111,
+	// otherwise with zeros; the expression below detects exactly those patterns.
+	flag = (bA & bC) | (bB & bC & bD & !bA) | (bA & bB & bD & !bC);
+	fill = flag * 0xf;
+	PUTBITSHIGH( planar_word1, fill,  3, 47);
+	PUTBITSHIGH( planar_word1, !flag,  1, 42);
+
+	// Set diffbit
+	PUTBITSHIGH( planar_word1, 1,  1, 33);
+}
+
+// Repack a block after the search: the 58 payload bits held in thumbH58_word1/thumbH58_word2
+// are rearranged into the file-format words thumbH_word1/thumbH_word2 (diff bit set to 1).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void stuff58bits(unsigned int thumbH58_word1, unsigned int thumbH58_word2, unsigned int &thumbH_word1, unsigned int &thumbH_word2)
+{
+	// Put bits in twotimer configuration for 58 (red doesn't overflow, green does)
+	// 
+	// Go from this bit layout:
+	//
+	//
+	//     |63 62 61 60 59 58|57 56 55 54|53 52 51 50|49 48 47 46|45 44 43 42|41 40 39 38|37 36 35 34|33 32|
+	//     |-------empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|d2 d1|
+	//
+	//     |31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+	//     |---------------------------------------index bits----------------------------------------------|
+	//
+	//  To this:
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      -----------------------------------------------------------------------------------------------
+	//     |//|R0         |G0      |// // //|G0|B0|//|B0b     |R1         |G1         |B1         |d2|df|d1|
+	//      -----------------------------------------------------------------------------------------------
+	//
+	//     |31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+	//     |---------------------------------------index bits----------------------------------------------|
+	//
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      -----------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |df|fp|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bt|bt|
+	//      -----------------------------------------------------------------------------------------------
+	//
+	//
+	// Thus, what we are really doing is going from this bit layout:
+	//
+	//
+	//     |63 62 61 60 59 58|57 56 55 54 53 52 51|50 49|48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33|32   |
+	//     |-------empty-----|part0---------------|part1|part2------------------------------------------|part3|
+	//
+	//  To this:
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      --------------------------------------------------------------------------------------------------|
+	//     |//|part0               |// // //|part1|//|part2                                          |df|part3|
+	//      --------------------------------------------------------------------------------------------------|
+
+	unsigned int top7, mid2, rest16, last1;
+	uint8 flag, gA, gB, gC, gD, fill;
+
+	// Relocate the four parts one after another.
+	thumbH_word1 = 0;
+	top7   = GETBITSHIGH( thumbH58_word1, 7, 57);
+	PUTBITSHIGH( thumbH_word1, top7,    7, 62);
+	mid2   = GETBITSHIGH( thumbH58_word1, 2, 50);
+	PUTBITSHIGH( thumbH_word1, mid2,    2, 52);
+	rest16 = GETBITSHIGH( thumbH58_word1,16, 48);
+	PUTBITSHIGH( thumbH_word1, rest16, 16, 49);
+	last1  = GETBITSHIGH( thumbH58_word1, 1, 32);
+	PUTBITSHIGH( thumbH_word1, last1,   1, 32);
+
+	// Red must not overflow: bit 63 becomes the complement of bit 62.
+	flag = GETBITSHIGH( thumbH_word1, 1, 62);
+	PUTBITSHIGH( thumbH_word1, !flag,  1, 63);
+
+	// Green must overflow; the padding is decided by bits 52, 51, 49 and 48.
+	gD = GETBITSHIGH( thumbH_word1, 1, 48);
+	gC = GETBITSHIGH( thumbH_word1, 1, 49);
+	gB = GETBITSHIGH( thumbH_word1, 1, 51);
+	gA = GETBITSHIGH( thumbH_word1, 1, 52);
+	// Pad with ones when (gA gB gC gD) is one of 0111, 1010, 1011, 1101, 1110, 1111,
+	// otherwise with zeros; the expression below detects exactly those patterns.
+	flag = (gA & gC) | (gB & gC & gD & !gA) | (gA & gB & gD & !gC);
+	fill = flag * 0xf;
+	PUTBITSHIGH( thumbH_word1, fill,  3, 55);
+	PUTBITSHIGH( thumbH_word1, !flag,  1, 50);
+
+	// Set diffbit; the second word is passed through unchanged.
+	PUTBITSHIGH( thumbH_word1, 1,  1, 33);
+	thumbH_word2 = thumbH58_word2;
+
+}
+
+// Same repacking as stuff58bits, except that the diff bit (bit 33) is written as 0.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void stuff58bitsDiffFalse(unsigned int thumbH58_word1, unsigned int thumbH58_word2, unsigned int &thumbH_word1, unsigned int &thumbH_word2)
+{
+	unsigned int top7, mid2, rest16, last1;
+	uint8 flag, gA, gB, gC, gD, fill;
+
+	// Relocate the four parts one after another.
+	thumbH_word1 = 0;
+	top7   = GETBITSHIGH( thumbH58_word1, 7, 57);
+	PUTBITSHIGH( thumbH_word1, top7,    7, 62);
+	mid2   = GETBITSHIGH( thumbH58_word1, 2, 50);
+	PUTBITSHIGH( thumbH_word1, mid2,    2, 52);
+	rest16 = GETBITSHIGH( thumbH58_word1,16, 48);
+	PUTBITSHIGH( thumbH_word1, rest16, 16, 49);
+	last1  = GETBITSHIGH( thumbH58_word1, 1, 32);
+	PUTBITSHIGH( thumbH_word1, last1,   1, 32);
+
+	// Red must not overflow: bit 63 becomes the complement of bit 62.
+	flag = GETBITSHIGH( thumbH_word1, 1, 62);
+	PUTBITSHIGH( thumbH_word1, !flag,  1, 63);
+
+	// Green must overflow; the padding is decided by bits 52, 51, 49 and 48.
+	gD = GETBITSHIGH( thumbH_word1, 1, 48);
+	gC = GETBITSHIGH( thumbH_word1, 1, 49);
+	gB = GETBITSHIGH( thumbH_word1, 1, 51);
+	gA = GETBITSHIGH( thumbH_word1, 1, 52);
+	// Pad with ones when (gA gB gC gD) is one of 0111, 1010, 1011, 1101, 1110, 1111,
+	// otherwise with zeros; the expression below detects exactly those patterns.
+	flag = (gA & gC) | (gB & gC & gD & !gA) | (gA & gB & gD & !gC);
+	fill = flag * 0xf;
+	PUTBITSHIGH( thumbH_word1, fill,  3, 55);
+	PUTBITSHIGH( thumbH_word1, !flag,  1, 50);
+
+	// Clear diffbit; the second word is passed through unchanged.
+	PUTBITSHIGH( thumbH_word1, 0,  1, 33);
+	thumbH_word2 = thumbH58_word2;
+
+}
+
+// Repack a block after the search: the 59 payload bits held in thumbT59_word1/thumbT59_word2
+// are rearranged into the file-format words thumbT_word1/thumbT_word2 (diff bit set to 1).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void stuff59bits(unsigned int thumbT59_word1, unsigned int thumbT59_word2, unsigned int &thumbT_word1, unsigned int &thumbT_word2)
+{
+	// Put bits in twotimer configuration for 59 (red overflows)
+	// 
+	// Go from this bit layout:
+	//
+	//     |63 62 61 60 59|58 57 56 55|54 53 52 51|50 49 48 47|46 45 44 43|42 41 40 39|38 37 36 35|34 33 32|
+	//     |----empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|--dist--|
+	//
+	//     |31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+	//     |----------------------------------------index bits---------------------------------------------|
+	//
+	//
+	//  To this:
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      -----------------------------------------------------------------------------------------------
+	//     |// // //|R0a  |//|R0b  |G0         |B0         |R1         |G1         |B1          |da  |df|db|
+	//      -----------------------------------------------------------------------------------------------
+	//
+	//     |31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+	//     |----------------------------------------index bits---------------------------------------------|
+	//
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      -----------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |df|fp|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bt|bt|
+	//      ------------------------------------------------------------------------------------------------
+
+	uint8 redTop;
+	uint8 flag, rA, rB, rC, rD, fill;
+
+	// Shift the whole word up by one position; this puts the middle part where it belongs.
+	thumbT_word1 = thumbT59_word1 << 1;
+
+	// R0a (top two bits of R0) must sit one position higher than the shift left it.
+	redTop = GETBITSHIGH( thumbT59_word1, 2, 58);
+	PUTBITSHIGH( thumbT_word1, redTop,  2, 60);
+	// db (lowest bit of d) stays at position 32; the unshifted source word is passed as the data.
+	PUTBITSHIGH( thumbT_word1, thumbT59_word1,  1, 32);
+
+	// Red must overflow; the padding is decided by bits 60, 59, 57 and 56.
+	rD = GETBITSHIGH( thumbT_word1, 1, 56);
+	rC = GETBITSHIGH( thumbT_word1, 1, 57);
+	rB = GETBITSHIGH( thumbT_word1, 1, 59);
+	rA = GETBITSHIGH( thumbT_word1, 1, 60);
+	// Pad with ones when (rA rB rC rD) is one of 0111, 1010, 1011, 1101, 1110, 1111,
+	// otherwise with zeros; the expression below detects exactly those patterns.
+	flag = (rA & rC) | (rB & rC & rD & !rA) | (rA & rB & rD & !rC);
+	fill = flag * 0xf;
+	PUTBITSHIGH( thumbT_word1, fill,  3, 63);
+	PUTBITSHIGH( thumbT_word1, !flag,  1, 58);
+
+	// Set diffbit; the second word is passed through unchanged.
+	PUTBITSHIGH( thumbT_word1, 1,  1, 33);
+	thumbT_word2 = thumbT59_word2;
+}
+
+
+// Decompress a planar block given in the 57-bit layout into img and measure, separately for red,
+// green and blue, the summed squared difference between the decoded 4x4 pixels and srcimg.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void decompressBlockPlanar57errorPerComponent(unsigned int compressed57_1, unsigned int compressed57_2, uint8 *img,int width,int height,int startx,int starty, uint8 *srcimg, unsigned int &error_red, unsigned int &error_green, unsigned int &error_blue)
+{
+	uint8 colO[3], colH[3], colV[3];
+	int px, py, ch, at;
+
+	// Unpack the three colors O, H and V: 6 bits for channels 0 and 2, 7 bits for channel 1.
+	colO[0] = GETBITSHIGH( compressed57_1, 6, 63);
+	colO[1] = GETBITSHIGH( compressed57_1, 7, 57);
+	colO[2] = GETBITSHIGH( compressed57_1, 6, 50);
+	colH[0] = GETBITSHIGH( compressed57_1, 6, 44);
+	colH[1] = GETBITSHIGH( compressed57_1, 7, 38);
+	colH[2] = GETBITS(     compressed57_2, 6, 31);
+	colV[0] = GETBITS(     compressed57_2, 6, 25);
+	colV[1] = GETBITS(     compressed57_2, 7, 19);
+	colV[2] = GETBITS(     compressed57_2, 6, 12);
+
+	// Widen to 8 bits: shift up and refill the freed low bits from the top bits (channels 0 and 2 first).
+	for( ch=0; ch<3; ch+=2)
+	{
+		colO[ch] = (colO[ch] << 2) | (colO[ch] >> 4);
+		colH[ch] = (colH[ch] << 2) | (colH[ch] >> 4);
+		colV[ch] = (colV[ch] << 2) | (colV[ch] >> 4);
+	}
+	colO[1] = (colO[1] << 1) | (colO[1] >> 6);
+	colH[1] = (colH[1] << 1) | (colH[1] >> 6);
+	colV[1] = (colV[1] << 1) | (colV[1] >> 6);
+
+	// Pass 1: write the interpolated, rounded and clamped color of every block pixel into img.
+	for( px=0; px<4; px++)
+	{
+		for( py=0; py<4; py++)
+		{
+			at = 3*width*(starty+py) + 3*(startx+px);
+			for( ch=0; ch<3; ch++)
+				img[at + ch] = (int)CLAMP(0, JAS_ROUND((px*(colH[ch]-colO[ch])/4.0 + py*(colV[ch]-colO[ch])/4.0 + colO[ch])), 255);
+		}
+	}
+
+	// Pass 2: accumulate the squared difference to srcimg per channel.
+	error_red = error_green = error_blue = 0;
+	for( px=0; px<4; px++)
+	{
+		for( py=0; py<4; py++)
+		{
+			at = 3*width*(starty+py) + 3*(startx+px);
+			error_red   += SQUARE(srcimg[at + 0] - img[at + 0]);
+			error_green += SQUARE(srcimg[at + 1] - img[at + 1]);
+			error_blue  += SQUARE(srcimg[at + 2] - img[at + 2]);
+		}
+	}
+}
+
+// Compress one 4x4 block in ETC1/ETC2 individual (444+444) or differential (555+333) mode,
+// using the combined color quantizers (quantize555ColorCombined / quantize444ColorCombined).
+// Both orientations are encoded: non-flipped (two 2x4 halves at startx and startx+2) and
+// flipped (two 4x2 halves at starty and starty+2). The encoding with the smaller summed
+// error reported by tryalltables_3bittable2x4/4x2 is returned; a tie keeps the non-flipped one.
+//
+//   img, width, height  image data and dimensions, forwarded unchanged to the helper functions
+//   startx, starty      block origin inside img, forwarded to the helper functions
+//   compressed1         out: bits 63..32 of the block (base colors, table codewords, diff and flip bit)
+//   compressed2         out: bits 31..0 of the block (pixel index bits)
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockDiffFlipCombined(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2)
+{
+	unsigned int compressed1_norm, compressed2_norm;
+	unsigned int compressed1_flip, compressed2_flip;
+	uint8 avg_color_quant1[3], avg_color_quant2[3];
+
+	float avg_color_float1[3],avg_color_float2[3];
+	int enc_color1[3], enc_color2[3], diff[3];
+	unsigned int best_table1=0, best_table2=0;
+	int diffbit;
+
+	int norm_err=0;
+	int flip_err=0;
+
+	// First try normal blocks 2x4:
+
+	computeAverageColor2x4noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor2x4noQuantFloat(img,width,height,startx+2,starty,avg_color_float2);
+
+	// First test if avg_color1 is similar enough to avg_color2 so that
+	// we can use differential coding of colors. 
+
+	// Scratch third argument for the quantize*ColorCombined() calls; its contents are never read here.
+	uint8 dummy[3];
+
+	quantize555ColorCombined(avg_color_float1, enc_color1, dummy);
+	quantize555ColorCombined(avg_color_float2, enc_color2, dummy);
+
+	diff[0] = enc_color2[0]-enc_color1[0];	
+	diff[1] = enc_color2[1]-enc_color1[1];	
+	diff[2] = enc_color2[2]-enc_color1[2];
+
+	if( (diff[0] >= -4) && (diff[0] <= 3) && (diff[1] >= -4) && (diff[1] <= 3) && (diff[2] >= -4) && (diff[2] <= 3) )
+	{
+		diffbit = 1;
+
+		// diff[] as computed above is exactly the difference that gets coded.
+
+		avg_color_quant1[0] = enc_color1[0] << 3 | (enc_color1[0] >> 2);
+		avg_color_quant1[1] = enc_color1[1] << 3 | (enc_color1[1] >> 2);
+		avg_color_quant1[2] = enc_color1[2] << 3 | (enc_color1[2] >> 2);
+		avg_color_quant2[0] = enc_color2[0] << 3 | (enc_color2[0] >> 2);
+		avg_color_quant2[1] = enc_color2[1] << 3 | (enc_color2[1] >> 2);
+		avg_color_quant2[2] = enc_color2[2] << 3 | (enc_color2[2] >> 2);
+
+		// Pack bits into the first word. 
+
+		//     ETC1_RGB8_OES:
+		// 
+		//     a) bit layout in bits 63 through 32 if diffbit = 0
+		// 
+		//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+		//      ---------------------------------------------------------------------------------------------------
+		//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+		//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+		//      ---------------------------------------------------------------------------------------------------
+		//     
+		//     b) bit layout in bits 63 through 32 if diffbit = 1
+		// 
+		//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+		//      ---------------------------------------------------------------------------------------------------
+		//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+		//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+		//      ---------------------------------------------------------------------------------------------------
+		// 
+		//     c) bit layout in bits 31 through 0 (in both cases)
+		// 
+		//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
+		//      --------------------------------------------------------------------------------------------------
+		//     |       most significant pixel index bits       |         least significant pixel index bits       |  
+		//     | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
+		//      --------------------------------------------------------------------------------------------------      
+
+		compressed1_norm = 0;
+		PUTBITSHIGH( compressed1_norm, diffbit,       1, 33);
+		PUTBITSHIGH( compressed1_norm, enc_color1[0], 5, 63);
+		PUTBITSHIGH( compressed1_norm, enc_color1[1], 5, 55);
+		PUTBITSHIGH( compressed1_norm, enc_color1[2], 5, 47);
+		PUTBITSHIGH( compressed1_norm, diff[0],       3, 58);
+		PUTBITSHIGH( compressed1_norm, diff[1],       3, 50);
+		PUTBITSHIGH( compressed1_norm, diff[2],       3, 42);
+
+		unsigned int best_pixel_indices1_MSB;
+		unsigned int best_pixel_indices1_LSB;
+		unsigned int best_pixel_indices2_MSB;
+		unsigned int best_pixel_indices2_LSB;
+
+		// left part of block
+		norm_err = tryalltables_3bittable2x4(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+
+		// right part of block
+		norm_err += tryalltables_3bittable2x4(img,width,height,startx+2,starty,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+		PUTBITSHIGH( compressed1_norm, best_table1,   3, 39);
+		PUTBITSHIGH( compressed1_norm, best_table2,   3, 36);
+		PUTBITSHIGH( compressed1_norm,           0,   1, 32);
+
+		// Second word: 8 index bits per half and significance, placed at positions 23, 31, 7 and 15.
+		compressed2_norm = 0;
+		PUTBITS( compressed2_norm, (best_pixel_indices1_MSB     ), 8, 23);
+		PUTBITS( compressed2_norm, (best_pixel_indices2_MSB     ), 8, 31);
+		PUTBITS( compressed2_norm, (best_pixel_indices1_LSB     ), 8, 7);
+		PUTBITS( compressed2_norm, (best_pixel_indices2_LSB     ), 8, 15);
+
+	}
+	else
+	{
+		diffbit = 0;
+		// The difference is bigger than what fits in 555 plus delta-333, so we will have
+		// to deal with 444 444.
+
+		quantize444ColorCombined(avg_color_float1, enc_color1, dummy);
+		quantize444ColorCombined(avg_color_float2, enc_color2, dummy);
+
+		avg_color_quant1[0] = enc_color1[0] << 4 | enc_color1[0]; 
+		avg_color_quant1[1] = enc_color1[1] << 4 | enc_color1[1]; 
+		avg_color_quant1[2] = enc_color1[2] << 4 | enc_color1[2];
+		avg_color_quant2[0] = enc_color2[0] << 4 | enc_color2[0]; 
+		avg_color_quant2[1] = enc_color2[1] << 4 | enc_color2[1]; 
+		avg_color_quant2[2] = enc_color2[2] << 4 | enc_color2[2];
+
+
+		// Pack bits into the first word. 
+
+		//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+		//      ---------------------------------------------------------------------------------------------------
+		//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+		//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+		//      ---------------------------------------------------------------------------------------------------
+
+		compressed1_norm = 0;
+		PUTBITSHIGH( compressed1_norm, diffbit,       1, 33);
+		PUTBITSHIGH( compressed1_norm, enc_color1[0], 4, 63);
+		PUTBITSHIGH( compressed1_norm, enc_color1[1], 4, 55);
+		PUTBITSHIGH( compressed1_norm, enc_color1[2], 4, 47);
+		PUTBITSHIGH( compressed1_norm, enc_color2[0], 4, 59);
+		PUTBITSHIGH( compressed1_norm, enc_color2[1], 4, 51);
+		PUTBITSHIGH( compressed1_norm, enc_color2[2], 4, 43);
+
+		unsigned int best_pixel_indices1_MSB;
+		unsigned int best_pixel_indices1_LSB;
+		unsigned int best_pixel_indices2_MSB;
+		unsigned int best_pixel_indices2_LSB;
+
+		// left part of block
+		norm_err = tryalltables_3bittable2x4(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+
+		// right part of block
+		norm_err += tryalltables_3bittable2x4(img,width,height,startx+2,starty,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+		PUTBITSHIGH( compressed1_norm, best_table1,   3, 39);
+		PUTBITSHIGH( compressed1_norm, best_table2,   3, 36);
+		PUTBITSHIGH( compressed1_norm,           0,   1, 32);
+
+		// Second word: 8 index bits per half and significance, placed at positions 23, 31, 7 and 15.
+		compressed2_norm = 0;
+		PUTBITS( compressed2_norm, (best_pixel_indices1_MSB     ), 8, 23);
+		PUTBITS( compressed2_norm, (best_pixel_indices2_MSB     ), 8, 31);
+		PUTBITS( compressed2_norm, (best_pixel_indices1_LSB     ), 8, 7);
+		PUTBITS( compressed2_norm, (best_pixel_indices2_LSB     ), 8, 15);
+	}
+
+	// Now try flipped blocks 4x2:
+
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty+2,avg_color_float2);
+
+	// First test if avg_color1 is similar enough to avg_color2 so that
+	// we can use differential coding of colors. 
+
+	quantize555ColorCombined(avg_color_float1, enc_color1, dummy);
+	quantize555ColorCombined(avg_color_float2, enc_color2, dummy);
+
+	diff[0] = enc_color2[0]-enc_color1[0];	
+	diff[1] = enc_color2[1]-enc_color1[1];	
+	diff[2] = enc_color2[2]-enc_color1[2];
+
+	if( (diff[0] >= -4) && (diff[0] <= 3) && (diff[1] >= -4) && (diff[1] <= 3) && (diff[2] >= -4) && (diff[2] <= 3) )
+	{
+		diffbit = 1;
+
+		// diff[] as computed above is exactly the difference that gets coded.
+
+		avg_color_quant1[0] = enc_color1[0] << 3 | (enc_color1[0] >> 2);
+		avg_color_quant1[1] = enc_color1[1] << 3 | (enc_color1[1] >> 2);
+		avg_color_quant1[2] = enc_color1[2] << 3 | (enc_color1[2] >> 2);
+		avg_color_quant2[0] = enc_color2[0] << 3 | (enc_color2[0] >> 2);
+		avg_color_quant2[1] = enc_color2[1] << 3 | (enc_color2[1] >> 2);
+		avg_color_quant2[2] = enc_color2[2] << 3 | (enc_color2[2] >> 2);
+
+		// Pack bits into the first word. 
+
+		compressed1_flip = 0;
+		PUTBITSHIGH( compressed1_flip, diffbit,       1, 33);
+		PUTBITSHIGH( compressed1_flip, enc_color1[0], 5, 63);
+		PUTBITSHIGH( compressed1_flip, enc_color1[1], 5, 55);
+		PUTBITSHIGH( compressed1_flip, enc_color1[2], 5, 47);
+		PUTBITSHIGH( compressed1_flip, diff[0],       3, 58);
+		PUTBITSHIGH( compressed1_flip, diff[1],       3, 50);
+		PUTBITSHIGH( compressed1_flip, diff[2],       3, 42);
+
+		unsigned int best_pixel_indices1_MSB;
+		unsigned int best_pixel_indices1_LSB;
+		unsigned int best_pixel_indices2_MSB;
+		unsigned int best_pixel_indices2_LSB;
+
+		// upper part of block
+		flip_err = tryalltables_3bittable4x2(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+		// lower part of block
+		flip_err += tryalltables_3bittable4x2(img,width,height,startx,starty+2,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+		PUTBITSHIGH( compressed1_flip, best_table1,   3, 39);
+		PUTBITSHIGH( compressed1_flip, best_table2,   3, 36);
+		PUTBITSHIGH( compressed1_flip,           1,   1, 32);
+
+		// Merge the lower half's index bits (shifted up by 2) into the upper half's words; the low
+		// 16 bits of the merged MSB and LSB words then form the two halves of the second word.
+		best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+		best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);
+
+		compressed2_flip = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+	}
+	else
+	{
+		diffbit = 0;
+		// The difference is bigger than what fits in 555 plus delta-333, so we will have
+		// to deal with 444 444.
+
+		quantize444ColorCombined(avg_color_float1, enc_color1, dummy);
+		quantize444ColorCombined(avg_color_float2, enc_color2, dummy);
+
+		avg_color_quant1[0] = enc_color1[0] << 4 | enc_color1[0]; 
+		avg_color_quant1[1] = enc_color1[1] << 4 | enc_color1[1]; 
+		avg_color_quant1[2] = enc_color1[2] << 4 | enc_color1[2];
+		avg_color_quant2[0] = enc_color2[0] << 4 | enc_color2[0]; 
+		avg_color_quant2[1] = enc_color2[1] << 4 | enc_color2[1]; 
+		avg_color_quant2[2] = enc_color2[2] << 4 | enc_color2[2];
+
+		//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+		//      ---------------------------------------------------------------------------------------------------
+		//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+		//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+		//      ---------------------------------------------------------------------------------------------------
+
+
+		// Pack bits into the first word. 
+		compressed1_flip = 0;
+		PUTBITSHIGH( compressed1_flip, diffbit,       1, 33);
+		PUTBITSHIGH( compressed1_flip, enc_color1[0], 4, 63);
+		PUTBITSHIGH( compressed1_flip, enc_color1[1], 4, 55);
+		PUTBITSHIGH( compressed1_flip, enc_color1[2], 4, 47);
+		PUTBITSHIGH( compressed1_flip, enc_color2[0], 4, 59);
+		PUTBITSHIGH( compressed1_flip, enc_color2[1], 4, 51);
+		PUTBITSHIGH( compressed1_flip, enc_color2[2], 4, 43);
+
+		unsigned int best_pixel_indices1_MSB;
+		unsigned int best_pixel_indices1_LSB;
+		unsigned int best_pixel_indices2_MSB;
+		unsigned int best_pixel_indices2_LSB;
+
+		// upper part of block
+		flip_err = tryalltables_3bittable4x2(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+		// lower part of block
+		flip_err += tryalltables_3bittable4x2(img,width,height,startx,starty+2,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+		PUTBITSHIGH( compressed1_flip, best_table1,   3, 39);
+		PUTBITSHIGH( compressed1_flip, best_table2,   3, 36);
+		PUTBITSHIGH( compressed1_flip,           1,   1, 32);
+
+		// Merge the lower half's index bits (shifted up by 2) into the upper half's words; the low
+		// 16 bits of the merged MSB and LSB words then form the two halves of the second word.
+		best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+		best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);
+
+		compressed2_flip = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+	}
+
+	// Keep whichever orientation produced the smaller summed error; on a tie the
+	// non-flipped (2x4) encoding wins.
+
+	if(norm_err <= flip_err)
+	{
+		compressed1 = compressed1_norm;
+		compressed2 = compressed2_norm;
+	}
+	else
+	{
+		// Flip bit: already written by PUTBITSHIGH(..., 1, 1, 32) above. The OR (kept from the
+		// original code) sets the lowest bit of the word, presumably that same bit - TODO confirm.
+		compressed1 = compressed1_flip | 1;
+		compressed2 = compressed2_flip;
+	}
+}
+
+// Calculation of the two block colors using the LBG-algorithm (Linde-Buzo-Gray, i.e. 2-means clustering).
+//
+// The five public entry points further down all run the same clustering loop on one 4x4 block and differ only in
+//   (1) the color space the 16 training samples are expressed in (plain RGB or the rotated "qrs" space),
+//   (2) the weight given to the first component (the intensity q) in the distortion measure,
+//   (3) whether the code words are rounded to integers before the distortion is measured, and
+//   (4) whether a converged run (distortion unchanged since the previous round) asks for a new seeding.
+// The static helpers below implement the shared parts once; the entry points only select the variant.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+
+// Reads the 4x4 block whose top-left pixel is (startx,starty) into samples[x][y][component] as RGB.
+// img is addressed as 3 bytes per pixel in rows of `width` pixels.
+static void lbgReadBlockRGB(uint8 *img, int width, int startx, int starty, double (samples)[4][4][3])
+{
+	for (int y = 0; y < BLOCKHEIGHT; ++y)
+	{
+		for (int x = 0; x < BLOCKWIDTH; ++x)
+		{
+			const uint8 *pixel = img + 3*((starty+y)*width+startx+x);
+
+			samples[x][y][R] = pixel[R];
+			samples[x][y][G] = pixel[G];
+			samples[x][y][B] = pixel[B];
+		}
+	}
+}
+
+// Reads the 4x4 block whose top-left pixel is (startx,starty) into samples[x][y][component] in qrs representation.
+//
+// qrs = Q * rgb where Q = [a a a ; b -b 0 ; c c -2c]; a = 1/sqrt(3), b = 1/sqrt(2), c = 1/sqrt(6);
+// rgb = inv(Q)*qrs = Q' * qrs where ' denotes transpose.
+// The q variable holds intensity. r and s hold chrominance.
+// q = [0, sqrt(3)*255], r = [-255/sqrt(2), 255/sqrt(2)], s = [-2*255/sqrt(6), 2*255/sqrt(6)];
+// q is stored in component R, r in component G and s in component B.
+static void lbgReadBlockQRS(uint8 *img, int width, int startx, int starty, double (samples)[4][4][3])
+{
+	const double inv_sqrt3 = 1.0/sqrt(1.0*3);
+	const double inv_sqrt2 = 1.0/sqrt(1.0*2);
+	const double inv_sqrt6 = 1.0/sqrt(1.0*6);
+	const double two_inv_sqrt6 = 2.0/sqrt(1.0*6);
+
+	for (int y = 0; y < BLOCKHEIGHT; ++y)
+	{
+		for (int x = 0; x < BLOCKWIDTH; ++x)
+		{
+			const uint8 *pixel = img + 3*((starty+y)*width+startx+x);
+			const double red = pixel[R];
+			const double green = pixel[G];
+			const double blue = pixel[B];
+
+			samples[x][y][R] = inv_sqrt3*red + inv_sqrt3*green + inv_sqrt3*blue;
+			samples[x][y][G] = inv_sqrt2*red - inv_sqrt2*green;
+			samples[x][y][B] = inv_sqrt6*red + inv_sqrt6*green - two_inv_sqrt6*blue;
+		}
+	}
+}
+
+// Core LBG loop shared by all variants.
+//
+//   samples                 the 16 training vectors, indexed [x][y][component]
+//   first_component_weight  factor applied to the squared error of component R (the intensity q for qrs data)
+//   round_codewords         if true, the code words are rounded with JAS_ROUND before the distortion is measured
+//   reseed_when_converged   value given to "continue seeding" when the distortion equals that of the previous round
+//   best_colors             receives the two code words that gave the lowest total distortion
+//
+// Up to 10 random seedings are tried, each refined for up to 10 iterations. The random generator is reset
+// with a fixed seed on every call, so the result depends only on the input block.
+static void lbgFindTwoCodewords(double (samples)[4][4][3], double first_component_weight, bool round_codewords, bool reseed_when_converged, double (best_colors)[2][3])
+{
+	const int number_of_iterations = 10;
+	const int maximum_number_of_seedings = 10;
+	const int samples_in_block = BLOCKWIDTH*BLOCKHEIGHT;
+	double current_colors[2][3];
+	double max_v[3];
+	double min_v[3];
+	int x, y, c, s;
+
+	// reset rand so that we get predictable output per block
+	srand(10000);
+
+	// Bounding box of the training data; the seeds are drawn from inside it.
+	for (c = 0; c < 3; ++c)
+	{
+		max_v[c] = -512.0;
+		min_v[c] =  512.0;
+	}
+	for (y = 0; y < BLOCKHEIGHT; ++y)
+	{
+		for (x = 0; x < BLOCKWIDTH; ++x)
+		{
+			for (c = 0; c < 3; ++c)
+			{
+				if (samples[x][y][c] > max_v[c]) max_v[c] = samples[x][y][c];
+				if (samples[x][y][c] < min_v[c]) min_v[c] = samples[x][y][c];
+			}
+		}
+	}
+
+	// Start values above what 16 samples of 8-bit data can produce with the weights used by the callers
+	// (at most 16*3*255*255), so the first measured distortion always becomes the best one and
+	// best_colors is always written.
+	double D = 512*512*3*16.0;
+	double bestD = 512*512*3*16.0;
+
+	bool continue_seeding = true;
+
+	// loop seeds
+	for (int seeding = 0; (seeding < maximum_number_of_seedings) && continue_seeding; seeding++)
+	{
+		// hopefully we will not need more seedings:
+		continue_seeding = false;
+
+		// calculate seeds; the order of the rand() calls (code word first, then component) is part of the
+		// deterministic output and must not be changed.
+		for (s = 0; s < 2; ++s)
+		{
+			for (c = 0; c < 3; ++c)
+			{
+				current_colors[s][c] = double((double(rand())/RAND_MAX)*(max_v[c]-min_v[c])) + min_v[c];
+			}
+		}
+
+		// divide into two quantization sets and calculate distortion
+		bool continue_iterate = true;
+		for (int i = 0; (i < number_of_iterations) && continue_iterate; i++)
+		{
+			// D deliberately carries over from the previous seeding, exactly as the comparison below expects.
+			const double oldD = D;
+			double codeword[2][3];	// code words as seen by the distortion measure
+			double t_color[2][3];	// per-set component sums for the centroid update
+			int n = 0;				// number of samples assigned to code word 0
+
+			for (s = 0; s < 2; ++s)
+			{
+				for (c = 0; c < 3; ++c)
+				{
+					codeword[s][c] = round_codewords ? double(JAS_ROUND(current_colors[s][c])) : current_colors[s][c];
+					t_color[s][c] = 0;
+				}
+			}
+
+			D = 0;
+			for (y = 0; y < BLOCKHEIGHT; ++y)
+			{
+				for (x = 0; x < BLOCKWIDTH; ++x)
+				{
+					const double *sample = samples[x][y];
+					const double error_a = first_component_weight*SQUARE(sample[R] - codeword[0][R]) +
+										   SQUARE(sample[G] - codeword[0][G]) +
+										   SQUARE(sample[B] - codeword[0][B]);
+					const double error_b = first_component_weight*SQUARE(sample[R] - codeword[1][R]) +
+										   SQUARE(sample[G] - codeword[1][G]) +
+										   SQUARE(sample[B] - codeword[1][B]);
+					// Ties go to code word 1.
+					const int nearest = (error_a < error_b) ? 0 : 1;
+
+					if (nearest == 0)
+					{
+						D += error_a;
+						++n;
+					}
+					else
+					{
+						D += error_b;
+					}
+
+					// Accumulate the sample into its set right away; the sums are only used if both sets are non-empty.
+					for (c = 0; c < 3; ++c)
+					{
+						t_color[nearest][c] += sample[c];
+					}
+				}
+			}
+
+			// compare with old distortion
+			if (D == 0)
+			{
+				// Perfect score -- we dont need to go further iterations.
+				continue_iterate = false;
+				continue_seeding = false;
+			}
+			if (D == oldD)
+			{
+				// Same score as last round -- no need to go for further iterations.
+				// Whether this also ends the seeding is the difference between the fast and the thorough variants.
+				continue_iterate = false;
+				continue_seeding = reseed_when_converged;
+			}
+			if (D < bestD)
+			{
+				bestD = D;
+				for (s = 0; s < 2; ++s)
+				{
+					for (c = 0; c < 3; ++c)
+					{
+						best_colors[s][c] = current_colors[s][c];
+					}
+				}
+			}
+			if (n == 0 || n == samples_in_block)
+			{
+				// All colors end up in the same Voronoi region. We need to reseed.
+				continue_iterate = false;
+				continue_seeding = true;
+			}
+			else
+			{
+				// Calculate new reconstruction points using the centroids
+				for (c = 0; c < 3; ++c)
+				{
+					current_colors[0][c] = t_color[0][c] / n;
+					current_colors[1][c] = t_color[1][c] / (samples_in_block - n);
+				}
+			}
+		}
+	}
+}
+
+// Converts two code words from qrs back to RGB (rgb = Q' * qrs), clamps each component to [0,255]
+// and stores the rounded result in LBG_colors.
+static void lbgStoreQRSAsRGB(double (codewords)[2][3], uint8 (LBG_colors)[2][3])
+{
+	const double inv_sqrt3 = 1.0/sqrt(1.0*3);
+	const double inv_sqrt2 = 1.0/sqrt(1.0*2);
+	const double inv_sqrt6 = 1.0/sqrt(1.0*6);
+	const double two_inv_sqrt6 = 2.0/sqrt(1.0*6);
+
+	for (int s = 0; s < 2; s++)
+	{
+		const double qq = codewords[s][0];
+		const double rr = codewords[s][1];
+		const double ss = codewords[s][2];
+		double rgb[3];
+
+		const double red   = inv_sqrt3*qq + inv_sqrt2*rr + inv_sqrt6*ss;
+		const double green = inv_sqrt3*qq - inv_sqrt2*rr + inv_sqrt6*ss;
+		// The zero-weighted r term is kept so the expression matches the transposed matrix row exactly.
+		const double blue  = inv_sqrt3*qq + (0.0)*rr - two_inv_sqrt6*ss;
+
+		rgb[0] = CLAMP(0, red, 255);
+		rgb[1] = CLAMP(0, green, 255);
+		rgb[2] = CLAMP(0, blue, 255);
+
+		for (int c = 0; c < 3; c++)
+		{
+			LBG_colors[s][c] = JAS_ROUND(rgb[c]);
+		}
+	}
+}
+
+// Stores two RGB code words in LBG_colors, rounded to the nearest integer.
+static void lbgStoreRGB(double (codewords)[2][3], uint8 (LBG_colors)[2][3])
+{
+	for (int s = 0; s < 2; s++)
+	{
+		for (int c = 0; c < 3; c++)
+		{
+			LBG_colors[s][c] = JAS_ROUND(codewords[s][c]);
+		}
+	}
+}
+
+// Calculation of the two block colors using the LBG-algorithm
+// The following method scales down the intensity (weight 0.5 on q), since this can be compensated for anyway by both the H and T mode.
+// Fast version: a converged run ends the seeding.
+// Result: LBG_colors[0] and LBG_colors[1] receive the two RGB block colors.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void computeColorLBGHalfIntensityFast(uint8 *img,int width,int startx,int starty, uint8 (LBG_colors)[2][3]) 
+{
+	double samples[4][4][3];
+	double best_colors[2][3];
+
+	lbgReadBlockQRS(img, width, startx, starty, samples);
+	lbgFindTwoCodewords(samples, 0.5, false, false, best_colors);
+	lbgStoreQRSAsRGB(best_colors, LBG_colors);
+}
+
+// Calculation of the two block colors using the LBG-algorithm
+// The following method completely ignores the intensity (weight 0.0 on q), since this can be compensated for anyway by both the H and T mode.
+// Faster version: a converged run ends the seeding.
+// Result: LBG_colors[0] and LBG_colors[1] receive the two RGB block colors.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void computeColorLBGNotIntensityFast(uint8 *img,int width,int startx,int starty, uint8 (LBG_colors)[2][3]) 
+{
+	double samples[4][4][3];
+	double best_colors[2][3];
+
+	lbgReadBlockQRS(img, width, startx, starty, samples);
+	lbgFindTwoCodewords(samples, 0.0, false, false, best_colors);
+	lbgStoreQRSAsRGB(best_colors, LBG_colors);
+}
+
+// Calculation of the two block colors using the LBG-algorithm
+// The following method completely ignores the intensity, since this can be compensated for anyway by both the H and T mode.
+// Thorough version: a converged run still asks for another seeding.
+// Result: LBG_colors[0] and LBG_colors[1] receive the two RGB block colors.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void computeColorLBGNotIntensity(uint8 *img,int width,int startx,int starty, uint8 (LBG_colors)[2][3]) 
+{
+	double samples[4][4][3];
+	double best_colors[2][3];
+
+	lbgReadBlockQRS(img, width, startx, starty, samples);
+	lbgFindTwoCodewords(samples, 0.0, false, true, best_colors);
+	lbgStoreQRSAsRGB(best_colors, LBG_colors);
+}
+
+// Calculation of the two block colors using the LBG-algorithm
+// Works directly on RGB with all components weighted equally; the distortion is measured against the rounded code words.
+// Thorough version: a converged run still asks for another seeding.
+// Result: LBG_colors[0] and LBG_colors[1] receive the two RGB block colors.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void computeColorLBG(uint8 *img,int width,int startx,int starty, uint8 (LBG_colors)[2][3]) 
+{
+	double samples[4][4][3];
+	double best_colors[2][3];
+
+	lbgReadBlockRGB(img, width, startx, starty, samples);
+	lbgFindTwoCodewords(samples, 1.0, true, true, best_colors);
+	lbgStoreRGB(best_colors, LBG_colors);
+}
+
+// Calculation of the two block colors using the LBG-algorithm
+// Works directly on RGB with all components weighted equally; the distortion is measured against the rounded code words.
+// Fast version: a converged run ends the seeding.
+// Result: LBG_colors[0] and LBG_colors[1] receive the two RGB block colors.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void computeColorLBGfast(uint8 *img,int width,int startx,int starty, uint8 (LBG_colors)[2][3]) 
+{
+	double samples[4][4][3];
+	double best_colors[2][3];
+
+	lbgReadBlockRGB(img, width, startx, starty, samples);
+	lbgFindTwoCodewords(samples, 1.0, true, false, best_colors);
+	lbgStoreRGB(best_colors, LBG_colors);
+}
+
+// Quantizes both colors in current_color so that red, green and blue fit in R_B, G_B and B_B bits.
+// The results are written to the matching entries of quantized_color.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressColor(int R_B, int G_B, int B_B, uint8 (current_color)[2][3], uint8 (quantized_color)[2][3]) 
+{
+	//
+	//	Each component c is mapped to b bits as:
+	//
+	//  c = (c + (2^(8-b))/2) / (255 / (2^b - 1))
+	//
+	//  For instance, if b = 3:
+	//
+	//  c = (c + 16) / (255 / 7) = 7 * (c + 16) / 255
+	//
+	//  The whole expression is evaluated in integer arithmetic: multiply first, then divide by 255.
+	//
+
+	const int component[3] = { R, G, B };
+	const int bit_count[3] = { R_B, G_B, B_B };
+
+	for (int color = 0; color < 2; ++color)
+	{
+		for (int k = 0; k < 3; ++k)
+		{
+			const int top_level = BINPOW(bit_count[k]) - 1;			// 2^b - 1
+			const int half_step = BINPOW(8 - bit_count[k] - 1);		// (2^(8-b))/2
+			const int scaled = top_level * (current_color[color][component[k]] + half_step) / 255;
+
+			quantized_color[color][component[k]] = CLAMP(0, scaled, 255);
+		}
+	}
+}
+
+// Exchanges colors[0] and colors[1] channel by channel (R, G and B).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void swapColors(uint8 (colors)[2][3]) 
+{
+	const uint8 firstR = colors[0][R];
+	const uint8 firstG = colors[0][G];
+	const uint8 firstB = colors[0][B];
+
+	colors[0][R] = colors[1][R];
+	colors[0][G] = colors[1][G];
+	colors[0][B] = colors[1][B];
+
+	colors[1][R] = firstR;
+	colors[1][G] = firstG;
+	colors[1][B] = firstB;
+}
+
+
+// Calculate the paint colors from the block colors 
+// using a distance d and one of the H- or T-patterns.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+
+// Calculate the error for the block at position (startx,starty)
+// The parameters needed for reconstruction are calculated as well
+// 
+// Please note that the function can change the order between the two colors in colorsRGB444
+//
+// In the 59T bit mode, we only have pattern T.
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateError59Tperceptual1000(uint8* srcimg, int width, int startx, int starty, uint8 (colorsRGB444)[2][3], uint8 &distance, unsigned int &pixel_indices) 
+{
+
+	unsigned int block_error = 0, 
+		   best_block_error = MAXERR1000,
+		   pixel_error, 
+		   best_pixel_error;
+	int diff[3];
+	uint8 best_sw;
+	unsigned int pixel_colors;
+	uint8 colors[2][3];
+	uint8 possible_colors[4][3];
+
+	// First use the colors as they are, then swap them
+	for (uint8 sw = 0; sw <2; ++sw) 
+	{ 
+		if (sw == 1) 
+		{
+			swapColors(colorsRGB444);
+		}
+		decompressColor(R_BITS59T, G_BITS59T, B_BITS59T, colorsRGB444, colors);
+
+		// Test all distances
+		for (uint8 d = 0; d < BINPOW(TABLE_BITS_59T); ++d) 
+		{
+			calculatePaintColors59T(d,PATTERN_T, colors, possible_colors);
+			
+			block_error = 0;	
+			pixel_colors = 0;
+
+			// Loop block
+			for (size_t y = 0; y < BLOCKHEIGHT; ++y) 
+			{
+				for (size_t x = 0; x < BLOCKWIDTH; ++x) 
+				{
+					best_pixel_error = MAXERR1000;
+					pixel_colors <<=2; // Make room for next value
+
+					// Loop possible block colors
+					for (uint8 c = 0; c < 4; ++c) 
+					{
+					
+						diff[R] = srcimg[3*((starty+y)*width+startx+x)+R] - CLAMP(0,possible_colors[c][R],255);
+						diff[G] = srcimg[3*((starty+y)*width+startx+x)+G] - CLAMP(0,possible_colors[c][G],255);
+						diff[B] = srcimg[3*((starty+y)*width+startx+x)+B] - CLAMP(0,possible_colors[c][B],255);
+
+						pixel_error =	PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(diff[R]) +
+										PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*SQUARE(diff[G]) +
+										PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*SQUARE(diff[B]);
+
+						// Choose best error
+						if (pixel_error < best_pixel_error) 
+						{
+							best_pixel_error = pixel_error;
+							pixel_colors ^= (pixel_colors & 3); // Reset the two first bits
+							pixel_colors |= c;
+						} 
+					}
+					block_error += best_pixel_error;
+				}
+			}
+			if (block_error < best_block_error) 
+			{
+				best_block_error = block_error;
+				distance = d;
+				pixel_indices = pixel_colors;
+				best_sw = sw;
+			}
+		}
+		
+		if (sw == 1 && best_sw == 0) 
+		{
+			swapColors(colorsRGB444);
+		}
+		decompressColor(R_BITS59T, G_BITS59T, B_BITS59T, colorsRGB444, colors);
+	}
+	return best_block_error;
+}
+
+// Calculate the error for the block at position (startx,starty)
+// The parameters needed for reconstruction is calculated as well
+// 
+// Please note that the function can change the order between the two colors in colorsRGB444
+//
+// In the 59T bit mode, we only have pattern T.
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double calculateError59T(uint8* srcimg, int width, int startx, int starty, uint8 (colorsRGB444)[2][3], uint8 &distance, unsigned int &pixel_indices) 
+{
+	double block_error = 0, 
+		     best_block_error = MAXIMUM_ERROR, 
+				 pixel_error, 
+				 best_pixel_error;
+	int diff[3];
+	uint8 best_sw;
+	unsigned int pixel_colors;
+	uint8 colors[2][3];
+	uint8 possible_colors[4][3];
+
+	// First use the colors as they are, then swap them
+	for (uint8 sw = 0; sw <2; ++sw) 
+	{ 
+		if (sw == 1) 
+		{
+			swapColors(colorsRGB444);
+		}
+		decompressColor(R_BITS59T, G_BITS59T, B_BITS59T, colorsRGB444, colors);
+
+		// Test all distances
+		for (uint8 d = 0; d < BINPOW(TABLE_BITS_59T); ++d) 
+		{
+			calculatePaintColors59T(d,PATTERN_T, colors, possible_colors);
+			
+			block_error = 0;	
+			pixel_colors = 0;
+
+			// Loop block
+			for (size_t y = 0; y < BLOCKHEIGHT; ++y) 
+			{
+				for (size_t x = 0; x < BLOCKWIDTH; ++x) 
+				{
+					best_pixel_error = MAXIMUM_ERROR;
+					pixel_colors <<=2; // Make room for next value
+
+					// Loop possible block colors
+					for (uint8 c = 0; c < 4; ++c) 
+					{
+					
+						diff[R] = srcimg[3*((starty+y)*width+startx+x)+R] - CLAMP(0,possible_colors[c][R],255);
+						diff[G] = srcimg[3*((starty+y)*width+startx+x)+G] - CLAMP(0,possible_colors[c][G],255);
+						diff[B] = srcimg[3*((starty+y)*width+startx+x)+B] - CLAMP(0,possible_colors[c][B],255);
+
+						pixel_error =	weight[R]*SQUARE(diff[R]) +
+										weight[G]*SQUARE(diff[G]) +
+										weight[B]*SQUARE(diff[B]);
+
+						// Choose best error
+						if (pixel_error < best_pixel_error) 
+						{
+							best_pixel_error = pixel_error;
+							pixel_colors ^= (pixel_colors & 3); // Reset the two first bits
+							pixel_colors |= c;
+						} 
+					}
+					block_error += best_pixel_error;
+				}
+			}
+			if (block_error < best_block_error) 
+			{
+				best_block_error = block_error;
+				distance = d;
+				pixel_indices = pixel_colors;
+				best_sw = sw;
+			}
+		}
+		
+		if (sw == 1 && best_sw == 0) 
+		{
+			swapColors(colorsRGB444);
+		}
+		decompressColor(R_BITS59T, G_BITS59T, B_BITS59T, colorsRGB444, colors);
+	}
+	return best_block_error;
+}
+
+// Calculate the error for the block at position (startx,starty)
+// The parameters needed for reconstruction is calculated as well
+//
+// In the 59T bit mode, we only have pattern T.
+// 
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateError59TnoSwapPerceptual1000(uint8* srcimg, int width, int startx, int starty, uint8 (colorsRGB444)[2][3], uint8 &distance, unsigned int &pixel_indices) 
+{
+
+	unsigned int block_error = 0, 
+		   best_block_error = MAXERR1000,
+		   pixel_error, 
+		   best_pixel_error;
+	int diff[3];
+	unsigned int pixel_colors;
+	uint8 colors[2][3];
+	uint8 possible_colors[4][3];
+	int thebestintheworld;
+
+	// First use the colors as they are, then swap them
+		decompressColor(R_BITS59T, G_BITS59T, B_BITS59T, colorsRGB444, colors);
+
+		// Test all distances
+		for (uint8 d = 0; d < BINPOW(TABLE_BITS_59T); ++d) 
+		{
+			calculatePaintColors59T(d,PATTERN_T, colors, possible_colors);
+			
+			block_error = 0;	
+			pixel_colors = 0;
+
+			// Loop block
+			for (size_t y = 0; y < BLOCKHEIGHT; ++y) 
+			{
+				for (size_t x = 0; x < BLOCKWIDTH; ++x) 
+				{
+					best_pixel_error = MAXERR1000;
+					pixel_colors <<=2; // Make room for next value
+
+					// Loop possible block colors
+					for (uint8 c = 0; c < 4; ++c) 
+					{
+					
+						diff[R] = srcimg[3*((starty+y)*width+startx+x)+R] - CLAMP(0,possible_colors[c][R],255);
+						diff[G] = srcimg[3*((starty+y)*width+startx+x)+G] - CLAMP(0,possible_colors[c][G],255);
+						diff[B] = srcimg[3*((starty+y)*width+startx+x)+B] - CLAMP(0,possible_colors[c][B],255);
+
+						pixel_error =	PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(diff[R]) +
+										PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*SQUARE(diff[G]) +
+										PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*SQUARE(diff[B]);
+
+						// Choose best error
+						if (pixel_error < best_pixel_error) 
+						{
+							best_pixel_error = pixel_error;
+							pixel_colors ^= (pixel_colors & 3); // Reset the two first bits
+							pixel_colors |= c;
+							thebestintheworld = c;
+						} 
+					}
+					block_error += best_pixel_error;
+				}
+			}
+			if (block_error < best_block_error) 
+			{
+				best_block_error = block_error;
+				distance = d;
+				pixel_indices = pixel_colors;
+			}
+		}
+		
+	decompressColor(R_BITS59T, G_BITS59T, B_BITS59T, colorsRGB444, colors);
+	return best_block_error;
+}
+
+// Calculate the error for the block at position (startx,starty)
+// The parameters needed for reconstruction is calculated as well
+//
+// In the 59T bit mode, we only have pattern T.
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double calculateError59TnoSwap(uint8* srcimg, int width, int startx, int starty, uint8 (colorsRGB444)[2][3], uint8 &distance, unsigned int &pixel_indices) 
+{
+	double block_error = 0, 
+		     best_block_error = MAXIMUM_ERROR, 
+				 pixel_error, 
+				 best_pixel_error;
+	int diff[3];
+	unsigned int pixel_colors;
+	uint8 colors[2][3];
+	uint8 possible_colors[4][3];
+	int thebestintheworld;
+
+	// First use the colors as they are, then swap them
+	decompressColor(R_BITS59T, G_BITS59T, B_BITS59T, colorsRGB444, colors);
+
+	// Test all distances
+	for (uint8 d = 0; d < BINPOW(TABLE_BITS_59T); ++d) 
+	{
+		calculatePaintColors59T(d,PATTERN_T, colors, possible_colors);
+			
+		block_error = 0;	
+		pixel_colors = 0;
+
+		// Loop block
+		for (size_t y = 0; y < BLOCKHEIGHT; ++y) 
+		{
+			for (size_t x = 0; x < BLOCKWIDTH; ++x) 
+			{
+				best_pixel_error = MAXIMUM_ERROR;
+				pixel_colors <<=2; // Make room for next value
+
+				// Loop possible block colors
+				for (uint8 c = 0; c < 4; ++c) 
+				{
+					diff[R] = srcimg[3*((starty+y)*width+startx+x)+R] - CLAMP(0,possible_colors[c][R],255);
+					diff[G] = srcimg[3*((starty+y)*width+startx+x)+G] - CLAMP(0,possible_colors[c][G],255);
+					diff[B] = srcimg[3*((starty+y)*width+startx+x)+B] - CLAMP(0,possible_colors[c][B],255);
+
+					pixel_error =	weight[R]*SQUARE(diff[R]) +
+						            weight[G]*SQUARE(diff[G]) +
+												weight[B]*SQUARE(diff[B]);
+
+					// Choose best error
+					if (pixel_error < best_pixel_error) 
+					{
+						best_pixel_error = pixel_error;
+						pixel_colors ^= (pixel_colors & 3); // Reset the two first bits
+						pixel_colors |= c;
+						thebestintheworld = c;
+					} 
+				}
+				block_error += best_pixel_error;
+			}
+		}
+		if (block_error < best_block_error) 
+		{
+			best_block_error = block_error;
+			distance = d;
+			pixel_indices = pixel_colors;
+		}
+	}
+		
+	decompressColor(R_BITS59T, G_BITS59T, B_BITS59T, colorsRGB444, colors);
+	return best_block_error;
+}
+
+// Packs two RGB444 base colors, the distance index d and the pixel indices into the
+// two 32-bit words of a T-mode block: compressed1 holds bits 63..32, compressed2 bits 31..0.
+//
+//|63 62 61 60 59|58 57 56 55|54 53 52 51|50 49 48 47|46 45 44 43|42 41 40 39|38 37 36 35|34 33 32|
+//|----empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|--dist--|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void packBlock59T(uint8 (colors)[2][3], uint8 d, unsigned int pixel_indices, unsigned int &compressed1, unsigned int &compressed2) 
+{ 
+	// High word: both base colors (4 bits per channel) followed by the distance index.
+	compressed1 = 0;
+	PUTBITSHIGH( compressed1, colors[0][R], 4, 58);
+	PUTBITSHIGH( compressed1, colors[0][G], 4, 54);
+	PUTBITSHIGH( compressed1, colors[0][B], 4, 50);
+	PUTBITSHIGH( compressed1, colors[1][R], 4, 46);
+	PUTBITSHIGH( compressed1, colors[1][G], 4, 42);
+	PUTBITSHIGH( compressed1, colors[1][B], 4, 38);
+	PUTBITSHIGH( compressed1, d, TABLE_BITS_59T, 34);
+	// Low word: the pixel indices after they have been passed through indexConversion().
+	unsigned int remapped = indexConversion(pixel_indices);
+	compressed2 = 0;
+	PUTBITS( compressed2, remapped, 32, 31);
+}
+
+// Copies both RGB triplets of source into dest, element by element.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void copyColors(uint8 (source)[2][3], uint8 (dest)[2][3])
+{
+	for (int color = 0; color < 2; ++color)
+	{
+		for (int channel = 0; channel < 3; ++channel)
+			dest[color][channel] = source[color][channel];
+	}
+}
+
+// The below code should compress the block to 59 bits. 
+//
+//|63 62 61 60 59|58 57 56 55|54 53 52 51|50 49 48 47|46 45 44 43|42 41 40 39|38 37 36 35|34 33 32|
+//|----empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|--dist--|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockTHUMB59TFastestOnlyColorPerceptual1000(uint8 *img,int width,int height,int startx,int starty, int (best_colorsRGB444_packed)[2])
+{
+	unsigned int best_error = MAXERR1000;
+	unsigned int best_pixel_indices;
+	uint8 best_distance;
+
+	unsigned int error_no_i;
+	uint8 colorsRGB444_no_i[2][3];
+	unsigned int pixel_indices_no_i;
+	uint8 distance_no_i;
+
+	uint8 colors[2][3];
+
+	// Calculate average color using the LBG-algorithm
+	computeColorLBGHalfIntensityFast(img,width,startx,starty, colors);
+	compressColor(R_BITS59T, G_BITS59T, B_BITS59T, colors, colorsRGB444_no_i);
+
+	// Determine the parameters for the lowest error
+	error_no_i = calculateError59Tperceptual1000(img, width, startx, starty, colorsRGB444_no_i, distance_no_i, pixel_indices_no_i);			
+
+	best_error = error_no_i;
+	best_distance = distance_no_i;
+	best_pixel_indices = pixel_indices_no_i;
+
+	best_colorsRGB444_packed[0] = (colorsRGB444_no_i[0][0] << 8) + (colorsRGB444_no_i[0][1] << 4) + (colorsRGB444_no_i[0][2] << 0);
+	best_colorsRGB444_packed[1] = (colorsRGB444_no_i[1][0] << 8) + (colorsRGB444_no_i[1][1] << 4) + (colorsRGB444_no_i[1][2] << 0);
+
+	return best_error;
+}
+
+
+// The below code should compress the block to 59 bits. 
+// This is supposed to match the first of the three modes in TWOTIMER.
+//
+//|63 62 61 60 59|58 57 56 55|54 53 52 51|50 49 48 47|46 45 44 43|42 41 40 39|38 37 36 35|34 33 32|
+//|----empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|--dist--|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double compressBlockTHUMB59TFastestOnlyColor(uint8 *img,int width,int height,int startx,int starty, int (best_colorsRGB444_packed)[2])
+{
+	double best_error = MAXIMUM_ERROR;
+	unsigned int best_pixel_indices;
+	uint8 best_distance;
+
+	double error_no_i;
+	uint8 colorsRGB444_no_i[2][3];
+	unsigned int pixel_indices_no_i;
+	uint8 distance_no_i;
+
+	uint8 colors[2][3];
+
+	// Calculate average color using the LBG-algorithm
+	computeColorLBGHalfIntensityFast(img,width,startx,starty, colors);
+	compressColor(R_BITS59T, G_BITS59T, B_BITS59T, colors, colorsRGB444_no_i);
+
+	// Determine the parameters for the lowest error
+	error_no_i = calculateError59T(img, width, startx, starty, colorsRGB444_no_i, distance_no_i, pixel_indices_no_i);			
+
+	best_error = error_no_i;
+	best_distance = distance_no_i;
+	best_pixel_indices = pixel_indices_no_i;
+
+	best_colorsRGB444_packed[0] = (colorsRGB444_no_i[0][0] << 8) + (colorsRGB444_no_i[0][1] << 4) + (colorsRGB444_no_i[0][2] << 0);
+	best_colorsRGB444_packed[1] = (colorsRGB444_no_i[1][0] << 8) + (colorsRGB444_no_i[1][1] << 4) + (colorsRGB444_no_i[1][2] << 0);
+
+	return best_error;
+}
+
+// Compresses the block at (startx,starty) in T-mode ("59T") using a single LBG color
+// estimate (computeColorLBGHalfIntensityFast) and the perceptual error metric.
+// This is supposed to match the first of the three modes in TWOTIMER.
+//
+//   img, width      source image (3 bytes per pixel) and its width in pixels
+//   height          unused
+//   startx, starty  position of the first pixel of the block
+//   compressed1/2   receive the packed block, laid out as follows:
+//
+//|63 62 61 60 59|58 57 56 55|54 53 52 51|50 49 48 47|46 45 44 43|42 41 40 39|38 37 36 35|34 33 32|
+//|----empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|--dist--|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+//
+// Returns the error of the encoded block (an unsigned int widened to double).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double compressBlockTHUMB59TFastestPerceptual1000(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2) 
+{
+	// 8-bit base colors chosen by the LBG-algorithm.
+	uint8 colors[2][3];
+
+	// The same colors quantized to RGB444; the error search may swap their order.
+	uint8 colorsRGB444[2][3];
+
+	// Remaining block parameters, filled in by the error search.
+	unsigned int pixel_indices;
+	uint8 distance;
+
+	// Calculate the base colors using the LBG-algorithm and quantize them
+	computeColorLBGHalfIntensityFast(img,width,startx,starty, colors);
+	compressColor(R_BITS59T, G_BITS59T, B_BITS59T, colors, colorsRGB444);
+
+	// Determine the parameters for the lowest error
+	const double best_error = calculateError59Tperceptual1000(img, width, startx, starty, colorsRGB444, distance, pixel_indices);
+
+	// Put the compress params into the compression block 
+	packBlock59T(colorsRGB444, distance, pixel_indices, compressed1, compressed2);
+
+	return best_error;
+}
+
+// Compresses the block at (startx,starty) in T-mode ("59T") using a single LBG color
+// estimate (computeColorLBGHalfIntensityFast) and the weight[] error metric.
+// This is supposed to match the first of the three modes in TWOTIMER.
+//
+//   img, width      source image (3 bytes per pixel) and its width in pixels
+//   height          unused
+//   startx, starty  position of the first pixel of the block
+//   compressed1/2   receive the packed block, laid out as follows:
+//
+//|63 62 61 60 59|58 57 56 55|54 53 52 51|50 49 48 47|46 45 44 43|42 41 40 39|38 37 36 35|34 33 32|
+//|----empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|--dist--|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+//
+// Returns the error of the encoded block as computed by calculateError59T.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double compressBlockTHUMB59TFastest(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2) 
+{
+	// 8-bit base colors chosen by the LBG-algorithm.
+	uint8 colors[2][3];
+
+	// The same colors quantized to RGB444; the error search may swap their order.
+	uint8 colorsRGB444[2][3];
+
+	// Remaining block parameters, filled in by the error search.
+	unsigned int pixel_indices;
+	uint8 distance;
+
+	// Calculate the base colors using the LBG-algorithm and quantize them
+	computeColorLBGHalfIntensityFast(img,width,startx,starty, colors);
+	compressColor(R_BITS59T, G_BITS59T, B_BITS59T, colors, colorsRGB444);
+
+	// Determine the parameters for the lowest error
+	const double best_error = calculateError59T(img, width, startx, starty, colorsRGB444, distance, pixel_indices);
+
+	// Put the compress params into the compression block 
+	packBlock59T(colorsRGB444, distance, pixel_indices, compressed1, compressed2);
+
+	return best_error;
+}
+
+// Compresses the block at (startx,starty) to 59 bits (T-mode), keeping the best of three LBG color estimates.
+// This is supposed to match the first of the three modes in TWOTIMER.
+//
+//|63 62 61 60 59|58 57 56 55|54 53 52 51|50 49 48 47|46 45 44 43|42 41 40 39|38 37 36 35|34 33 32|
+//|----empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|--dist--|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double compressBlockTHUMB59TFast(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2) 
+{
+	double best_error = MAXIMUM_ERROR; // placeholder; replaced by error_no_i before it is compared
+	uint8 best_colorsRGB444[2][3];
+	unsigned int best_pixel_indices;
+	uint8 best_distance;
+
+	double error_no_i; // results for the computeColorLBGNotIntensityFast candidate
+	uint8 colorsRGB444_no_i[2][3];
+	unsigned int pixel_indices_no_i;
+	uint8 distance_no_i;
+
+	double error_half_i; // results for the computeColorLBGHalfIntensityFast candidate
+	uint8 colorsRGB444_half_i[2][3];
+	unsigned int pixel_indices_half_i;
+	uint8 distance_half_i;
+	
+	double error; // results for the computeColorLBGfast candidate
+	uint8 colorsRGB444[2][3];
+	unsigned int pixel_indices;
+	uint8 distance;
+
+	uint8 colors[2][3]; // 8-bit colors, reused as scratch for each candidate
+
+	// Candidate 1: calculate the colors with computeColorLBGNotIntensityFast and quantize them
+	computeColorLBGNotIntensityFast(img,width,startx,starty, colors);
+	compressColor(R_BITS59T, G_BITS59T, B_BITS59T, colors, colorsRGB444_no_i);
+	// Determine the parameters for the lowest error
+	error_no_i = calculateError59T(img, width, startx, starty, colorsRGB444_no_i, distance_no_i, pixel_indices_no_i);			
+
+	// Candidate 2: calculate the colors with computeColorLBGHalfIntensityFast and quantize them
+	computeColorLBGHalfIntensityFast(img,width,startx,starty, colors);
+	compressColor(R_BITS59T, G_BITS59T, B_BITS59T, colors, colorsRGB444_half_i);
+	// Determine the parameters for the lowest error
+	error_half_i = calculateError59T(img, width, startx, starty, colorsRGB444_half_i, distance_half_i, pixel_indices_half_i);			
+
+	// Candidate 3: calculate the colors with computeColorLBGfast and quantize them
+	computeColorLBGfast(img,width,startx,starty, colors);
+	compressColor(R_BITS59T, G_BITS59T, B_BITS59T, colors, colorsRGB444);
+	// Determine the parameters for the lowest error
+	error = calculateError59T(img, width, startx, starty, colorsRGB444, distance, pixel_indices);			
+	
+	best_error = error_no_i; // start from candidate 1
+	best_distance = distance_no_i;
+	best_pixel_indices = pixel_indices_no_i;
+	copyColors(colorsRGB444_no_i, best_colorsRGB444);
+
+	if(error_half_i < best_error) // strictly better only: a tie keeps the earlier candidate
+	{
+		best_error = error_half_i;
+		best_distance = distance_half_i;
+		best_pixel_indices = pixel_indices_half_i;
+		copyColors (colorsRGB444_half_i, best_colorsRGB444);
+	}
+	if(error < best_error) // strictly better only: a tie keeps the earlier candidate
+	{
+		best_error = error;
+		best_distance = distance;
+		best_pixel_indices = pixel_indices;
+		copyColors (colorsRGB444, best_colorsRGB444);
+	}
+
+	// Put the compress params into the compression block 
+	packBlock59T(best_colorsRGB444, best_distance, best_pixel_indices, compressed1, compressed2);
+
+	return best_error;
+}
+
+// Calculate the error for the block at position (startx,starty)
+// The parameters needed for reconstruction is calculated as well
+// 
+// In the 58H bit mode, we only have pattern H.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateErrorAndCompress58Hperceptual1000(uint8* srcimg, int width, int startx, int starty, uint8 (colorsRGB444)[2][3], uint8 &distance, unsigned int &pixel_indices) 
+{
+	unsigned int block_error = 0, 
+		           best_block_error = MAXERR1000, 
+							 pixel_error, 
+							 best_pixel_error;
+	int diff[3];
+	unsigned int pixel_colors;
+	uint8 possible_colors[4][3];
+	uint8 colors[2][3];
+
+	decompressColor(R_BITS58H, G_BITS58H, B_BITS58H, colorsRGB444, colors);
+
+	// Test all distances
+	for (uint8 d = 0; d < BINPOW(TABLE_BITS_58H); ++d) 
+	{
+		calculatePaintColors58H(d, PATTERN_H, colors, possible_colors);
+
+		block_error = 0;	
+		pixel_colors = 0;
+
+		// Loop block
+		for (size_t y = 0; y < BLOCKHEIGHT; ++y) 
+		{
+			for (size_t x = 0; x < BLOCKWIDTH; ++x) 
+			{
+				best_pixel_error = MAXERR1000;
+				pixel_colors <<=2; // Make room for next value
+
+				// Loop possible block colors
+				for (uint8 c = 0; c < 4; ++c) 
+				{
+					diff[R] = srcimg[3*((starty+y)*width+startx+x)+R] - CLAMP(0,possible_colors[c][R],255);
+					diff[G] = srcimg[3*((starty+y)*width+startx+x)+G] - CLAMP(0,possible_colors[c][G],255);
+					diff[B] = srcimg[3*((starty+y)*width+startx+x)+B] - CLAMP(0,possible_colors[c][B],255);
+
+					pixel_error =	PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(diff[R]) +
+									PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*SQUARE(diff[G]) +
+									PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*SQUARE(diff[B]);
+
+					// Choose best error
+					if (pixel_error < best_pixel_error) 
+					{
+						best_pixel_error = pixel_error;
+						pixel_colors ^= (pixel_colors & 3); // Reset the two first bits
+						pixel_colors |= c;
+					} 
+				}
+				block_error += best_pixel_error;
+			}
+		}
+		
+		if (block_error < best_block_error) 
+		{
+			best_block_error = block_error;
+			distance = d;
+			pixel_indices = pixel_colors;
+		}
+	}
+	return best_block_error;
+}
+
+// The H-mode error search but with punchthrough alpha: pixels flagged in alphaimg must use one reserved paint index, all others must avoid it.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double calculateErrorAndCompress58HAlpha(uint8* srcimg, uint8* alphaimg,int width, int startx, int starty, uint8 (colorsRGB444)[2][3], uint8 &distance, unsigned int &pixel_indices) 
+{
+	double block_error = 0, 
+		   best_block_error = MAXIMUM_ERROR, 
+		   pixel_error, 
+		   best_pixel_error;
+	int diff[3];
+	unsigned int pixel_colors;
+	uint8 possible_colors[4][3];
+	uint8 colors[2][3];
+	int alphaindex; // paint index reserved for flagged pixels: 2, or 0 when the colors would have to be swapped
+	int colorsRGB444_packed[2]; // each color as one 12-bit number, used for the ordering test below
+	colorsRGB444_packed[0] = (colorsRGB444[0][R] << 8) + (colorsRGB444[0][G] << 4) + colorsRGB444[0][B];
+	colorsRGB444_packed[1] = (colorsRGB444[1][R] << 8) + (colorsRGB444[1][G] << 4) + colorsRGB444[1][B];
+	
+	decompressColor(R_BITS58H, G_BITS58H, B_BITS58H, colorsRGB444, colors);
+	
+	// Test all distances
+	for (uint8 d = 0; d < BINPOW(TABLE_BITS_58H); ++d) 
+	{
+		alphaindex=2;
+		if( (colorsRGB444_packed[0] >= colorsRGB444_packed[1]) ^ ((d & 1)==1) )
+		{
+			// Choosing this distance requires swapping the two colors. The indices are then
+			// swapped as well, so C1 will be the one with alpha instead of C3.
+			alphaindex=0;
+		}
+
+		calculatePaintColors58H(d, PATTERN_H, colors, possible_colors);
+
+		block_error = 0;	
+		pixel_colors = 0;
+
+		// Loop block
+		for (size_t y = 0; y < BLOCKHEIGHT; ++y) 
+		{
+			for (size_t x = 0; x < BLOCKWIDTH; ++x) 
+			{
+				int alpha=0; // becomes 1 when the alphaimg sample of this pixel is non-zero (one byte per pixel)
+				if(alphaimg[((starty+y)*width+startx+x)]>0) // NOTE(review): what non-zero means depends on how the caller prepares alphaimg - confirm
+					alpha=1;
+				if(alphaimg[((starty+y)*width+startx+x)]>0&&alphaimg[((starty+y)*width+startx+x)]<255) // only 0 and 255 are expected; other values are reported but still processed
+					printf("INVALID ALPHA DATA!!\n");
+				best_pixel_error = MAXIMUM_ERROR;
+				pixel_colors <<=2; // Make room for next value
+
+				// Loop possible block colors
+				for (uint8 c = 0; c < 4; ++c) 
+				{
+					if(c==alphaindex&&alpha) // flagged pixel on the reserved index: no error
+					{
+						pixel_error=0;
+					}
+					else if(c==alphaindex||alpha) // flagged pixel on another index, or unflagged pixel on the reserved one
+					{
+						pixel_error=MAXIMUM_ERROR; // never selected: the comparison below is strict
+					}
+					else 
+					{
+						diff[R] = srcimg[3*((starty+y)*width+startx+x)+R] - CLAMP(0,possible_colors[c][R],255);
+						diff[G] = srcimg[3*((starty+y)*width+startx+x)+G] - CLAMP(0,possible_colors[c][G],255);
+						diff[B] = srcimg[3*((starty+y)*width+startx+x)+B] - CLAMP(0,possible_colors[c][B],255);
+
+						pixel_error =	weight[R]*SQUARE(diff[R]) +
+										weight[G]*SQUARE(diff[G]) +
+										weight[B]*SQUARE(diff[B]);
+					}
+
+					// Choose best error
+					if (pixel_error < best_pixel_error) 
+					{
+						best_pixel_error = pixel_error;
+						pixel_colors ^= (pixel_colors & 3); // Reset the two first bits
+						pixel_colors |= c;
+					} 
+				}
+				block_error += best_pixel_error;
+			}
+		}
+		if (block_error < best_block_error) 
+		{
+			best_block_error = block_error;
+			distance = d; // outputs are only written when a distance improves on the best error so far
+			pixel_indices = pixel_colors;
+		}
+	}
+	return best_block_error;
+}
+
+// Calculate the error for the block at position (startx,starty)
+// The parameters needed for reconstruction is calculated as well
+// 
+// In the 58H bit mode, we only have pattern H.
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double calculateErrorAndCompress58H(uint8* srcimg, int width, int startx, int starty, uint8 (colorsRGB444)[2][3], uint8 &distance, unsigned int &pixel_indices) 
+{
+	double block_error = 0, 
+	       best_block_error = MAXIMUM_ERROR, 
+				 pixel_error, 
+				 best_pixel_error;
+	int diff[3];
+	unsigned int pixel_colors;
+	uint8 possible_colors[4][3];
+	uint8 colors[2][3];
+
+	
+	decompressColor(R_BITS58H, G_BITS58H, B_BITS58H, colorsRGB444, colors);
+
+	// Test all distances
+	for (uint8 d = 0; d < BINPOW(TABLE_BITS_58H); ++d) 
+	{
+		calculatePaintColors58H(d, PATTERN_H, colors, possible_colors);
+
+		block_error = 0;	
+		pixel_colors = 0;
+
+		// Loop block
+		for (size_t y = 0; y < BLOCKHEIGHT; ++y) 
+		{
+			for (size_t x = 0; x < BLOCKWIDTH; ++x) 
+			{
+				best_pixel_error = MAXIMUM_ERROR;
+				pixel_colors <<=2; // Make room for next value
+
+				// Loop possible block colors
+				for (uint8 c = 0; c < 4; ++c) 
+				{
+					diff[R] = srcimg[3*((starty+y)*width+startx+x)+R] - CLAMP(0,possible_colors[c][R],255);
+					diff[G] = srcimg[3*((starty+y)*width+startx+x)+G] - CLAMP(0,possible_colors[c][G],255);
+					diff[B] = srcimg[3*((starty+y)*width+startx+x)+B] - CLAMP(0,possible_colors[c][B],255);
+
+					pixel_error =	weight[R]*SQUARE(diff[R]) +
+									weight[G]*SQUARE(diff[G]) +
+									weight[B]*SQUARE(diff[B]);
+
+					// Choose best error
+					if (pixel_error < best_pixel_error) 
+					{
+						best_pixel_error = pixel_error;
+						pixel_colors ^= (pixel_colors & 3); // Reset the two first bits
+						pixel_colors |= c;
+					} 
+				}
+				block_error += best_pixel_error;
+			}
+		}
+		
+		if (block_error < best_block_error) 
+		{
+			best_block_error = block_error;
+			distance = d;
+			pixel_indices = pixel_colors;
+		}
+	}
+		
+	return best_block_error;
+}
+
+// Makes sure that col0 < col1, where colN is color N read as the 12-bit number R:G:B.
+// Colors in the wrong order are exchanged. Equal colors are made different: black/black
+// becomes (0,0,0)/(0,0,1); otherwise the first color becomes the second minus one,
+// computed on the 12-bit number.
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void sortColorsRGB444(uint8 (colorsRGB444)[2][3])
+{
+	// Read each color as a single number: red is most significant, blue least.
+	unsigned int first  = 16*16*colorsRGB444[0][R] + 16*colorsRGB444[0][G] + colorsRGB444[0][B];
+	unsigned int second = 16*16*colorsRGB444[1][R] + 16*colorsRGB444[1][G] + colorsRGB444[1][B];
+
+	if (first == second)
+	{
+		// Both colors are the same. That is useless. If they are both black,
+		// the second can just as well be (0,0,1). Else, the first can be second - 1.
+		if (first == 0)
+			second = first+1;
+		else
+			first = second-1;
+	}
+	else if (first > second)
+	{
+		// Wrong order: exchange the two values.
+		const unsigned int larger = first;
+		first = second;
+		second = larger;
+	}
+
+	// Split the (now strictly ordered) values back into their three channels.
+	colorsRGB444[0][R] = GETBITS(first, 4, 11);
+	colorsRGB444[0][G] = GETBITS(first, 4, 7);
+	colorsRGB444[0][B] = GETBITS(first, 4, 3);
+	colorsRGB444[1][R] = GETBITS(second, 4, 11);
+	colorsRGB444[1][G] = GETBITS(second, 4, 7);
+	colorsRGB444[1][B] = GETBITS(second, 4, 3);
+}
+
+// Compresses the block at (startx,starty) into the 58-bit H-mode layout shown below,
+// using a single LBG color estimate scored by calculateErrorAndCompress58Hperceptual1000.
+// compressed1 receives bits 63..32 of the layout and compressed2 bits 31..0:
+//
+//|63 62 61 60 59 58|57 56 55 54|53 52 51 50|49 48 47 46|45 44 43 42|41 40 39 38|37 36 35 34|33 32|
+//|-------empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|d2 d1|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+//
+// The distance d is three bits, d2 (MSB), d1 and d0 (LSB). d0 is not stored explicitly.
+// Instead if the 12-bit word red0,green0,blue0 < red1,green1,blue1, d0 is assumed to be 0.
+// Else, it is assumed to be 1.
+//
+// img, width, startx and starty are passed on unchanged to the color-estimation and
+// error helpers; height is not used. The return value is the error reported by
+// calculateErrorAndCompress58Hperceptual1000 for the emitted encoding.
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockTHUMB58HFastestPerceptual1000(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2) 
+{
+	uint8 lbg_colors[2][3];   // color pair estimated from the block
+	uint8 sorted444[2][3];    // that pair after quantization and sorting
+	uint8 stored444[2][3];    // the pair in the order it is written to the block
+	unsigned int indices;     // pixel indices (32 bits in total)
+	uint8 dist;               // distance d; only d2 and d1 are written out
+
+	// Estimate the two block colors, quantize them with the H-mode bit depths
+	// and bring them into ascending order.
+	computeColorLBGHalfIntensityFast(img, width, startx, starty, lbg_colors);
+	compressColor(R_BITS58H, G_BITS58H, B_BITS58H, lbg_colors, sorted444);
+	sortColorsRGB444(sorted444);
+
+	// Let the error routine choose the distance and the pixel indices for that pair.
+	const unsigned int block_error = calculateErrorAndCompress58Hperceptual1000(img, width, startx, starty, sorted444, dist, indices);
+	copyColors(sorted444, stored444);
+
+	// d0 is signalled through the order of the two stored colors:
+	//
+	//   first word >= second word   means d0 = 1
+	//   first word <  second word   means d0 = 0
+	//
+	// The colors therefore have to be exchanged exactly when the present order
+	// disagrees with the lowest bit of dist.
+	const int word0 = 256*stored444[0][R] + 16*stored444[0][G] + stored444[0][B];
+	const int word1 = 256*stored444[1][R] + 16*stored444[1][G] + stored444[1][B];
+	const bool first_not_smaller = (word0 >= word1);
+	const bool d0_is_set = ((dist & 1) == 1);
+	if(first_not_smaller != d0_is_set)
+	{
+		swapColors(stored444);
+
+		// Reshuffle pixel indices to exchange C1 with C3, and C2 with C4
+		// (the upper bit of every two-bit index is inverted).
+		indices = (0x55555555 & indices) | (0xaaaaaaaa & (~indices));
+	}
+
+	// Put the compress params into the compression block
+	compressed1 = 0;
+
+	PUTBITSHIGH( compressed1, stored444[0][R], 4, 57);
+	PUTBITSHIGH( compressed1, stored444[0][G], 4, 53);
+	PUTBITSHIGH( compressed1, stored444[0][B], 4, 49);
+	PUTBITSHIGH( compressed1, stored444[1][R], 4, 45);
+	PUTBITSHIGH( compressed1, stored444[1][G], 4, 41);
+	PUTBITSHIGH( compressed1, stored444[1][B], 4, 37);
+	// d0 is implied by the color order, so only the two upper distance bits are stored.
+	PUTBITSHIGH( compressed1, (dist >> 1), 2, 33);
+
+	// The converted pixel indices occupy the whole second word.
+	compressed2 = 0;
+	indices = indexConversion(indices);
+	PUTBITS( compressed2, indices, 32, 31);
+
+	return block_error;
+}
+
+// Compresses the block at (startx,starty) into the 58-bit H-mode layout shown below,
+// using a single LBG color estimate scored by calculateErrorAndCompress58H.
+// This is supposed to match the first of the three modes in TWOTIMER.
+// compressed1 receives bits 63..32 of the layout and compressed2 bits 31..0:
+//
+//|63 62 61 60 59 58|57 56 55 54|53 52 51 50|49 48 47 46|45 44 43 42|41 40 39 38|37 36 35 34|33 32|
+//|-------empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|d2 d1|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+//
+// The distance d is three bits, d2 (MSB), d1 and d0 (LSB). d0 is not stored explicitly.
+// Instead if the 12-bit word red0,green0,blue0 < red1,green1,blue1, d0 is assumed to be 0.
+// Else, it is assumed to be 1.
+//
+// img, width, startx and starty are passed on unchanged to the color-estimation and
+// error helpers; height is not used. The return value is the error reported by
+// calculateErrorAndCompress58H for the emitted encoding.
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double compressBlockTHUMB58HFastest(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2) 
+{
+	uint8 estimate[2][3];     // color pair estimated from the block
+	uint8 quantized[2][3];    // that pair after quantization and sorting
+	uint8 emitted[2][3];      // the pair in the order it is written to the block
+	unsigned int selectors;   // pixel indices (32 bits in total)
+	uint8 d;                  // distance d; only d2 and d1 are written out
+
+	// Estimate the two block colors, quantize them with the H-mode bit depths
+	// and bring them into ascending order.
+	computeColorLBGHalfIntensityFast(img, width, startx, starty, estimate);
+	compressColor(R_BITS58H, G_BITS58H, B_BITS58H, estimate, quantized);
+	sortColorsRGB444(quantized);
+
+	// Let the error routine choose the distance and the pixel indices for that pair.
+	const double result = calculateErrorAndCompress58H(img, width, startx, starty, quantized, d, selectors);
+	copyColors(quantized, emitted);
+
+	// d0 is signalled through the order of the two stored colors:
+	//
+	//   first word >= second word   means d0 = 1
+	//   first word <  second word   means d0 = 0
+	//
+	// The colors therefore have to be exchanged exactly when the present order
+	// disagrees with the lowest bit of d.
+	const int first_word  = 256*emitted[0][R] + 16*emitted[0][G] + emitted[0][B];
+	const int second_word = 256*emitted[1][R] + 16*emitted[1][G] + emitted[1][B];
+	const bool order_says_one = (first_word >= second_word);
+	const bool d0_is_one = ((d & 1) == 1);
+	if(order_says_one != d0_is_one)
+	{
+		swapColors(emitted);
+
+		// Reshuffle pixel indices to exchange C1 with C3, and C2 with C4
+		// (the upper bit of every two-bit index is inverted).
+		selectors = (0x55555555 & selectors) | (0xaaaaaaaa & (~selectors));
+	}
+
+	// Put the compress params into the compression block
+	compressed1 = 0;
+	PUTBITSHIGH( compressed1, emitted[0][R], 4, 57);
+	PUTBITSHIGH( compressed1, emitted[0][G], 4, 53);
+	PUTBITSHIGH( compressed1, emitted[0][B], 4, 49);
+	PUTBITSHIGH( compressed1, emitted[1][R], 4, 45);
+	PUTBITSHIGH( compressed1, emitted[1][G], 4, 41);
+	PUTBITSHIGH( compressed1, emitted[1][B], 4, 37);
+	// d0 is implied by the color order, so only the two upper distance bits are stored.
+	PUTBITSHIGH( compressed1, (d >> 1), 2, 33);
+
+	// The converted pixel indices occupy the whole second word.
+	selectors = indexConversion(selectors);
+	compressed2 = 0;
+	PUTBITS( compressed2, selectors, 32, 31);
+
+	return result;
+}
+
+// Same as compressBlockTHUMB58HFastest, but with 1-bit alpha: the distance and the
+// pixel indices are chosen by calculateErrorAndCompress58HAlpha, which additionally
+// receives alphaimg. height is not used. The return value is the error reported by
+// calculateErrorAndCompress58HAlpha for the emitted encoding.
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double compressBlockTHUMB58HAlpha(uint8 *img, uint8* alphaimg, int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2) 
+{
+	uint8 raw_pair[2][3];      // color pair estimated from the block
+	uint8 pair444[2][3];       // that pair after quantization and sorting
+	uint8 out_pair[2][3];      // the pair in the order it is written to the block
+	unsigned int index_bits;   // pixel indices (32 bits in total)
+	uint8 dist3;               // distance d; only d2 and d1 are written out
+
+	// Estimate the two block colors, quantize them with the H-mode bit depths
+	// and bring them into ascending order.
+	computeColorLBGHalfIntensityFast(img, width, startx, starty, raw_pair);
+	compressColor(R_BITS58H, G_BITS58H, B_BITS58H, raw_pair, pair444);
+	sortColorsRGB444(pair444);
+
+	// Let the alpha-aware error routine choose the distance and the pixel indices.
+	const double alpha_error = calculateErrorAndCompress58HAlpha(img, alphaimg,width, startx, starty, pair444, dist3, index_bits);
+	copyColors(pair444, out_pair);
+
+	// d0 is signalled through the order of the two stored colors:
+	//
+	//   first word >= second word   means d0 = 1
+	//   first word <  second word   means d0 = 0
+	//
+	// The colors therefore have to be exchanged exactly when the present order
+	// disagrees with the lowest bit of dist3.
+	const int packed_a = 256*out_pair[0][R] + 16*out_pair[0][G] + out_pair[0][B];
+	const int packed_b = 256*out_pair[1][R] + 16*out_pair[1][G] + out_pair[1][B];
+	const bool a_not_below_b = (packed_a >= packed_b);
+	const bool low_bit_set = ((dist3 & 1) == 1);
+	if(a_not_below_b != low_bit_set)
+	{
+		swapColors(out_pair);
+
+		// Reshuffle pixel indices to exchange C1 with C3, and C2 with C4
+		// (the upper bit of every two-bit index is inverted).
+		index_bits = (0x55555555 & index_bits) | (0xaaaaaaaa & (~index_bits));
+	}
+
+	// Put the compress params into the compression block
+	compressed1 = 0;
+
+	PUTBITSHIGH( compressed1, out_pair[0][R], 4, 57);
+	PUTBITSHIGH( compressed1, out_pair[0][G], 4, 53);
+	PUTBITSHIGH( compressed1, out_pair[0][B], 4, 49);
+	PUTBITSHIGH( compressed1, out_pair[1][R], 4, 45);
+	PUTBITSHIGH( compressed1, out_pair[1][G], 4, 41);
+	PUTBITSHIGH( compressed1, out_pair[1][B], 4, 37);
+	// d0 is implied by the color order, so only the two upper distance bits are stored.
+	PUTBITSHIGH( compressed1, (dist3 >> 1), 2, 33);
+
+	// The converted pixel indices occupy the whole second word.
+	index_bits = indexConversion(index_bits);
+	compressed2 = 0;
+	PUTBITS( compressed2, index_bits, 32, 31);
+
+	return alpha_error;
+}
+
+// Compresses the block at (startx,starty) into the 58-bit H-mode layout shown below.
+// Three LBG color estimates are tried (intensity discarded, intensity at half
+// influence, plain LBG) and the one with the smallest calculateErrorAndCompress58H
+// error is emitted. This is supposed to match the first of the three modes in TWOTIMER.
+// compressed1 receives bits 63..32 of the layout and compressed2 bits 31..0:
+//
+//|63 62 61 60 59 58|57 56 55 54|53 52 51 50|49 48 47 46|45 44 43 42|41 40 39 38|37 36 35 34|33 32|
+//|-------empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|d2 d1|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+//
+// The distance d is three bits, d2 (MSB), d1 and d0 (LSB). d0 is not stored explicitly.
+// Instead if the 12-bit word red0,green0,blue0 < red1,green1,blue1, d0 is assumed to be 0.
+// Else, it is assumed to be 1.
+//
+// img, width, startx and starty are passed on unchanged to the color-estimation and
+// error helpers; height is not used. Returns the error of the emitted encoding.
+//
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double compressBlockTHUMB58HFast(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2) 
+{
+	double best_error;
+	uint8 best_colorsRGB444[2][3];
+	unsigned int best_pixel_indices;
+	uint8 best_distance;
+
+	double error_no_i;
+	uint8 colorsRGB444_no_i[2][3];
+	unsigned int pixel_indices_no_i;
+	uint8 distance_no_i;
+
+	double error_half_i;
+	uint8 colorsRGB444_half_i[2][3];
+	unsigned int pixel_indices_half_i;
+	uint8 distance_half_i;
+
+	double error;
+	uint8 colorsRGB444[2][3];
+	unsigned int pixel_indices;
+	uint8 distance;
+
+	uint8 colors[2][3];
+
+	// Candidate 1: LBG estimate that discards the intensity in the error function.
+	computeColorLBGNotIntensity(img, width, startx, starty, colors);
+	compressColor(R_BITS58H, G_BITS58H, B_BITS58H, colors, colorsRGB444_no_i);
+	sortColorsRGB444(colorsRGB444_no_i);
+	error_no_i = calculateErrorAndCompress58H(img, width, startx, starty, colorsRGB444_no_i, distance_no_i, pixel_indices_no_i);
+
+	// Candidate 2: LBG estimate that halves the influence of the intensity. The
+	// half-intensity estimator is called here; repeating computeColorLBGNotIntensity
+	// would only try candidate 1's strategy a second time.
+	computeColorLBGHalfIntensityFast(img, width, startx, starty, colors);
+	compressColor(R_BITS58H, G_BITS58H, B_BITS58H, colors, colorsRGB444_half_i);
+	sortColorsRGB444(colorsRGB444_half_i);
+	error_half_i = calculateErrorAndCompress58H(img, width, startx, starty, colorsRGB444_half_i, distance_half_i, pixel_indices_half_i);
+
+	// Candidate 3: plain LBG estimate.
+	computeColorLBG(img, width, startx, starty, colors);
+	compressColor(R_BITS58H, G_BITS58H, B_BITS58H, colors, colorsRGB444);
+	sortColorsRGB444(colorsRGB444);
+	error = calculateErrorAndCompress58H(img, width, startx, starty, colorsRGB444, distance, pixel_indices);
+
+	// Start from candidate 1; a later candidate replaces it only with a strictly smaller error.
+	best_error = error_no_i;
+	best_distance = distance_no_i;
+	best_pixel_indices = pixel_indices_no_i;
+	copyColors(colorsRGB444_no_i, best_colorsRGB444);
+
+	if(error_half_i < best_error)
+	{
+		best_error = error_half_i;
+		best_distance = distance_half_i;
+		best_pixel_indices = pixel_indices_half_i;
+		copyColors(colorsRGB444_half_i, best_colorsRGB444);
+	}
+
+	if(error < best_error)
+	{
+		best_error = error;
+		best_distance = distance;
+		best_pixel_indices = pixel_indices;
+		copyColors(colorsRGB444, best_colorsRGB444);
+	}
+
+	// d0 is signalled through the color order: col0 >= col1 means d0 = 1 and
+	// col0 < col1 means d0 = 0. Swap when the present order disagrees with
+	// (best_distance & 1); this is the xor test below.
+	int best_colorsRGB444_packed[2];
+	best_colorsRGB444_packed[0] = (best_colorsRGB444[0][R] << 8) + (best_colorsRGB444[0][G] << 4) + best_colorsRGB444[0][B];
+	best_colorsRGB444_packed[1] = (best_colorsRGB444[1][R] << 8) + (best_colorsRGB444[1][G] << 4) + best_colorsRGB444[1][B];
+	if( (best_colorsRGB444_packed[0] >= best_colorsRGB444_packed[1]) ^ ((best_distance & 1)==1) )
+	{
+		swapColors(best_colorsRGB444);
+		// Reshuffle pixel indices to exchange C1 with C3, and C2 with C4
+		best_pixel_indices = (0x55555555 & best_pixel_indices) | (0xaaaaaaaa & (~best_pixel_indices));
+	}
+
+	// Put the compress params into the compression block
+	compressed1 = 0;
+	PUTBITSHIGH( compressed1, best_colorsRGB444[0][R], 4, 57);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[0][G], 4, 53);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[0][B], 4, 49);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[1][R], 4, 45);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[1][G], 4, 41);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[1][B], 4, 37);
+	PUTBITSHIGH( compressed1, (best_distance >> 1), 2, 33);
+	best_pixel_indices=indexConversion(best_pixel_indices);
+	compressed2 = 0;
+	PUTBITS( compressed2, best_pixel_indices, 32, 31);
+	return best_error;
+}
+
+// Compresses one block, using differential mode when the two half-block colors are close
+// enough and individual mode otherwise. Perceptual error metric, combined quantization
+// for colors. Both the unflipped (2x4) and the flipped (4x2) split are tried and the one
+// with the smaller error is written to compressed1/compressed2.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockDiffFlipCombinedPerceptual(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2)
+{
+	// Encoded words of the unflipped ("norm") and the flipped ("flip") candidate.
+	unsigned int compressed1_norm, compressed2_norm;
+	unsigned int compressed1_flip, compressed2_flip;
+	uint8 avg_color_quant1[3], avg_color_quant2[3];
+
+	float avg_color_float1[3],avg_color_float2[3];
+	int enc_color1[3], enc_color2[3], diff[3];
+	int min_error=255*255*8*3; // NOTE(review): never read in this function
+	unsigned int best_table_indices1=0, best_table_indices2=0; // NOTE(review): never read in this function
+	unsigned int best_table1=0, best_table2=0;
+	int diffbit;
+
+	int norm_err=0;
+	int flip_err=0;
+
+	// First try normal blocks 2x4:
+
+	computeAverageColor2x4noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor2x4noQuantFloat(img,width,height,startx+2,starty,avg_color_float2);
+
+	// First test if avg_color1 is similar enough to avg_color2 so that
+	// we can use differential coding of colors. 
+
+	float eps; // NOTE(review): assigned in the 444 branches but never read
+
+	uint8 dummy[3]; // third argument of the quantize helpers; not read afterwards in this function
+
+	quantize555ColorCombinedPerceptual(avg_color_float1, enc_color1, dummy);
+	quantize555ColorCombinedPerceptual(avg_color_float2, enc_color2, dummy);
+
+	diff[0] = enc_color2[0]-enc_color1[0];	
+	diff[1] = enc_color2[1]-enc_color1[1];	
+	diff[2] = enc_color2[2]-enc_color1[2];
+
+	if( (diff[0] >= -4) && (diff[0] <= 3) && (diff[1] >= -4) && (diff[1] <= 3) && (diff[2] >= -4) && (diff[2] <= 3) )
+	{
+		diffbit = 1;
+
+		// The difference to be coded (the same values as computed just before the test):
+
+		diff[0] = enc_color2[0]-enc_color1[0];	
+		diff[1] = enc_color2[1]-enc_color1[1];	
+		diff[2] = enc_color2[2]-enc_color1[2];
+		// Widen each 5-bit channel to 8 bits by repeating its top three bits at the low end.
+		avg_color_quant1[0] = enc_color1[0] << 3 | (enc_color1[0] >> 2);
+		avg_color_quant1[1] = enc_color1[1] << 3 | (enc_color1[1] >> 2);
+		avg_color_quant1[2] = enc_color1[2] << 3 | (enc_color1[2] >> 2);
+		avg_color_quant2[0] = enc_color2[0] << 3 | (enc_color2[0] >> 2);
+		avg_color_quant2[1] = enc_color2[1] << 3 | (enc_color2[1] >> 2);
+		avg_color_quant2[2] = enc_color2[2] << 3 | (enc_color2[2] >> 2);
+
+		// Pack bits into the first word. 
+
+		//     ETC1_RGB8_OES:
+		// 
+		//     a) bit layout in bits 63 through 32 if diffbit = 0
+		// 
+		//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+		//      ---------------------------------------------------------------------------------------------------
+		//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+		//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+		//      ---------------------------------------------------------------------------------------------------
+		//     
+		//     b) bit layout in bits 63 through 32 if diffbit = 1
+		// 
+		//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+		//      ---------------------------------------------------------------------------------------------------
+		//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+		//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+		//      ---------------------------------------------------------------------------------------------------
+		// 
+		//     c) bit layout in bits 31 through 0 (in both cases)
+		// 
+		//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
+		//      --------------------------------------------------------------------------------------------------
+		//     |       most significant pixel index bits       |         least significant pixel index bits       |  
+		//     | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
+		//      --------------------------------------------------------------------------------------------------      
+
+		compressed1_norm = 0;
+		PUTBITSHIGH( compressed1_norm, diffbit,       1, 33);
+ 		PUTBITSHIGH( compressed1_norm, enc_color1[0], 5, 63);
+ 		PUTBITSHIGH( compressed1_norm, enc_color1[1], 5, 55);
+ 		PUTBITSHIGH( compressed1_norm, enc_color1[2], 5, 47);
+ 		PUTBITSHIGH( compressed1_norm, diff[0],       3, 58);
+ 		PUTBITSHIGH( compressed1_norm, diff[1],       3, 50);
+ 		PUTBITSHIGH( compressed1_norm, diff[2],       3, 42);
+
+		unsigned int best_pixel_indices1_MSB;
+		unsigned int best_pixel_indices1_LSB;
+		unsigned int best_pixel_indices2_MSB;
+		unsigned int best_pixel_indices2_LSB;
+
+		norm_err = 0;
+
+		// left part of block
+		norm_err = tryalltables_3bittable2x4percep(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+
+		// right part of block
+		norm_err += tryalltables_3bittable2x4percep(img,width,height,startx+2,starty,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+		// Table codewords of both halves, then flip bit = 0 for the unflipped candidate.
+ 		PUTBITSHIGH( compressed1_norm, best_table1,   3, 39);
+ 		PUTBITSHIGH( compressed1_norm, best_table2,   3, 36);
+ 		PUTBITSHIGH( compressed1_norm,           0,   1, 32);
+
+		compressed2_norm = 0;
+		PUTBITS( compressed2_norm, (best_pixel_indices1_MSB     ), 8, 23);
+		PUTBITS( compressed2_norm, (best_pixel_indices2_MSB     ), 8, 31);
+		PUTBITS( compressed2_norm, (best_pixel_indices1_LSB     ), 8, 7);
+		PUTBITS( compressed2_norm, (best_pixel_indices2_LSB     ), 8, 15);
+	}
+	else
+	{
+		diffbit = 0;
+		// The difference is bigger than what fits in 555 plus delta-333, so we will have
+		// to deal with 444 444.
+
+		eps = (float) 0.0001;
+
+		quantize444ColorCombinedPerceptual(avg_color_float1, enc_color1, dummy);
+		quantize444ColorCombinedPerceptual(avg_color_float2, enc_color2, dummy);
+		// Widen each 4-bit channel to 8 bits by repeating the nibble.
+		avg_color_quant1[0] = enc_color1[0] << 4 | enc_color1[0]; 
+		avg_color_quant1[1] = enc_color1[1] << 4 | enc_color1[1]; 
+		avg_color_quant1[2] = enc_color1[2] << 4 | enc_color1[2];
+		avg_color_quant2[0] = enc_color2[0] << 4 | enc_color2[0]; 
+		avg_color_quant2[1] = enc_color2[1] << 4 | enc_color2[1]; 
+		avg_color_quant2[2] = enc_color2[2] << 4 | enc_color2[2];
+
+		// Pack bits into the first word. 
+
+		//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+		//      ---------------------------------------------------------------------------------------------------
+		//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+		//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+		//      ---------------------------------------------------------------------------------------------------
+
+		compressed1_norm = 0;
+		PUTBITSHIGH( compressed1_norm, diffbit,       1, 33);
+ 		PUTBITSHIGH( compressed1_norm, enc_color1[0], 4, 63);
+ 		PUTBITSHIGH( compressed1_norm, enc_color1[1], 4, 55);
+ 		PUTBITSHIGH( compressed1_norm, enc_color1[2], 4, 47);
+ 		PUTBITSHIGH( compressed1_norm, enc_color2[0], 4, 59);
+ 		PUTBITSHIGH( compressed1_norm, enc_color2[1], 4, 51);
+ 		PUTBITSHIGH( compressed1_norm, enc_color2[2], 4, 43);
+
+		unsigned int best_pixel_indices1_MSB;
+		unsigned int best_pixel_indices1_LSB;
+		unsigned int best_pixel_indices2_MSB;
+		unsigned int best_pixel_indices2_LSB;
+		
+		// left part of block
+		norm_err = tryalltables_3bittable2x4percep(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+
+		// right part of block
+		norm_err += tryalltables_3bittable2x4percep(img,width,height,startx+2,starty,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+		// Table codewords of both halves, then flip bit = 0 for the unflipped candidate.
+ 		PUTBITSHIGH( compressed1_norm, best_table1,   3, 39);
+ 		PUTBITSHIGH( compressed1_norm, best_table2,   3, 36);
+ 		PUTBITSHIGH( compressed1_norm,           0,   1, 32);
+
+		compressed2_norm = 0;
+		PUTBITS( compressed2_norm, (best_pixel_indices1_MSB     ), 8, 23);
+		PUTBITS( compressed2_norm, (best_pixel_indices2_MSB     ), 8, 31);
+		PUTBITS( compressed2_norm, (best_pixel_indices1_LSB     ), 8, 7);
+		PUTBITS( compressed2_norm, (best_pixel_indices2_LSB     ), 8, 15);
+	}
+
+	// Now try flipped blocks 4x2:
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty,avg_color_float1);
+	computeAverageColor4x2noQuantFloat(img,width,height,startx,starty+2,avg_color_float2);
+
+	// First test if avg_color1 is similar enough to avg_color2 so that
+	// we can use differential coding of colors. 
+	quantize555ColorCombinedPerceptual(avg_color_float1, enc_color1, dummy);
+	quantize555ColorCombinedPerceptual(avg_color_float2, enc_color2, dummy);
+
+	diff[0] = enc_color2[0]-enc_color1[0];	
+	diff[1] = enc_color2[1]-enc_color1[1];	
+	diff[2] = enc_color2[2]-enc_color1[2];
+
+	if( (diff[0] >= -4) && (diff[0] <= 3) && (diff[1] >= -4) && (diff[1] <= 3) && (diff[2] >= -4) && (diff[2] <= 3) )
+	{
+		diffbit = 1;
+
+		// The difference to be coded (the same values as computed just before the test):
+		diff[0] = enc_color2[0]-enc_color1[0];	
+		diff[1] = enc_color2[1]-enc_color1[1];	
+		diff[2] = enc_color2[2]-enc_color1[2];
+		// Widen each 5-bit channel to 8 bits by repeating its top three bits at the low end.
+		avg_color_quant1[0] = enc_color1[0] << 3 | (enc_color1[0] >> 2);
+		avg_color_quant1[1] = enc_color1[1] << 3 | (enc_color1[1] >> 2);
+		avg_color_quant1[2] = enc_color1[2] << 3 | (enc_color1[2] >> 2);
+		avg_color_quant2[0] = enc_color2[0] << 3 | (enc_color2[0] >> 2);
+		avg_color_quant2[1] = enc_color2[1] << 3 | (enc_color2[1] >> 2);
+		avg_color_quant2[2] = enc_color2[2] << 3 | (enc_color2[2] >> 2);
+
+		// Pack bits into the first word. 
+		compressed1_flip = 0;
+		PUTBITSHIGH( compressed1_flip, diffbit,       1, 33);
+ 		PUTBITSHIGH( compressed1_flip, enc_color1[0], 5, 63);
+ 		PUTBITSHIGH( compressed1_flip, enc_color1[1], 5, 55);
+ 		PUTBITSHIGH( compressed1_flip, enc_color1[2], 5, 47);
+ 		PUTBITSHIGH( compressed1_flip, diff[0],       3, 58);
+ 		PUTBITSHIGH( compressed1_flip, diff[1],       3, 50);
+ 		PUTBITSHIGH( compressed1_flip, diff[2],       3, 42);
+
+		unsigned int best_pixel_indices1_MSB;
+		unsigned int best_pixel_indices1_LSB;
+		unsigned int best_pixel_indices2_MSB;
+		unsigned int best_pixel_indices2_LSB;
+
+		// upper part of block
+		flip_err = tryalltables_3bittable4x2percep(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+		// lower part of block
+		flip_err += tryalltables_3bittable4x2percep(img,width,height,startx,starty+2,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+		// Table codewords of both halves, then flip bit = 1 for the flipped candidate.
+ 		PUTBITSHIGH( compressed1_flip, best_table1,   3, 39);
+ 		PUTBITSHIGH( compressed1_flip, best_table2,   3, 36);
+ 		PUTBITSHIGH( compressed1_flip,           1,   1, 32);
+		// Merge the index bits of both halves; the lower half's bits are shifted up by two.
+		best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+		best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);
+		
+		compressed2_flip = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+	}
+	else
+	{
+		diffbit = 0;
+		// The difference is bigger than what fits in 555 plus delta-333, so we will have
+		// to deal with 444 444.
+		eps = (float) 0.0001;
+
+		quantize444ColorCombinedPerceptual(avg_color_float1, enc_color1, dummy);
+		quantize444ColorCombinedPerceptual(avg_color_float2, enc_color2, dummy);
+		// Widen each 4-bit channel to 8 bits by repeating the nibble.
+		avg_color_quant1[0] = enc_color1[0] << 4 | enc_color1[0]; 
+		avg_color_quant1[1] = enc_color1[1] << 4 | enc_color1[1]; 
+		avg_color_quant1[2] = enc_color1[2] << 4 | enc_color1[2];
+		avg_color_quant2[0] = enc_color2[0] << 4 | enc_color2[0]; 
+		avg_color_quant2[1] = enc_color2[1] << 4 | enc_color2[1]; 
+		avg_color_quant2[2] = enc_color2[2] << 4 | enc_color2[2];
+
+		//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+		//      ---------------------------------------------------------------------------------------------------
+		//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+		//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+		//      ---------------------------------------------------------------------------------------------------
+
+		// Pack bits into the first word. 
+		compressed1_flip = 0;
+		PUTBITSHIGH( compressed1_flip, diffbit,       1, 33);
+ 		PUTBITSHIGH( compressed1_flip, enc_color1[0], 4, 63);
+ 		PUTBITSHIGH( compressed1_flip, enc_color1[1], 4, 55);
+ 		PUTBITSHIGH( compressed1_flip, enc_color1[2], 4, 47);
+ 		PUTBITSHIGH( compressed1_flip, enc_color2[0], 4, 59);
+ 		PUTBITSHIGH( compressed1_flip, enc_color2[1], 4, 51);
+ 		PUTBITSHIGH( compressed1_flip, enc_color2[2], 4, 43);
+
+		unsigned int best_pixel_indices1_MSB;
+		unsigned int best_pixel_indices1_LSB;
+		unsigned int best_pixel_indices2_MSB;
+		unsigned int best_pixel_indices2_LSB;
+
+		// upper part of block
+		flip_err = tryalltables_3bittable4x2percep(img,width,height,startx,starty,avg_color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+		// lower part of block
+		flip_err += tryalltables_3bittable4x2percep(img,width,height,startx,starty+2,avg_color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+		// Table codewords of both halves, then flip bit = 1 for the flipped candidate.
+ 		PUTBITSHIGH( compressed1_flip, best_table1,   3, 39);
+ 		PUTBITSHIGH( compressed1_flip, best_table2,   3, 36);
+ 		PUTBITSHIGH( compressed1_flip,           1,   1, 32);
+		// Merge the index bits of both halves; the lower half's bits are shifted up by two.
+		best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+		best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);
+		
+		compressed2_flip = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+	}
+
+	// Keep the orientation with the smaller error; a tie goes to the unflipped encoding.
+	if(norm_err <= flip_err)
+	{
+		compressed1 = compressed1_norm | 0; // "| 0" leaves the word unchanged
+		compressed2 = compressed2_norm;
+	}
+	else
+	{
+		compressed1 = compressed1_flip | 1; // sets bit 0 of the word; presumably the flip bit already written above - TODO confirm
+		compressed2 = compressed2_flip;
+	}
+}
+
+// Returns the sum of the squared R, G and B differences between img and imgdec over
+// the 4x4 pixels whose top-left corner is (startx,starty). height is not used.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double calcBlockErrorRGB(uint8 *img, uint8 *imgdec, int width, int height, int startx, int starty)
+{
+	double sum = 0;
+
+	for(int px = startx; px < startx+4; px++)
+	{
+		for(int py = starty; py < starty+4; py++)
+		{
+			const double dr = 1.0*RED(img,width,px,py)   - 1.0*RED(imgdec, width, px,py);
+			const double dg = 1.0*GREEN(img,width,px,py) - 1.0*GREEN(imgdec, width, px,py);
+			const double db = 1.0*BLUE(img,width,px,py)  - 1.0*BLUE(imgdec, width, px,py);
+			sum += SQUARE(dr);
+			sum += SQUARE(dg);
+			sum += SQUARE(db);
+		}
+	}
+	return sum;
+}
+
+// Returns the sum of the squared R, G and B differences between img and imgdec over the
+// 4x4 pixels at (startx,starty), each scaled by PERCEPTUAL_WEIGHT_{R,G,B}_SQUARED. height is not used.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double calcBlockPerceptualErrorRGB(uint8 *img, uint8 *imgdec, int width, int height, int startx, int starty)
+{
+	double weighted_sum = 0;
+
+	for(int px = startx; px < startx+4; px++)
+	{
+		for(int py = starty; py < starty+4; py++)
+		{
+			const double dr = 1.0*RED(img,width,px,py)   - 1.0*RED(imgdec, width, px,py);
+			const double dg = 1.0*GREEN(img,width,px,py) - 1.0*GREEN(imgdec, width, px,py);
+			const double db = 1.0*BLUE(img,width,px,py)  - 1.0*BLUE(imgdec, width, px,py);
+			weighted_sum += PERCEPTUAL_WEIGHT_R_SQUARED*SQUARE(dr);
+			weighted_sum += PERCEPTUAL_WEIGHT_G_SQUARED*SQUARE(dg);
+			weighted_sum += PERCEPTUAL_WEIGHT_B_SQUARED*SQUARE(db);
+		}
+	}
+	return weighted_sum;
+}
+
+// Compress an ETC1 block (or the individual and differential modes of an ETC2 block).
+// The block is encoded twice, by compressBlockDiffFlipAverage and by
+// compressBlockDiffFlipCombined; each encoding is decoded into imgdec and scored with
+// calcBlockErrorRGB, and the one with the smaller error is returned through
+// compressed1/compressed2 (a tie keeps the "average" encoding).
+// decompressBlockDiffFlip is called last for the "combined" encoding, so whatever it
+// writes to imgdec stays there even when the "average" encoding is the one returned.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double compressBlockDiffFlipFast(uint8 *img, uint8 *imgdec,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2)
+{
+	unsigned int avg_word1;
+	unsigned int avg_word2;
+	unsigned int comb_word1;
+	unsigned int comb_word2;
+
+	// First quantize the average color to the nearest neighbor.
+	compressBlockDiffFlipAverage(img, width, height, startx, starty, avg_word1, avg_word2);
+	decompressBlockDiffFlip(avg_word1, avg_word2, imgdec, width, height, startx, starty);
+	const double avg_error = calcBlockErrorRGB(img, imgdec, width, height, startx, starty);
+
+	// Then quantize the average color taking into consideration that intensity can change
+	compressBlockDiffFlipCombined(img, width, height, startx, starty, comb_word1, comb_word2);
+	decompressBlockDiffFlip(comb_word1, comb_word2, imgdec, width, height, startx, starty);
+	const double comb_error = calcBlockErrorRGB(img, imgdec, width, height, startx, starty);
+
+	// The combined encoding is taken only when it is strictly better.
+	if(comb_error < avg_error)
+	{
+		compressed1 = comb_word1;
+		compressed2 = comb_word2;
+		return comb_error;
+	}
+
+	// Otherwise stay with the average-color encoding.
+	compressed1 = avg_word1;
+	compressed2 = avg_word2;
+	return avg_error;
+}
+
+// ETC1 block compression (also the individual and differential modes of an ETC2 block),
+// with both candidates produced and scored by the perceptual variants of the helpers.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockDiffFlipFastPerceptual(uint8 *img, uint8 *imgdec,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2)
+{
+	// Candidate A: quantize the average color to the nearest neighbor.
+	unsigned int avg_word1;
+	unsigned int avg_word2;
+	compressBlockDiffFlipAveragePerceptual(img, width, height, startx, starty, avg_word1, avg_word2);
+	decompressBlockDiffFlip(avg_word1, avg_word2, imgdec, width, height, startx, starty);
+	const double avg_err = calcBlockPerceptualErrorRGB(img, imgdec, width, height, startx, starty);
+
+	// Candidate B: quantize the average color taking into consideration that intensity can change.
+	unsigned int comb_word1;
+	unsigned int comb_word2;
+	compressBlockDiffFlipCombinedPerceptual(img, width, height, startx, starty, comb_word1, comb_word2);
+	decompressBlockDiffFlip(comb_word1, comb_word2, imgdec, width, height, startx, starty);
+	const double comb_err = calcBlockPerceptualErrorRGB(img, imgdec, width, height, startx, starty);
+
+	// imgdec holds the decode of candidate B from here on, whichever candidate is chosen.
+	// The winning error is not reported to the caller.
+	// Candidate B must be strictly better to be selected; otherwise candidate A is kept.
+	const bool pick_combined = comb_err < avg_err;
+	if(!pick_combined)
+	{
+		compressed1 = avg_word1;
+		compressed2 = avg_word2;
+		return;
+	}
+
+	// Candidate B wins.
+	compressed1 = comb_word1;
+	compressed2 = comb_word2;
+}
+
+// Compresses the differential mode of an ETC2 block with punchthrough alpha. Both flip orientations are tried; the one with the lower error is written to etc1_word1/etc1_word2 and that error is returned.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int compressBlockDifferentialWithAlpha(bool isTransparent, uint8* img, uint8* alphaimg, uint8* imgdec, int width, int height, int startx, int starty, unsigned int &etc1_word1, unsigned int &etc1_word2) 
+{
+	unsigned int compressed1_norm, compressed2_norm;
+	unsigned int compressed1_flip, compressed2_flip;
+	unsigned int compressed1_temp, compressed2_temp;
+	uint8 avg_color_quant1[3], avg_color_quant2[3];
+	// imgdec and height are not read anywhere in this function.
+	float avg_color_float1[3],avg_color_float2[3];
+	int enc_color1[3], enc_color2[3], diff[3];
+	int min_error=255*255*8*3; // never read in this function
+	// norm_err/flip_err: total block error for flipbit 0 and flipbit 1 respectively.
+	int norm_err=0;
+	int flip_err=0;
+	int temp_err=0; // reset once per orientation below, but never read
+	for(int flipbit=0; flipbit<2; flipbit++) 
+	{
+		//compute average color for each half.
+		for(int c=0; c<3; c++) 
+		{
+			avg_color_float1[c]=0;
+			avg_color_float2[c]=0;
+			float sum1=0;
+			float sum2=0;
+			for(int x=0; x<4; x++) 
+			{
+				for(int y=0; y<4; y++) 
+				{
+					float fac=1;
+					int index = x+startx+(y+starty)*width;
+					//transparent pixels are only barely figured into the average. This ensures that they DO matter if we have only
+					//transparent pixels in one half of the block, and not otherwise. A bit ugly perhaps.
+					if(alphaimg[index]<128)
+						fac=0.0001f;
+					float col = fac*img[index*3+c];
+					if( (flipbit==0&&x<2) || (flipbit==1&&y<2) ) 
+					{
+						sum1+=fac;
+						avg_color_float1[c]+=col;
+					}
+					else 
+					{
+						sum2+=fac;
+						avg_color_float2[c]+=col;
+					}
+				}
+			}
+			avg_color_float1[c]/=sum1; // weighted mean: the sums are divided by the accumulated weights
+			avg_color_float2[c]/=sum2;
+		}
+		uint8 dummy[3]; // receives the third argument's output, which is not used here
+		quantize555ColorCombined(avg_color_float1, enc_color1, dummy);
+		quantize555ColorCombined(avg_color_float2, enc_color2, dummy);
+		// per-channel difference between the two quantized base colors
+		diff[0] = enc_color2[0]-enc_color1[0];	
+		diff[1] = enc_color2[1]-enc_color1[1];	
+		diff[2] = enc_color2[2]-enc_color1[2];
+
+		//make sure diff is small enough for diff-coding
+		for(int c=0; c<3; c++) 
+		{
+				if(diff[c]<-4)
+					diff[c]=-4;
+				if(diff[c]>3)
+					diff[c]=3;
+				enc_color2[c]=enc_color1[c]+diff[c]; // second color is re-derived from the clamped difference
+		}
+		// expand the encoded colors to 8 bits by replicating their top bits into the low bits
+		avg_color_quant1[0] = enc_color1[0] << 3 | (enc_color1[0] >> 2);
+		avg_color_quant1[1] = enc_color1[1] << 3 | (enc_color1[1] >> 2);
+		avg_color_quant1[2] = enc_color1[2] << 3 | (enc_color1[2] >> 2);
+		avg_color_quant2[0] = enc_color2[0] << 3 | (enc_color2[0] >> 2);
+		avg_color_quant2[1] = enc_color2[1] << 3 | (enc_color2[1] >> 2);
+		avg_color_quant2[2] = enc_color2[2] << 3 | (enc_color2[2] >> 2);
+
+		// Pack bits into the first word. 
+		// see regular compressblockdiffflipfast for details
+
+		compressed1_temp = 0;
+		PUTBITSHIGH( compressed1_temp, !isTransparent,       1, 33);
+		PUTBITSHIGH( compressed1_temp, enc_color1[0], 5, 63);
+		PUTBITSHIGH( compressed1_temp, enc_color1[1], 5, 55);
+		PUTBITSHIGH( compressed1_temp, enc_color1[2], 5, 47);
+		PUTBITSHIGH( compressed1_temp, diff[0],       3, 58);
+		PUTBITSHIGH( compressed1_temp, diff[1],       3, 50);
+		PUTBITSHIGH( compressed1_temp, diff[2],       3, 42);
+
+		temp_err = 0;
+		
+		int besterror[2];
+		besterror[0]=255*255*3*16;
+		besterror[1]=255*255*3*16;
+		int besttable[2];
+		int best_indices_LSB[16];
+		int	best_indices_MSB[16];
+		//for each table, we're going to compute the indices required to get minimum error in each half.
+		//then we'll check if this was the best table for either half, and set besterror/besttable accordingly.
+		for(int table=0; table<8; table++) 
+		{
+			int taberror[2];//accumulated squared error of this table, one entry per half of the block.
+			
+			int pixel_indices_LSB[16],pixel_indices_MSB[16];
+			
+			for(int i=0; i<2; i++) 
+			{
+				taberror[i]=0;
+			}
+			for(int x=0; x<4; x++) 
+			{
+				for(int y=0; y<4; y++) 
+				{
+					int index = x+startx+(y+starty)*width;
+					uint8 basecol[3];
+					bool transparentPixel=alphaimg[index]<128;
+					//determine which half of the block this pixel is in, based on the flipbit.
+					int half=0;
+					if( (flipbit==0&&x<2) || (flipbit&&y<2) ) 
+					{
+						basecol[0]=avg_color_quant1[0];
+						basecol[1]=avg_color_quant1[1];
+						basecol[2]=avg_color_quant1[2];
+					}
+					else 
+					{
+						half=1;
+						basecol[0]=avg_color_quant2[0];
+						basecol[1]=avg_color_quant2[1];
+						basecol[2]=avg_color_quant2[2];
+					}
+					int besterri=255*255*3*2;
+					int besti=0;
+					int erri;
+					for(int i=0; i<4; i++) 
+					{
+						if(i==1&&isTransparent) //index 1 is never picked by this search when isTransparent is set
+							continue;
+						erri=0;
+						for(int c=0; c<3; c++) 
+						{
+							int col=CLAMP(0,((int)basecol[c])+compressParams[table*2][i],255);
+							if(i==2&&isTransparent) 
+							{
+								 col=(int)basecol[c]; //with isTransparent set, index 2 uses the base color with no offset
+							}
+							int errcol=col-((int)(img[index*3+c]));
+							erri=erri+(errcol*errcol);
+						}
+						if(erri<besterri) 
+						{
+							besterri=erri;
+							besti=i;
+						}
+					}
+					if(transparentPixel) //pixels with alpha<128 always get index 1 and contribute no error
+					{
+						besterri=0;
+						besti=1;
+					}
+					//the best index for this pixel using this table for its half is known.
+					//add its error to error for this table and half.
+					taberror[half]+=besterri;
+					//store the index! we might toss it later though if this was a bad table.
+
+					int pixel_index = scramble[besti];
+
+					pixel_indices_MSB[x*4+y]=(pixel_index >> 1);
+					pixel_indices_LSB[x*4+y]=(pixel_index & 1);
+				}
+			}
+			for(int half=0; half<2; half++) 
+			{
+				if(taberror[half]<besterror[half]) 
+				{
+					besterror[half]=taberror[half];
+					besttable[half]=table;
+					for(int i=0; i<16; i++) 
+					{
+						int thishalf=0;
+						int y=i%4; //i is x*4+y, matching the layout used when the indices were stored
+						int x=i/4;
+						if( !(flipbit==0&&x<2) && !(flipbit&&y<2) )
+							thishalf=1;
+						if(half!=thishalf) //this pixel is not in given half, don't update best index!
+							continue;
+						best_indices_MSB[i]=pixel_indices_MSB[i];
+						best_indices_LSB[i]=pixel_indices_LSB[i];
+					}
+				}
+			}
+		}
+		PUTBITSHIGH( compressed1_temp, besttable[0],   3, 39);
+		PUTBITSHIGH( compressed1_temp, besttable[1],   3, 36);
+		PUTBITSHIGH( compressed1_temp,      0,   1, 32); //bit 32 is left at 0 here; the winning orientation is OR-ed into the word at the end of the function
+
+		compressed2_temp = 0;
+		for(int i=0; i<16; i++) 
+		{
+			PUTBITS( compressed2_temp, (best_indices_MSB[i]  ), 1, 16+i);
+			PUTBITS( compressed2_temp, (best_indices_LSB[i]  ), 1, i);
+		}
+		
+		if(flipbit) 
+		{
+			flip_err=besterror[0]+besterror[1];
+			compressed1_flip=compressed1_temp;
+			compressed2_flip=compressed2_temp;
+		}
+		else 
+		{
+			norm_err=besterror[0]+besterror[1];
+			compressed1_norm=compressed1_temp;
+			compressed2_norm=compressed2_temp;
+		}
+	}
+	// Now to find out if flipping was a good idea or not
+	// (a tie keeps the non-flipped encoding)
+	if(norm_err <= flip_err)
+	{
+		etc1_word1 = compressed1_norm | 0;
+		etc1_word2 = compressed2_norm;
+		return norm_err;
+	}
+	else
+	{
+		etc1_word1 = compressed1_flip | 1;
+		etc1_word2 = compressed2_flip;
+		return flip_err;
+	}
+}
+
+
+// Calculate RGBA error --- only count non-transparent pixels (alpha >= 128) of the 4x4 block at (startx,starty)
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double calcBlockErrorRGBA(uint8 *img, uint8 *imgdec, uint8* alpha, int width, int height, int startx, int starty)
+{
+	int xx,yy;
+	double err;
+	// alpha is read as one byte per pixel with a row stride of width; height is not read.
+	err = 0;
+	// Returns the sum of squared R, G and B differences between img and imgdec over the counted pixels.
+	for(xx = startx; xx< startx+4; xx++)
+	{
+		for(yy = starty; yy<starty+4; yy++)
+		{
+			//only count non-transparent pixels: the rest of the encoder treats alpha<128 as transparent, so 128 itself is opaque.
+			if(alpha[yy*width+xx]>=128)	
+			{
+ 				err += SQUARE(1.0*RED(img,width,xx,yy)  - 1.0*RED(imgdec, width, xx,yy));
+ 				err += SQUARE(1.0*GREEN(img,width,xx,yy)- 1.0*GREEN(imgdec, width, xx,yy));
+ 				err += SQUARE(1.0*BLUE(img,width,xx,yy) - 1.0*BLUE(imgdec, width, xx,yy));
+			}
+		}
+	}
+	return err;
+}
+
+//calculates the error for a block using the given colors, and the parameters required to obtain the error. This version uses 1-bit punch-through alpha.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double calculateError59TAlpha(uint8* srcimg, uint8* alpha,int width, int startx, int starty, uint8 (colorsRGB444)[2][3], uint8 &distance, unsigned int &pixel_indices) 
+{
+	// Outputs: distance and pixel_indices describe the best candidate found; the return value is its block error.
+	double block_error = 0, 
+		   best_block_error = MAXIMUM_ERROR, 
+		   pixel_error, 
+		   best_pixel_error;
+	int diff[3];
+	uint8 best_sw;
+	unsigned int pixel_colors;
+	uint8 colors[2][3];
+	uint8 possible_colors[4][3];
+
+	// First use the colors as they are, then swap them
+	for (uint8 sw = 0; sw <2; ++sw) 
+	{ 
+		if (sw == 1) 
+		{
+			swapColors(colorsRGB444);
+		}
+		decompressColor(R_BITS59T, G_BITS59T, B_BITS59T, colorsRGB444, colors);
+
+		// Test all distances
+		for (uint8 d = 0; d < BINPOW(TABLE_BITS_59T); ++d) 
+		{
+			calculatePaintColors59T(d,PATTERN_T, colors, possible_colors);
+			
+			block_error = 0;	
+			pixel_colors = 0;
+
+			// Loop block
+			for (size_t y = 0; y < BLOCKHEIGHT; ++y) 
+			{
+				for (size_t x = 0; x < BLOCKWIDTH; ++x) 
+				{
+					best_pixel_error = MAXIMUM_ERROR;
+					pixel_colors <<=2; // Make room for next value
+
+					// Loop possible block colors
+					if(alpha[x+startx+(y+starty)*width]==0) 
+					{
+						best_pixel_error=0; // pixels with alpha 0 contribute no error
+						pixel_colors ^= (pixel_colors & 3); // Reset the two first bits
+						pixel_colors |= 2; //insert the index for this pixel, two meaning transparent.
+					}
+					else 
+					{
+						for (uint8 c = 0; c < 4; ++c) 
+						{
+							
+							if(c==2) 
+								continue; //don't use this, because we don't have alpha here and index 2 means transparent.
+							diff[R] = srcimg[3*((starty+y)*width+startx+x)+R] - CLAMP(0,possible_colors[c][R],255);
+							diff[G] = srcimg[3*((starty+y)*width+startx+x)+G] - CLAMP(0,possible_colors[c][G],255);
+							diff[B] = srcimg[3*((starty+y)*width+startx+x)+B] - CLAMP(0,possible_colors[c][B],255);
+
+							pixel_error =	weight[R]*SQUARE(diff[R]) +
+											weight[G]*SQUARE(diff[G]) +
+											weight[B]*SQUARE(diff[B]);
+
+							// Choose best error
+							if (pixel_error < best_pixel_error) 
+							{
+								best_pixel_error = pixel_error;
+								pixel_colors ^= (pixel_colors & 3); // Reset the two first bits
+								pixel_colors |= c; //insert the index for this pixel
+							} 
+						}
+					}
+					block_error += best_pixel_error;
+				}
+			}
+			if (block_error < best_block_error) 
+			{
+				best_block_error = block_error;
+				distance = d;
+				pixel_indices = pixel_colors;
+				best_sw = sw;
+			}
+		}
+		// If the un-swapped order won, undo the swap so colorsRGB444 is left in the winning order (assumes swapColors exchanges the two entries in place).
+		if (sw == 1 && best_sw == 0) 
+		{
+			swapColors(colorsRGB444);
+		}
+		decompressColor(R_BITS59T, G_BITS59T, B_BITS59T, colorsRGB444, colors); // refreshes the local colors array; that result is overwritten on the next pass and not read after the last one
+	}
+	return best_block_error;
+}
+
+// T-mode compressor for punchthrough alpha: one of the colors (the central one in the T) is used to also signal that the pixel is transparent.
+// It follows the fastest t-mode compressor, except that the error search is done by calculateError59TAlpha, which considers alpha.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double compressBlockTHUMB59TAlpha(uint8 *img, uint8* alpha, int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2) 
+{
+	// height is not read by this function.
+	// compressed1/compressed2 receive the packed block; the return value is the error of that block.
+
+	// Step 1: calculate average color using the LBG-algorithm, then compress it with the 59T bit counts.
+	uint8 lbg_colors[2][3];
+	uint8 base_colors444[2][3];
+	computeColorLBGHalfIntensityFast(img,width,startx,starty, lbg_colors);
+	compressColor(R_BITS59T, G_BITS59T, B_BITS59T, lbg_colors, base_colors444);
+
+	// Step 2: determine the parameters for the lowest error.
+	// calculateError59TAlpha may reorder the two entries of base_colors444 while searching.
+	uint8 found_distance;
+	unsigned int found_indices;
+	const double found_error = calculateError59TAlpha(img, alpha, width, startx, starty, base_colors444, found_distance, found_indices);
+
+	// Step 3: there is only one candidate, so it is also the chosen one.
+	// The colors are copied after the search, i.e. in the order the search left them.
+	double chosen_error = found_error;
+	uint8 chosen_distance = found_distance;
+	unsigned int chosen_indices = found_indices;
+	uint8 chosen_colors444[2][3];
+	copyColors(base_colors444, chosen_colors444);
+
+	// Step 4: put the compress params into the compression block.
+	packBlock59T(chosen_colors444, chosen_distance, chosen_indices, compressed1, compressed2);
+
+	// Report the error obtained in step 2.
+	return chosen_error;
+}
+
+// Rearranges a 59-bit T-mode block into its final 64-bit layout, with the diff bit cleared.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void stuff59bitsDiffFalse(unsigned int thumbT59_word1, unsigned int thumbT59_word2, unsigned int &thumbT_word1, unsigned int &thumbT_word2)
+{
+	// Put bits in twotimer configuration for 59 (red overflows)
+	// 
+	// Go from this bit layout:
+	//
+	//     |63 62 61 60 59|58 57 56 55|54 53 52 51|50 49 48 47|46 45 44 43|42 41 40 39|38 37 36 35|34 33 32|
+	//     |----empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|--dist--|
+	//
+	//     |31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+	//     |----------------------------------------index bits---------------------------------------------|
+	//
+	//
+	//  To this:
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      -----------------------------------------------------------------------------------------------
+	//     |// // //|R0a  |//|R0b  |G0         |B0         |R1         |G1         |B1          |da  |df|db|
+	//      -----------------------------------------------------------------------------------------------
+	//
+	//     |31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+	//     |----------------------------------------index bits---------------------------------------------|
+	//
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 
+	//      -----------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |df|fp|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bt|bt|
+	//      ------------------------------------------------------------------------------------------------
+
+	// R0a: the top two bits of R0, read from the 59-bit layout before anything is shifted.
+	const uint8 red0_top = GETBITSHIGH( thumbT59_word1, 2, 58);
+
+	// Fix middle part
+	thumbT_word1 = thumbT59_word1 << 1;
+	// Fix R0a (top two bits of R0)
+	PUTBITSHIGH( thumbT_word1, red0_top,  2, 60);
+	// Fix db (lowest bit of d)
+	PUTBITSHIGH( thumbT_word1, thumbT59_word1,  1, 32);
+
+	// Make sure that red overflows. The bits examined are 60, 59, 57 and 56 of the new word:
+	const uint8 r60 = GETBITSHIGH( thumbT_word1, 1, 60);
+	const uint8 r59 = GETBITSHIGH( thumbT_word1, 1, 59);
+	const uint8 r57 = GETBITSHIGH( thumbT_word1, 1, 57);
+	const uint8 r56 = GETBITSHIGH( thumbT_word1, 1, 56);
+	// The following r60 r59 r57 r56 bit sequences should be padded with ones: 0111, 1010, 1011, 1101, 1110, 1111
+	// The following logical expression checks for the presence of any of those:
+	const uint8 pad = (r60 & r57) | (!r60 & r59 & r57 & r56) | (r60 & r59 & !r57 & r56);
+	const uint8 pad_mask = 0xf*pad;
+	PUTBITSHIGH( thumbT_word1, pad_mask,  3, 63);
+	PUTBITSHIGH( thumbT_word1, !pad,  1, 58);
+
+	// Set diffbit
+	PUTBITSHIGH( thumbT_word1, 0,  1, 33);
+
+	// The second word is passed through unchanged.
+	thumbT_word2 = thumbT59_word2;
+}
+
+// Returns true when the 4x4 block at (ix,iy) holds at least one pixel with alpha < 128, i.e. one that would get alpha = 0 in punchthrough mode.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+bool hasAlpha(uint8* alphaimg, int ix, int iy, int width) 
+{
+	// Visit the 16 block pixels column by column: i/4 selects the column, i%4 the row.
+	for(int i=0; i<16; ++i) 
+	{
+		const int col = ix + i/4;
+		const int row = iy + i%4;
+		const bool transparent = alphaimg[col + row*width] < 128;
+		if(transparent) 
+			return true;
+	}
+
+	// Every pixel of the block is at or above the threshold.
+	return false;
+}
+
+// Compress a block with ETC2 RGB (or, for the RGBA1/sRGBA1 formats, ETC2 with punchthrough alpha). The best of the tried modes is written to compressed1/compressed2; imgdec is used as decode scratch space.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockETC2Fast(uint8 *img, uint8* alphaimg, uint8 *imgdec,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2)
+{
+	unsigned int etc1_word1;
+	unsigned int etc1_word2;
+	double error_etc1;
+
+	unsigned int planar57_word1;
+	unsigned int planar57_word2;
+	unsigned int planar_word1;
+	unsigned int planar_word2;
+	double error_planar;
+
+	unsigned int thumbT59_word1;
+	unsigned int thumbT59_word2;
+	unsigned int thumbT_word1;
+	unsigned int thumbT_word2;
+	double error_thumbT;
+	
+	unsigned int thumbH58_word1;
+	unsigned int thumbH58_word2;
+	unsigned int thumbH_word1;
+	unsigned int thumbH_word2;
+	double error_thumbH;
+
+	double error_best;
+	signed char best_char;
+	int best_mode;
+	// The punchthrough branch depends on the file-level format variable.
+	if(format==ETC2PACKAGE_RGBA1_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS)
+	{
+		/*                if we have one-bit alpha, we never use the individual mode,
+		                  instead that bit flags that one of our four offsets will instead
+						          mean transparent (with 0 offset for color channels) */
+
+		/*                the regular ETC individual mode is disabled, but the old T, H and planar modes
+						          are kept unchanged and may be used for blocks without transparency.
+						          Introduced are old ETC with only differential coding,
+						          ETC differential but with 3 offsets and transparent,
+						          and T-mode with 3 colors plus transparent.*/
+
+		/*                in a fairly hackish manner, error_etc1, etc1_word1 and etc1_word2 will
+		                  represent the best out of the three introduced modes, to be compared
+						          with the three kept modes in the old code*/
+
+		unsigned int tempword1, tempword2;
+		double temperror;
+		//try regular differential transparent mode
+
+		int testerr= compressBlockDifferentialWithAlpha(true,img,alphaimg, imgdec,width,height,startx,starty,etc1_word1,etc1_word2);
+		//alphadec is an array allocation: every exit path below must release it with delete[].
+		uint8* alphadec = new uint8[width*height];
+		decompressBlockDifferentialWithAlpha(etc1_word1, etc1_word2, imgdec, alphadec,width, height, startx, starty);
+		error_etc1 = calcBlockErrorRGBA(img, imgdec, alphaimg,width, height, startx, starty);
+		if(error_etc1!=testerr) 
+		{
+			printf("testerr: %d, etcerr: %lf\n",testerr,error_etc1);
+		}
+		//try T-mode with transparencies
+		//the T-mode candidate replaces the differential one only if its error is lower
+		compressBlockTHUMB59TAlpha(img,alphaimg,width,height,startx,starty,tempword1,tempword2);
+		decompressBlockTHUMB59TAlpha(tempword1,tempword2,imgdec, alphadec, width,height,startx,starty);
+		temperror=calcBlockErrorRGBA(img, imgdec, alphaimg, width, height, startx, starty);
+		if(temperror<error_etc1) 
+		{
+			error_etc1=temperror;
+			stuff59bitsDiffFalse(tempword1,tempword2,etc1_word1,etc1_word2);
+		}
+		compressBlockTHUMB58HAlpha(img,alphaimg,width,height,startx,starty,tempword1,tempword2);
+		decompressBlockTHUMB58HAlpha(tempword1,tempword2,imgdec, alphadec, width,height,startx,starty);
+		temperror=calcBlockErrorRGBA(img, imgdec, alphaimg, width, height, startx, starty);
+		if(temperror<error_etc1) 
+		{
+			error_etc1=temperror;
+			stuff58bitsDiffFalse(tempword1,tempword2,etc1_word1,etc1_word2);
+		}
+		//if we have transparency in this pixel, we know that one of these two modes was best..
+		if(hasAlpha(alphaimg,startx,starty,width)) 
+		{
+			compressed1=etc1_word1;
+			compressed2=etc1_word2;
+			delete[] alphadec;
+			return;
+		}
+		//error_etc1=255*255*1000;
+		//otherwise, they MIGHT have been the best, although that's unlikely.. anyway, try old differential mode now
+		
+		compressBlockDifferentialWithAlpha(false,img,alphaimg,imgdec,width,height,startx,starty,tempword1,tempword2);
+		decompressBlockDiffFlip(tempword1, tempword2, imgdec, width, height, startx, starty);
+		temperror = calcBlockErrorRGB(img, imgdec, width, height, startx, starty);
+		decompressBlockDifferentialWithAlpha(tempword1,tempword2,imgdec,alphadec,width,height,startx,starty);
+		if(temperror<error_etc1) 
+		{
+			error_etc1=temperror;
+			etc1_word1=tempword1;
+			etc1_word2=tempword2;
+		}
+		delete[] alphadec;
+		//drop out of this if, and test old T, H and planar modes (we have already returned if there are transparent pixels in this block)
+	}
+	else 
+	{
+		//this includes individual mode, and therefore doesn't apply in case of punch-through alpha.
+		compressBlockDiffFlipFast(img, imgdec, width, height, startx, starty, etc1_word1, etc1_word2);
+		decompressBlockDiffFlip(etc1_word1, etc1_word2, imgdec, width, height, startx, starty);
+		error_etc1 = calcBlockErrorRGB(img, imgdec, width, height, startx, starty);
+	}
+	//these modes apply regardless of whether we want punch-through alpha or not.
+	//error etc_1 and etc1_word1/etc1_word2 contain previous best candidate.
+	compressBlockPlanar57(img, width, height, startx, starty, planar57_word1, planar57_word2);
+	decompressBlockPlanar57(planar57_word1, planar57_word2, imgdec, width, height, startx, starty);
+	error_planar = calcBlockErrorRGB(img, imgdec, width, height, startx, starty);
+	stuff57bits(planar57_word1, planar57_word2, planar_word1, planar_word2);
+
+	compressBlockTHUMB59TFastest(img,width, height, startx, starty, thumbT59_word1, thumbT59_word2);
+	decompressBlockTHUMB59T(thumbT59_word1, thumbT59_word2, imgdec, width, height, startx, starty);			
+	error_thumbT = calcBlockErrorRGB(img, imgdec, width, height, startx, starty);
+	stuff59bits(thumbT59_word1, thumbT59_word2, thumbT_word1, thumbT_word2);
+
+	compressBlockTHUMB58HFastest(img,width,height,startx, starty, thumbH58_word1, thumbH58_word2);
+	decompressBlockTHUMB58H(thumbH58_word1, thumbH58_word2, imgdec, width, height, startx, starty);			
+	error_thumbH = calcBlockErrorRGB(img, imgdec, width, height, startx, starty);
+	stuff58bits(thumbH58_word1, thumbH58_word2, thumbH_word1, thumbH_word2);
+
+	error_best = error_etc1;
+	compressed1 = etc1_word1;
+	compressed2 = etc1_word2;
+	best_char = '.';
+	best_mode = MODE_ETC1;
+	//each later mode has to be strictly better than the current best to replace it.
+	if(error_planar < error_best)
+	{
+		compressed1 = planar_word1;
+        compressed2 = planar_word2;
+		best_char = 'p';
+		error_best = error_planar;	
+		best_mode = MODE_PLANAR;
+	}
+	if(error_thumbT < error_best)
+	{
+		compressed1 = thumbT_word1;
+        compressed2 = thumbT_word2;
+		best_char = 'T';
+		error_best = error_thumbT;
+		best_mode = MODE_THUMB_T;
+	}
+	if(error_thumbH < error_best)
+	{
+		compressed1 = thumbH_word1;
+        compressed2 = thumbH_word2;
+		best_char = 'H';
+		error_best = error_thumbH;
+		best_mode = MODE_THUMB_H;
+	}
+	
+	switch(best_mode)
+	{
+		// Now see which mode won and compress that a little bit harder
+	case MODE_THUMB_T:
+		compressBlockTHUMB59TFast(img,width, height, startx, starty, thumbT59_word1, thumbT59_word2);
+		decompressBlockTHUMB59T(thumbT59_word1, thumbT59_word2, imgdec, width, height, startx, starty);			
+		error_thumbT = calcBlockErrorRGB(img, imgdec, width, height, startx, starty);
+		stuff59bits(thumbT59_word1, thumbT59_word2, thumbT_word1, thumbT_word2);
+		if(error_thumbT < error_best)
+		{
+			compressed1 = thumbT_word1;
+			compressed2 = thumbT_word2;
+		}
+		break;
+	case MODE_THUMB_H:
+		compressBlockTHUMB58HFast(img,width,height,startx, starty, thumbH58_word1, thumbH58_word2);
+		decompressBlockTHUMB58H(thumbH58_word1, thumbH58_word2, imgdec, width, height, startx, starty);			
+		error_thumbH = calcBlockErrorRGB(img, imgdec, width, height, startx, starty);
+		stuff58bits(thumbH58_word1, thumbH58_word2, thumbH_word1, thumbH_word2);
+		if(error_thumbH < error_best)
+		{
+			compressed1 = thumbH_word1;
+			compressed2 = thumbH_word2;
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+// Compress an ETC2 RGB block using perceptual error metric. The best of the tried modes is written to compressed1/compressed2; imgdec is used as decode scratch space.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockETC2FastPerceptual(uint8 *img, uint8 *imgdec,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2)
+{
+	unsigned int etc1_word1;
+	unsigned int etc1_word2;
+	double error_etc1;
+
+	unsigned int planar57_word1;
+	unsigned int planar57_word2;
+	unsigned int planar_word1;
+	unsigned int planar_word2;
+	double error_planar;
+
+	unsigned int thumbT59_word1;
+	unsigned int thumbT59_word2;
+	unsigned int thumbT_word1;
+	unsigned int thumbT_word2;
+	double error_thumbT;
+	
+	unsigned int thumbH58_word1;
+	unsigned int thumbH58_word2;
+	unsigned int thumbH_word1;
+	unsigned int thumbH_word2;
+	double error_thumbH;
+
+	double error_best;
+	signed char best_char;
+	int best_mode;
+	// Every candidate below is scored as 1000*calcBlockPerceptualErrorRGB so that the scores are comparable.
+	compressBlockDiffFlipFastPerceptual(img, imgdec, width, height, startx, starty, etc1_word1, etc1_word2);
+	decompressBlockDiffFlip(etc1_word1, etc1_word2, imgdec, width, height, startx, starty);
+	error_etc1 = 1000*calcBlockPerceptualErrorRGB(img, imgdec, width, height, startx, starty);
+
+	compressBlockPlanar57(img, width, height, startx, starty, planar57_word1, planar57_word2);
+	decompressBlockPlanar57(planar57_word1, planar57_word2, imgdec, width, height, startx, starty);
+	error_planar = 1000*calcBlockPerceptualErrorRGB(img, imgdec, width, height, startx, starty);
+	stuff57bits(planar57_word1, planar57_word2, planar_word1, planar_word2);
+
+	compressBlockTHUMB59TFastestPerceptual1000(img,width, height, startx, starty, thumbT59_word1, thumbT59_word2);
+	decompressBlockTHUMB59T(thumbT59_word1, thumbT59_word2, imgdec, width, height, startx, starty);			
+	error_thumbT = 1000*calcBlockPerceptualErrorRGB(img, imgdec, width, height, startx, starty);
+	stuff59bits(thumbT59_word1, thumbT59_word2, thumbT_word1, thumbT_word2);
+
+	compressBlockTHUMB58HFastestPerceptual1000(img,width,height,startx, starty, thumbH58_word1, thumbH58_word2);
+	decompressBlockTHUMB58H(thumbH58_word1, thumbH58_word2, imgdec, width, height, startx, starty);			
+	error_thumbH = 1000*calcBlockPerceptualErrorRGB(img, imgdec, width, height, startx, starty);
+	stuff58bits(thumbH58_word1, thumbH58_word2, thumbH_word1, thumbH_word2);
+
+	error_best = error_etc1;
+	compressed1 = etc1_word1;
+	compressed2 = etc1_word2;
+	best_char = '.';
+	best_mode = MODE_ETC1;
+	// Each later mode has to be strictly better than the current best to replace it.
+	if(error_planar < error_best)
+	{
+		compressed1 = planar_word1;
+		compressed2 = planar_word2;
+		best_char = 'p';
+		error_best = error_planar;	
+		best_mode = MODE_PLANAR;
+	}
+	if(error_thumbT < error_best)
+	{
+		compressed1 = thumbT_word1;
+		compressed2 = thumbT_word2;
+		best_char = 'T';
+		error_best = error_thumbT;
+		best_mode = MODE_THUMB_T;
+	}
+	if(error_thumbH < error_best)
+	{
+		compressed1 = thumbH_word1;
+		compressed2 = thumbH_word2;
+		best_char = 'H';
+		error_best = error_thumbH;
+		best_mode = MODE_THUMB_H;
+	}
+	
+	switch(best_mode)
+	{
+		// Now see which mode won and compress that a little bit harder
+	case MODE_THUMB_T:
+		compressBlockTHUMB59TFast(img,width, height, startx, starty, thumbT59_word1, thumbT59_word2);
+		decompressBlockTHUMB59T(thumbT59_word1, thumbT59_word2, imgdec, width, height, startx, starty);			
+		error_thumbT = 1000*calcBlockPerceptualErrorRGB(img, imgdec, width, height, startx, starty); // scored on the same scale as error_best
+		stuff59bits(thumbT59_word1, thumbT59_word2, thumbT_word1, thumbT_word2);
+		if(error_thumbT < error_best)
+		{
+			compressed1 = thumbT_word1;
+			compressed2 = thumbT_word2;
+		}
+		break;
+	case MODE_THUMB_H:
+		compressBlockTHUMB58HFast(img,width,height,startx, starty, thumbH58_word1, thumbH58_word2);
+		decompressBlockTHUMB58H(thumbH58_word1, thumbH58_word2, imgdec, width, height, startx, starty);			
+		error_thumbH = 1000*calcBlockPerceptualErrorRGB(img, imgdec, width, height, startx, starty); // scored on the same scale as error_best
+		stuff58bits(thumbH58_word1, thumbH58_word2, thumbH_word1, thumbH_word2);
+		if(error_thumbH < error_best)
+		{
+			compressed1 = thumbH_word1;
+			compressed2 = thumbH_word2;
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+// Writes the two low-order bytes of *blockadr to f, most significant byte first.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void write_big_endian_2byte_word(unsigned short *blockadr, FILE *f)
+{
+	const unsigned short word = *blockadr;
+
+	// Split into bytes, high byte first; the masks keep only the low 8 bits of each shift.
+	const uint8 hi = (word >> 8) & 0xff;
+	const uint8 lo = (word >> 0) & 0xff;
+	const uint8 out[2] = { hi, lo };
+
+	// One single-byte fwrite per byte, in output order; the fwrite results are not checked.
+	for(int k = 0; k < 2; ++k)
+		fwrite(&out[k],1,1,f);
+}
+
+
+// Writes the four low-order bytes of *blockadr to f, most significant byte first.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void write_big_endian_4byte_word(unsigned int *blockadr, FILE *f)
+{
+	const unsigned int word = *blockadr;
+	uint8 out[4];
+
+	// out[k] takes the byte found at bit offset 24-8*k, so out[0] is the
+	// most significant byte and out[3] the least significant one.
+	for(int k = 0; k < 4; ++k)
+	{
+		out[k] = (word >> (24 - 8*k)) & 0xff;
+	}
+
+	// One single-byte fwrite per byte, in output order.
+	// The fwrite results are not checked.
+	for(int k = 0; k < 4; ++k)
+		fwrite(&out[k],1,1,f);
+}
+
+extern int alphaTable[256][8];
+extern int alphaBase[16][4];
+
+// valtab holds precalculated data used for compressing using EAC2.
+// Note that valtab is constructed using get16bits11bits, which means
+// that it already is expanded to 16 bits.
+// Note also that its contents depend on the value of formatSigned.
+int *valtab;
+
+void setupAlphaTableAndValtab()
+{
+	// The alpha table is set up first, before any valtab entry is computed.
+	setupAlphaTable();
+
+	// One entry per (base, tab, mul, index) combination: 256*16*16*8 entries,
+	// which is exactly the 1024*512 ints allocated for the precomputation table.
+	const int baseCount  = 256;
+	const int tabCount   = 16;
+	const int mulCount   = 16;
+	const int indexCount = 8;
+	valtab = new int[1024*512];
+	// Entries are stored with index varying fastest, then mul, then tab, then base.
+	int *slot = valtab;
+	for(int base=0; base<baseCount; ++base)
+	for(int tab=0; tab<tabCount; ++tab)
+	for(int mul=0; mul<mulCount; ++mul)
+	for(int index=0; index<indexCount; ++index, ++slot)
+	{
+		if(!formatSigned)
+		{
+			*slot = get16bits11bits(base,tab,mul,index);
+			continue;
+		}
+		// Signed format: the value goes through an int16, then is offset by 256*128.
+		const int16 signedVal = get16bits11signed(base,tab,mul,index);
+		*slot = signedVal + 256*128;
+	}
+}
+
+// Reads alpha data
+//
+// Loads the file "alpha.pgm" through fReadPGM() and returns it in `data`,
+// padded so that both dimensions are multiples of four.
+//
+//   data            receives the (possibly padded) sample buffer. It is either
+//                   the buffer produced by fReadPGM() or a malloc'ed copy, so
+//                   the caller releases it with free().
+//   width, height   passed to fReadPGM() by reference.
+//   extendedwidth,
+//   extendedheight  receive width/height rounded up to a multiple of 4.
+//
+// The global `format` selects 8-bit samples (the RGBA formats) or 16-bit
+// samples stored as two bytes (ETC2PACKAGE_R_NO_MIPMAPS); any other format
+// terminates the program. For the 1-bit-alpha formats every sample is
+// thresholded to 0 or 255 at the end.
+//
+// The buffer returned by fReadPGM() is released once its contents have been
+// copied into the padded buffer (it used to be leaked), and a failed
+// allocation of the padded buffer is now reported instead of being
+// dereferenced.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void readAlpha(uint8* &data, int &width, int &height, int &extendedwidth, int &extendedheight) 
+{
+	uint8* tempdata = NULL;
+	int wantedBitDepth;
+	if(format==ETC2PACKAGE_RGBA_NO_MIPMAPS||format==ETC2PACKAGE_RGBA1_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS) 
+	{
+		wantedBitDepth=8;
+	}
+	else if(format==ETC2PACKAGE_R_NO_MIPMAPS) 
+	{
+		wantedBitDepth=16;
+	}
+	else 
+	{
+		printf("invalid format for alpha reading!\n");
+		exit(1);
+	}
+	fReadPGM("alpha.pgm",width,height,tempdata,wantedBitDepth);
+	extendedwidth=4*((width+3)/4);
+	extendedheight=4*((height+3)/4);
+
+	if(width==extendedwidth&&height==extendedheight) 
+	{
+		// Already block aligned: hand the loaded buffer straight to the caller.
+		data=tempdata;
+	}
+	else 
+	{
+		data = (uint8*)malloc(extendedwidth*extendedheight*wantedBitDepth/8);
+		if(!data)
+		{
+			printf("Error: could not allocate memory for alpha\n");
+			exit(1);
+		}
+		// Padding pixels repeat the most recently read source sample. The
+		// image is walked column by column (x outer, y inner).
+		uint8 last=0;
+		uint8 lastlast=0;
+		for(int x=0; x<extendedwidth; x++) 
+		{
+			for(int y=0; y<extendedheight; y++) 
+			{
+				if(wantedBitDepth==8) 
+				{
+					if(x<width&&y<height) 
+					{
+						last = tempdata[x+y*width];
+					}
+					data[x+y*extendedwidth]=last;
+				}
+				else 
+				{
+					// 16-bit samples occupy two consecutive bytes.
+					if(x<width&&y<height) 
+					{
+						last = tempdata[(x+y*width)*2];
+						lastlast = tempdata[(x+y*width)*2+1];
+					}
+					data[(x+y*extendedwidth)*2]=last;
+					data[(x+y*extendedwidth)*2+1]=lastlast;
+				}
+			}
+		}
+		// The source buffer has been copied and is no longer referenced.
+		// Other callers of fReadPGM() in this file release its buffer with free().
+		free(tempdata);
+	}
+	if(format==ETC2PACKAGE_RGBA1_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS) 
+	{
+		// Punch-through alpha: reduce every sample to fully transparent or fully opaque.
+		for(int x=0; x<extendedwidth; x++) 
+		{
+			for(int y=0; y<extendedheight; y++) 
+			{
+				if(data[x+y*extendedwidth]<128)
+					data[x+y*extendedwidth]=0;
+				else
+					data[x+y*extendedwidth]=255;
+			}
+		}
+	}
+}
+
+
+// Compresses the alpha part of a GL_COMPRESSED_RGBA8_ETC2_EAC block.
+//
+// Heuristic search for a base alpha value and an offset table for the 4x4
+// block whose top-left pixel is (ix, iy).
+//   data        8-bit samples, one byte per pixel, `width` pixels per row.
+//   height      not used by this function.
+//   returnData  receives 8 bytes: [0] base alpha, [1] table number,
+//               [2..7] sixteen 3-bit indices, written pixel by pixel with
+//               x as the outer loop and y as the inner loop.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockAlphaFast(uint8 * data, int ix, int iy, int width, int height, uint8* returnData) 
+{
+	int alphasum=0;
+	int maxdist=-2;
+	for(int x=0; x<4; x++) 
+	{
+		for(int y=0; y<4; y++) 
+		{
+			alphasum+=data[ix+x+(iy+y)*width];
+		}
+	}
+	int alpha = (int)( ((float)alphasum)/16.0f+0.5f); //average pixel value, used as guess for base value.
+	for(int x=0; x<4; x++) 
+	{
+		for(int y=0; y<4; y++) 
+		{
+			if(abs(alpha-data[ix+x+(iy+y)*width])>maxdist)
+				maxdist=abs(alpha-data[ix+x+(iy+y)*width]); //maximum distance from average
+		}
+	}
+	int approxPos = (maxdist*255)/160-4;  //experimentally derived formula for calculating approximate table position given a max distance from average
+	if(approxPos>255)
+		approxPos=255;
+	int startTable=approxPos-15; //first table to be tested
+	if(startTable<0)
+		startTable=0;
+	// clamp() is defined elsewhere in the file; presumably it limits its argument to 0..255 -- confirm.
+	// NOTE(review): the table loop below runs while table<endTable, so endTable itself is never
+	// tried even though it is described as the last table to be tested -- confirm this is intended.
+	int endTable=clamp(approxPos+15);  //last table to be tested
+
+	int bestsum=1000000000;
+	int besttable=-3; 
+	int bestalpha=128;
+	// prevalpha is never updated afterwards, so every table starts its search from the block average.
+	int prevalpha=alpha;
+
+	//main loop: determine best base alpha value and offset table to use for compression
+	//try some different alpha tables.
+	// The loop also stops early as soon as an exact match (bestsum==0) has been found.
+	for(int table = startTable; table<endTable&&bestsum>0; table++)
+	{
+		int tablealpha=prevalpha;
+		int tablebestsum=1000000000;
+		//test some different alpha values, trying to find the best one for the given table.	
+		// Coarse-to-fine search: alphascale takes the values 16, 4 and 1. Each pass scans
+		// around tablealpha, the best alpha recorded so far for this table.
+		for(int alphascale=16; alphascale>0; alphascale/=4) 
+		{
+			int startalpha;
+			int endalpha;
+			if(alphascale==16) 
+			{
+				startalpha = clamp(tablealpha-alphascale*4);
+				endalpha = clamp(tablealpha+alphascale*4);
+			}
+			else 
+			{
+				startalpha = clamp(tablealpha-alphascale*2);
+				endalpha = clamp(tablealpha+alphascale*2);
+			}
+			for(alpha=startalpha; alpha<=endalpha; alpha+=alphascale) 
+			{
+				int sum=0;
+				int val,diff,bestdiff=10000000,index;
+				for(int x=0; x<4; x++) 
+				{
+					for(int y=0; y<4; y++) 
+					{
+						//compute best offset here, add square difference to sum..
+						val=data[ix+x+(iy+y)*width];
+						bestdiff=1000000000;
+						//the values are always ordered from small to large, with the first 4 being negative and the last 4 positive
+						//search is therefore made in the order 0-1-2-3 or 7-6-5-4, stopping when error increases compared to the previous entry tested.
+						// clamp_table is indexed with a +255 bias so that a negative alpha+offset
+						// still yields a non-negative index (table contents are defined elsewhere).
+						if(val>alpha) 
+						{ 
+							for(index=7; index>3; index--) 
+							{
+								diff=clamp_table[alpha+(int)(alphaTable[table][index])+255]-val;
+								diff*=diff;
+								if(diff<=bestdiff) 
+								{
+									bestdiff=diff;
+								}
+								else
+									break;
+							}
+						}
+						else 
+						{
+							for(index=0; index<4; index++) 
+							{
+								diff=clamp_table[alpha+(int)(alphaTable[table][index])+255]-val;
+								diff*=diff;
+								if(diff<bestdiff) 
+								{
+									bestdiff=diff;
+								}
+								else
+									break;
+							}
+						}
+
+						//best diff here is bestdiff, add it to sum!
+						sum+=bestdiff;
+						//if the sum here is worse than previously best already, there's no use in continuing the count..
+						//note that tablebestsum could be used for more precise estimation, but the speedup gained here is deemed more important.
+						// NOTE(review): after this early exit `sum` only covers part of the block, yet it
+						// can still be stored in tablebestsum/tablealpha by the comparison further down.
+						if(sum>bestsum) 
+						{ 
+							x=9999; //just to make it large and get out of the x<4 loop
+							break;
+						}
+					}
+				}
+				if(sum<tablebestsum) 
+				{
+					tablebestsum=sum;
+					tablealpha=alpha;
+				}
+				if(sum<bestsum) 
+				{
+					bestsum=sum;
+					besttable=table;
+					bestalpha=alpha;
+			}
+		}
+		// The two closing braces above end the if(sum<bestsum) block and the alpha loop (the
+		// indentation is misleading); this check still belongs to the alphascale loop. It forces
+		// the loop to end after the alphascale==1 pass.
+		if(alphascale<=2)
+			alphascale=0;
+		}
+	}
+
+	alpha=bestalpha;	
+
+	//"good" alpha value and table are known!
+	//store them, then loop through the pixels again and print indices.
+
+	returnData[0]=alpha;
+	returnData[1]=besttable;
+	for(int pos=2; pos<8; pos++) 
+	{
+		returnData[pos]=0;
+	}
+	// byte/bit track the write position inside returnData while the 16 indices are packed.
+	int byte=2;
+	int bit=0;
+	for(int x=0; x<4; x++) 
+	{
+		for(int y=0; y<4; y++) 
+		{
+			//find correct index
+			int besterror=1000000;
+			int bestindex=99;
+			for(int index=0; index<8; index++) //no clever ordering this time, as this loop is only run once per block anyway
+			{ 
+				int error= (clamp(alpha +(int)(alphaTable[besttable][index]))-data[ix+x+(iy+y)*width])*(clamp(alpha +(int)(alphaTable[besttable][index]))-data[ix+x+(iy+y)*width]);
+				if(error<besterror) 
+				{
+					besterror=error;
+					bestindex=index;
+				}
+			}
+			//best table index has been determined.
+			//pack 3-bit index into compressed data, one bit at a time
+			// getbit() is defined elsewhere; presumably it moves bit (2-numbit) of bestindex
+			// to bit position (7-bit) -- confirm.
+			for(int numbit=0; numbit<3; numbit++) 
+			{
+				returnData[byte]|=getbit(bestindex,2-numbit,7-bit);
+
+				bit++;
+				if(bit>7) 
+				{
+					bit=0;
+					byte++;
+				}
+			}
+		}
+	}
+}
+
+// Helper for calcError(): position of a (base, table, multiplier, index)
+// combination in a flat table laid out with 8 indices per multiplier,
+// 16 multipliers per table and 16 tables per base.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int getPremulIndex(int base, int tab, int mul, int index) 
+{
+	const int basePart = base << 11;
+	const int tablePart = tab << 7;
+	const int mulPart = mul << 3;
+	return basePart + tablePart + mulPart + index;
+}
+
+// Calculates the error used in compressBlockAlpha16()
+//
+// Sums, over the 4x4 block at (ix, iy), the smallest squared difference
+// between each 16-bit sample (two bytes per pixel, high byte first) and the
+// eight valtab entries belonging to (base, tab, mul).
+// As soon as the running total reaches prevbest, the function gives up and
+// returns prevbest+(1<<30). `height` is not used.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double calcError(uint8* data, int ix, int iy, int width, int height, int base, int tab, int mul, double prevbest) 
+{
+	// The eight candidate values for this (base, tab, mul) are consecutive in valtab.
+	const int *candidates = valtab + getPremulIndex(base,tab,mul,0);
+
+	double total=0;
+	for(int row=0; row<4; row++)
+	{
+		for(int col=0; col<4; col++)
+		{
+			const int pos = 2*(col+ix+(row+iy)*width);
+			const uint8 highByte = data[pos];
+			const uint8 lowByte = data[pos+1];
+			const int sample = (highByte<<8)+lowByte;
+
+			// Start from (2^20)^2, larger than any squared difference seen here.
+			double closest = (1<<20);
+			closest*=closest;
+			for(int k=0; k<8; k++)
+			{
+				double candidateError = sample-candidates[k];
+				candidateError*=candidateError;
+				if(candidateError<closest)
+					closest=candidateError;
+			}
+
+			total+=closest;
+			if(total>=prevbest)
+				return prevbest+(1<<30);
+		}
+	}
+	return total;
+}
+
+// compressBlockAlpha16
+// 
+// Compresses a block using the 11-bit EAC formats.
+// Depends on the global variable formatSigned.
+// 
+// COMPRESSED_R11_EAC (if formatSigned = 0)
+// This is an 11-bit unsigned format. Since we do not have a good 11-bit file format, we use 16-bit pgm instead.
+// Here we assume that, in the input 16-bit pgm file, 0 represents 0.0 and 65535 represents 1.0. The function compressBlockAlpha16 
+// will find the compressed block which best matches the data. In detail, it will find the compressed block, which 
+// if decompressed, will generate an 11-bit block that after bit replication to 16-bits will generate the closest 
+// block to the original 16-bit pgm block.
+// 
+// COMPRESSED_SIGNED_R11_EAC (if formatSigned = 1)
+// This is an 11-bit signed format. Since we do not have any signed file formats, we use unsigned 16-bit pgm instead.
+// Hence we assume that, in the input 16-bit pgm file, 1 represents -1.0, 32768 represents 0.0 and 65535 represents 1.0. 
+// The function compressBlockAlpha16 will find the compressed block, which if decompressed, will generate a signed
+// 11-bit block that after bit replication to 16-bits and conversion to unsigned (1 equals -1.0, 32768 equals 0.0 and 
+// 65535 equals 1.0) will generate the closest block to the original 16-bit pgm block. 
+//
+// COMPRESSED_RG11_EAC is compressed by calling the function twice, ditto for COMPRESSED_SIGNED_RG11_EAC.
+// 
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+// Parameters:
+//   data        16-bit samples, two bytes per pixel (high byte first),
+//               `width` pixels per row.
+//   ix, iy      top-left pixel of the 4x4 block to compress.
+//   height      not used by this function.
+//   returnData  receives 8 bytes: [0] base value (stored as a signed byte,
+//               base-128, when formatSigned is set), [1] (multiplier<<4)+table,
+//               [2..7] sixteen 3-bit indices, written pixel by pixel with x as
+//               the outer loop and y as the inner loop.
+//
+// The search is exhaustive over all 256*16*16 (base, table, multiplier)
+// combinations; calcError() reads valtab, so setupAlphaTableAndValtab() must
+// have been called for the current value of formatSigned.
+void compressBlockAlpha16(uint8* data, int ix, int iy, int width, int height, uint8* returnData) 
+{
+	// Initialized so that they are never read while indeterminate.
+	unsigned int bestbase=0, besttable=0, bestmul=0;
+	double besterror;
+	besterror=1<<20;
+	besterror*=besterror;
+	for(int base=0; base<256; base++) 
+	{
+		for(int table=0; table<16; table++) 
+		{
+			for(int mul=0; mul<16; mul++) 
+			{
+				// calcError() bails out early once it cannot beat besterror.
+				double e = calcError(data, ix, iy, width, height,base,table,mul,besterror);
+				if(e<besterror) 
+				{
+					bestbase=base;
+					besttable=table;
+					bestmul=mul;
+					besterror=e;
+				}
+			}
+		}
+	}
+	returnData[0]=bestbase;
+	returnData[1]=(bestmul<<4)+besttable;
+	if(formatSigned) 
+	{
+		//if we have a signed format, the base value should be given as a signed byte. 
+		signed char signedbase = bestbase-128;
+		returnData[0]=*((uint8*)(&signedbase));
+	}
+	
+	for(int i=2; i<8; i++) 
+	{
+		returnData[i]=0;
+	}
+
+	// byte/bit track the write position inside returnData while the 16 indices are packed.
+	int byte=2;
+	int bit=0;
+	for (int x=0; x<4; x++) 
+	{
+		for(int y=0; y<4; y++) 
+		{
+			// Starting value must exceed every possible squared difference of two
+			// 16-bit values (at most 65535^2) so that some index is always selected.
+			// The previous start of (255*255)^2 was smaller than that, which could
+			// leave the index at its placeholder value.
+			double bestIndexError=65536.0*65536.0;
+			int bestindex=0;
+			uint8 byte1 = data[2*(x+ix+(y+iy)*width)];
+			uint8 byte2 = data[2*(x+ix+(y+iy)*width)+1];
+			int alpha = (byte1<<8)+byte2;
+			for(unsigned int index=0; index<8; index++) 
+			{
+				double indexError;
+				if(formatSigned)
+				{
+					// Same bias as used when valtab is built for signed data.
+					int16 val16;
+					int val;
+					val16 = get16bits11signed(bestbase,besttable,bestmul,index);
+					val = val16 + 256*128;
+					indexError = alpha-val;
+				}
+				else
+					indexError = alpha-get16bits11bits(bestbase,besttable,bestmul,index);
+
+				indexError*=indexError;
+				if(indexError<bestIndexError) 
+				{
+					bestIndexError=indexError;
+					bestindex=index;
+				}
+			}
+			
+			// Pack the 3-bit index, most significant bit first.
+			for(int numbit=0; numbit<3; numbit++) 
+			{
+				returnData[byte]|=getbit(bestindex,2-numbit,7-bit);
+				bit++;
+				if(bit>7) 
+				{
+					bit=0;
+					byte++;
+				}
+			}
+		}
+	}
+}
+
+// Exhaustive compression of alpha compression in a GL_COMPRESSED_RGB8_ETC2 block
+//
+// Tries all 256 offset tables and, for each of them, a coarse-to-fine scan of
+// base alpha values for the 4x4 block whose top-left pixel is (ix, iy).
+//   data        8-bit samples, one byte per pixel, `width` pixels per row.
+//   height      not used by this function.
+//   returnData  receives 8 bytes: [0] base alpha, [1] table number,
+//               [2..7] sixteen 3-bit indices, written pixel by pixel with
+//               x as the outer loop and y as the inner loop.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockAlphaSlow(uint8* data, int ix, int iy, int width, int height, uint8* returnData) 
+{
+	//determine the best table and base alpha value for this block using MSE
+	int alphasum=0;
+	// maxdist is declared but never used in this function.
+	int maxdist=-2;
+	for(int x=0; x<4; x++) 
+	{
+		for(int y=0; y<4; y++) 
+		{
+			alphasum+=data[ix+x+(iy+y)*width];
+		}
+	}
+	int alpha = (int)( ((float)alphasum)/16.0f+0.5f); //average pixel value, used as guess for base value.
+
+	int bestsum=1000000000;
+	int besttable=-3; 
+	int bestalpha=128;
+	// prevalpha is never updated afterwards, so every table starts its search from the block average.
+	int prevalpha=alpha;
+
+	//main loop: determine best base alpha value and offset table to use for compression
+	//try some different alpha tables.
+	// The loop also stops early as soon as an exact match (bestsum==0) has been found.
+	for(int table = 0; table<256&&bestsum>0; table++)
+	{
+		int tablealpha=prevalpha;
+		int tablebestsum=1000000000;
+		//test some different alpha values, trying to find the best one for the given table.
+		// alphascale takes the values 32, 4 and 1: the adjustment at the end of the loop body
+		// turns 4 into 8 so that the following division by 8 yields 1 instead of 0.
+		for(int alphascale=32; alphascale>0; alphascale/=8) 
+		{
+			
+			// clamp() is defined elsewhere in the file; presumably it limits its argument to 0..255 -- confirm.
+			int startalpha = clamp(tablealpha-alphascale*4);
+			int endalpha = clamp(tablealpha+alphascale*4);
+			
+			for(alpha=startalpha; alpha<=endalpha; alpha+=alphascale) {
+				int sum=0;
+				int val,diff,bestdiff=10000000,index;
+				for(int x=0; x<4; x++) 
+				{
+					for(int y=0; y<4; y++) 
+					{
+						//compute best offset here, add square difference to sum..
+						val=data[ix+x+(iy+y)*width];
+						bestdiff=1000000000;
+						//the values are always ordered from small to large, with the first 4 being negative and the last 4 positive
+						//search is therefore made in the order 0-1-2-3 or 7-6-5-4, stopping when error increases compared to the previous entry tested.
+						// clamp_table is indexed with a +255 bias so that a negative alpha+offset
+						// still yields a non-negative index (table contents are defined elsewhere).
+						if(val>alpha) 
+						{ 
+							for(index=7; index>3; index--) 
+							{
+								diff=clamp_table[alpha+(alphaTable[table][index])+255]-val;
+								diff*=diff;
+								if(diff<=bestdiff) 
+								{
+									bestdiff=diff;
+								}
+								else
+									break;
+							}
+						}
+						else 
+						{
+							// NOTE(review): this scan goes up to index 4 (index<5), one entry further than
+							// the 0-1-2-3 order described above and than compressBlockAlphaFast -- confirm intent.
+							for(index=0; index<5; index++) 
+							{
+								diff=clamp_table[alpha+(alphaTable[table][index])+255]-val;
+								diff*=diff;
+								if(diff<bestdiff) 
+								{
+									bestdiff=diff;
+								}
+								else
+									break;
+							}
+						}
+
+						//best diff here is bestdiff, add it to sum!
+						sum+=bestdiff;
+						//if the sum here is worse than previously best already, there's no use in continuing the count..
+						// Here the cut-off is the best sum of the current table (tablebestsum).
+						if(sum>tablebestsum) 
+						{ 
+							x=9999; //just to make it large and get out of the x<4 loop
+							break;
+						}
+					}
+				}
+				if(sum<tablebestsum) 
+				{
+					tablebestsum=sum;
+					tablealpha=alpha;
+				}
+				if(sum<bestsum) 
+				{
+					bestsum=sum;
+					besttable=table;
+					bestalpha=alpha;
+				}
+			}
+			if(alphascale==4)
+				alphascale=8;
+		}
+	}
+
+	alpha=bestalpha;	
+	//the best alpha value and table are known!
+	//store them, then loop through the pixels again and print indices.
+	returnData[0]=alpha;
+	returnData[1]=besttable;
+	for(int pos=2; pos<8; pos++) 
+	{
+		returnData[pos]=0;
+	}
+	// byte/bit track the write position inside returnData while the 16 indices are packed.
+	int byte=2;
+	int bit=0;
+	for(int x=0; x<4; x++) 
+	{
+		for(int y=0; y<4; y++) 
+		{
+			//find correct index
+			int besterror=1000000;
+			int bestindex=99;
+			for(int index=0; index<8; index++) //no clever ordering this time, as this loop is only run once per block anyway
+			{ 
+				int error= (clamp(alpha +(int)(alphaTable[besttable][index]))-data[ix+x+(iy+y)*width])*(clamp(alpha +(int)(alphaTable[besttable][index]))-data[ix+x+(iy+y)*width]);
+				if(error<besterror) 
+				{
+					besterror=error;
+					bestindex=index;
+				}
+			}
+			//best table index has been determined.
+			//pack 3-bit index into compressed data, one bit at a time
+			// getbit() is defined elsewhere; presumably it moves bit (2-numbit) of bestindex
+			// to bit position (7-bit) -- confirm.
+			for(int numbit=0; numbit<3; numbit++) 
+			{
+				returnData[byte]|=getbit(bestindex,2-numbit,7-bit);
+
+				bit++;
+				if(bit>7) 
+				{
+					bit=0;
+					byte++;
+				}
+			}
+		}
+	}
+}
+
+// Calculate weighted PSNR
+//
+// Both images hold 3 bytes per pixel (R, G, B) and `width` pixels per row.
+// w1, w2 and w3 weight the squared error of the first, second and third
+// component respectively.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double calculateWeightedPSNR(uint8 *lossyimg, uint8 *origimg, int width, int height, double w1, double w2, double w3)
+{
+	// The PSNR is computed as
+	//
+	//   PSNR = 10 * log_10 ( 255^2 / wMSE )
+	//
+	// with the weighted mean squared error
+	//
+	//   wMSE = 1/(N*M) * sum ( w1*(R' - R)^2 + w2*(G' - G)^2 + w3*(B' - B)^2 )
+	//
+	// Typical weights are 0.299, 0.587, 0.114 for perceptually weighted PSNR
+	// and 1.0/3.0, 1.0/3.0, 1.0/3.0 for nonweighted PSNR.
+
+	double weightedSum = 0;
+	for(int row=0; row<height; row++)
+	{
+		for(int col=0; col<width; col++)
+		{
+			const int at = row*width*3+col*3;
+			double delta;
+
+			delta = lossyimg[at+0] - origimg[at+0];
+			weightedSum = weightedSum + (w1*(delta * delta));
+			delta = lossyimg[at+1] - origimg[at+1];
+			weightedSum = weightedSum + (w2*(delta * delta));
+			delta = lossyimg[at+2] - origimg[at+2];
+			weightedSum = weightedSum + (w3*(delta * delta));
+		}
+	}
+
+	const double wMSE = weightedSum / (width * height);
+	if(wMSE == 0)
+	{
+		// Identical images: the user is told, and the division below is still carried out.
+		printf("********************************************************************\n");
+		printf("There is no difference at all between image files --- infinite PSNR.\n");
+		printf("********************************************************************\n");
+	}
+	return 10*log((1.0*255*255)/wMSE)/log(10.0);
+}
+
+// Calculate unweighted PSNR (every colour component gets the weight 1/3)
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double calculatePSNR(uint8 *lossyimg, uint8 *origimg, int width, int height)
+{
+	// The PSNR is computed as
+	//
+	//   PSNR = 10 * log_10 ( 255^2 / MSE )
+	//
+	// with
+	//
+	//   MSE = 1/(N*M) * sum ( 1/3 * ((R' - R)^2 + (G' - G)^2 + (B' - B)^2) )
+	//
+	// Why the factor 1/3: imagine a grayscale image that is actually just the
+	// red component of a colour image, so that the squared error is (R' - R)^2,
+	// and assume it has a certain signal to noise ratio, say 30 dB. If two more
+	// components (green and blue) with the same signal to noise ratio are
+	// added, the total signal to noise ratio should stay the same. For the
+	// squared error to remain constant, the summed squared errors of the three
+	// components therefore have to be divided by three.
+
+	const double oneThird = (1.0/3.0);
+	return calculateWeightedPSNR(lossyimg, origimg, width, height, oneThird, oneThird, oneThird);
+}
+
+// Decompresses a file
+//
+// Reads the compressed texture `srcfile` (a KTX file when the global ktxFile
+// is set, otherwise a PKM file) and decodes it block by block.
+//
+//   img            receives a malloc'ed image with 3 bytes per pixel, or 6 bytes
+//                  per pixel for ETC2PACKAGE_RG_NO_MIPMAPS (two 16-bit channels
+//                  followed by two zero bytes).
+//   alphaimg       receives a malloc'ed alpha/R channel for the formats that
+//                  have one. It is not assigned for formats without alpha, and
+//                  it is set to NULL for RG data that had to be cropped.
+//   active_width,
+//   active_height  receive the size of the area holding real image data. When
+//                  it differs from the block-padded size, the returned buffers
+//                  are cropped to the active area.
+//
+// Side effects: sets the global `format`, may set the globals `formatSigned`
+// and `codec`, prints progress information and terminates the program with a
+// non-zero exit code on any error.
+//
+// Changes compared to the earlier version of this function:
+//   - the second channel buffer used for RG data is always released,
+//   - the cropped alpha buffer allocation is checked,
+//   - the padded size of KTX images is no longer squeezed through a 16-bit
+//     variable,
+//   - allocation failures exit with status 1 instead of 0,
+//   - surplus printf arguments were dropped.
+//
+// NOTE(review): the return values of fread() are still not checked, so a
+// truncated file is decoded from indeterminate data.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void uncompressFile(char *srcfile, uint8* &img, uint8 *&alphaimg, int& active_width, int& active_height)
+{
+	FILE *f;
+	int width,height;
+	unsigned int block_part1, block_part2;
+	uint8 *newimg=NULL, *newalphaimg=NULL, *alphaimg2=NULL;
+	unsigned short w, h;
+	int xx, yy;
+	unsigned char magic[4];
+	unsigned char version[2];
+	unsigned short texture_type;
+
+	f=fopen(srcfile,"rb");
+	if(!f)
+	{
+		printf("Error: could not open <%s>.\n",srcfile);
+		exit(1);
+	}
+
+	// Load table
+	readCompressParams();
+	if(ktxFile) 
+	{
+		//read ktx header..
+		KTX_header header;
+		fread(&header,sizeof(KTX_header),1,f);
+		//read size parameter, which we don't actually need..
+		unsigned int bitsize;
+		fread(&bitsize,sizeof(unsigned int),1,f);
+
+		active_width=header.pixelWidth;
+		active_height = header.pixelHeight;
+		// Round up to whole 4x4 blocks. Computed directly as int so that
+		// sizes above 65535 are not truncated.
+		width = ((active_width+3)/4)*4;
+		height = ((active_height+3)/4)*4;
+
+		if(header.glInternalFormat==GL_COMPRESSED_SIGNED_R11_EAC) 
+		{
+			format=ETC2PACKAGE_R_NO_MIPMAPS;
+			formatSigned=1;
+		}
+		else if(header.glInternalFormat==GL_COMPRESSED_R11_EAC) 
+		{
+			format=ETC2PACKAGE_R_NO_MIPMAPS;
+		}
+		else if(header.glInternalFormat==GL_COMPRESSED_SIGNED_RG11_EAC) 
+		{
+			format=ETC2PACKAGE_RG_NO_MIPMAPS;
+			formatSigned=1;
+		}
+		else if(header.glInternalFormat==GL_COMPRESSED_RG11_EAC) 
+		{
+			format=ETC2PACKAGE_RG_NO_MIPMAPS;
+		}
+		else if(header.glInternalFormat==GL_COMPRESSED_RGB8_ETC2) 
+		{
+			format=ETC2PACKAGE_RGB_NO_MIPMAPS;
+		}
+		else if(header.glInternalFormat==GL_COMPRESSED_SRGB8_ETC2) 
+		{
+			format=ETC2PACKAGE_sRGB_NO_MIPMAPS;
+		}
+		else if(header.glInternalFormat==GL_COMPRESSED_RGBA8_ETC2_EAC) 
+		{
+			format=ETC2PACKAGE_RGBA_NO_MIPMAPS;
+		}
+		else if(header.glInternalFormat==GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC) 
+		{
+			format=ETC2PACKAGE_sRGBA_NO_MIPMAPS;
+		}
+		else if(header.glInternalFormat==GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2) 
+		{
+			format=ETC2PACKAGE_RGBA1_NO_MIPMAPS;
+		}
+		else if(header.glInternalFormat==GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2) 
+		{
+			format=ETC2PACKAGE_sRGBA1_NO_MIPMAPS;
+		}
+		else if(header.glInternalFormat==GL_ETC1_RGB8_OES) 
+		{
+			format=ETC1_RGB_NO_MIPMAPS;
+			codec=CODEC_ETC;
+		}
+		else 
+		{
+			printf("ktx file has unknown glInternalFormat (not etc compressed)!\n");
+			exit(1);
+		}
+	}
+	else 
+	{
+		// Read magic number
+		fread(&magic[0], sizeof(unsigned char), 1, f);
+		fread(&magic[1], sizeof(unsigned char), 1, f);
+		fread(&magic[2], sizeof(unsigned char), 1, f);
+		fread(&magic[3], sizeof(unsigned char), 1, f);
+		if(!(magic[0] == 'P' && magic[1] == 'K' && magic[2] == 'M' && magic[3] == ' '))
+		{
+			printf("\n\n The file %s is not a .pkm file.\n",srcfile);
+			exit(1);
+		}
+
+		// Read version
+		fread(&version[0], sizeof(unsigned char), 1, f);
+		fread(&version[1], sizeof(unsigned char), 1, f);
+		if( version[0] == '1' && version[1] == '0' )
+		{
+			// Read texture type
+			read_big_endian_2byte_word(&texture_type, f);
+			if(!(texture_type == ETC1_RGB_NO_MIPMAPS))
+			{
+				printf("\n\n The file %s (of version %c.%c) does not contain a texture of known format.\n", srcfile, version[0],version[1]);
+				printf("Known formats: ETC1_RGB_NO_MIPMAPS.\n");
+				exit(1);
+			}
+		}
+		else if( version[0] == '2' && version[1] == '0' )
+		{
+			// Read texture type
+			read_big_endian_2byte_word(&texture_type, f);
+			if(texture_type==ETC2PACKAGE_RG_SIGNED_NO_MIPMAPS) 
+			{
+				texture_type=ETC2PACKAGE_RG_NO_MIPMAPS;
+				formatSigned=1;
+				//printf("Decompressing 2-channel signed data\n");
+			}
+			if(texture_type==ETC2PACKAGE_R_SIGNED_NO_MIPMAPS) 
+			{
+				texture_type=ETC2PACKAGE_R_NO_MIPMAPS;
+				formatSigned=1;
+				//printf("Decompressing 1-channel signed data\n");
+			}
+			if(texture_type==ETC2PACKAGE_sRGB_NO_MIPMAPS)
+			{
+				// The SRGB formats are decoded just as RGB formats -- use RGB format for decompression.
+				texture_type=ETC2PACKAGE_RGB_NO_MIPMAPS;
+			}
+			if(texture_type==ETC2PACKAGE_sRGBA_NO_MIPMAPS)
+			{
+				// The SRGB formats are decoded just as RGB formats -- use RGB format for decompression.
+				texture_type=ETC2PACKAGE_RGBA_NO_MIPMAPS;
+			}
+			if(texture_type==ETC2PACKAGE_sRGBA1_NO_MIPMAPS)
+			{
+				// NOTE(review): this assigns the value texture_type already has, unlike the two
+				// mappings above. It is kept as it is because every later check in this function
+				// treats sRGBA1 and RGBA1 alike -- confirm whether RGBA1 was intended.
+				texture_type=ETC2PACKAGE_sRGBA1_NO_MIPMAPS;
+			}
+			if(texture_type==ETC2PACKAGE_RGBA_NO_MIPMAPS_OLD) 
+			{
+				printf("\n\nThe file %s contains a compressed texture created using an old version of ETCPACK.\n",srcfile);
+				printf("decompression is not supported in this version.\n");
+				exit(1);
+			}
+			if(!(texture_type==ETC2PACKAGE_RGB_NO_MIPMAPS||texture_type==ETC2PACKAGE_sRGB_NO_MIPMAPS||texture_type==ETC2PACKAGE_RGBA_NO_MIPMAPS||texture_type==ETC2PACKAGE_sRGBA_NO_MIPMAPS||texture_type==ETC2PACKAGE_R_NO_MIPMAPS||texture_type==ETC2PACKAGE_RG_NO_MIPMAPS||texture_type==ETC2PACKAGE_RGBA1_NO_MIPMAPS||texture_type==ETC2PACKAGE_sRGBA1_NO_MIPMAPS))
+			{
+				printf("\n\n The file %s does not contain a texture of known format.\n", srcfile);
+				printf("Known formats: ETC2PACKAGE_RGB_NO_MIPMAPS.\n");
+				exit(1);
+			}
+		}
+		else
+		{
+			printf("\n\n The file %s is not of version 1.0 or 2.0 but of version %c.%c.\n",srcfile, version[0], version[1]);
+			printf("Aborting.\n");
+			exit(1);
+		}
+		format=texture_type;
+		printf("textype: %d\n",texture_type);
+		// ETC2 is backwards compatible, which means that an ETC2-capable decompressor can also handle
+		// old ETC1 textures without any problems. Thus a version 1.0 file with ETC1_RGB_NO_MIPMAPS and a 
+		// version 2.0 file with ETC2PACKAGE_RGB_NO_MIPMAPS can be handled by the same ETC2-capable decompressor
+
+		// Read how many pixels the blocks make up
+
+		read_big_endian_2byte_word(&w, f);
+		read_big_endian_2byte_word(&h, f);
+		width = w;
+		height = h;
+
+		// Read how many pixels contain active data (the rest are just
+		// for making sure we have a 4*a x 4*b size).
+
+		read_big_endian_2byte_word(&w, f);
+		read_big_endian_2byte_word(&h, f);
+		active_width = w;
+		active_height = h;
+	}
+	printf("Width = %d, Height = %d\n",width, height);
+	printf("active pixel area: top left %d x %d area.\n",active_width, active_height);
+
+	if(format==ETC2PACKAGE_RG_NO_MIPMAPS)
+		img=(uint8*)malloc(3*width*height*2);
+	else
+		img=(uint8*)malloc(3*width*height);
+	if(!img)
+	{
+		printf("Error: could not allocate memory\n");
+		exit(1);
+	}
+	if(format==ETC2PACKAGE_RGBA_NO_MIPMAPS||format==ETC2PACKAGE_R_NO_MIPMAPS||format==ETC2PACKAGE_RG_NO_MIPMAPS||format==ETC2PACKAGE_RGBA1_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS)
+	{
+		//printf("alpha channel decompression\n");
+		alphaimg=(uint8*)malloc(width*height*2);
+		setupAlphaTableAndValtab();
+		if(!alphaimg)
+		{
+			printf("Error: could not allocate memory for alpha\n");
+			exit(1);
+		}
+	}
+	if(format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+	{
+		alphaimg2=(uint8*)malloc(width*height*2);
+		if(!alphaimg2)
+		{
+			printf("Error: could not allocate memory\n");
+			exit(1);
+		}
+	}
+
+	// Decode the file one 4x4 block at a time, row by row.
+	for(int y=0;y<height/4;y++)
+	{
+		for(int x=0;x<width/4;x++)
+		{
+			//decode alpha channel for RGBA
+			if(format==ETC2PACKAGE_RGBA_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA_NO_MIPMAPS) 
+			{
+				uint8 alphablock[8];
+				fread(alphablock,1,8,f);
+				decompressBlockAlpha(alphablock,alphaimg,width,height,4*x,4*y);
+			}
+			//color channels for most normal modes
+			if(format!=ETC2PACKAGE_R_NO_MIPMAPS&&format!=ETC2PACKAGE_RG_NO_MIPMAPS) 
+			{
+				//we have normal ETC2 color channels, decompress these
+				read_big_endian_4byte_word(&block_part1,f);
+				read_big_endian_4byte_word(&block_part2,f);
+				if(format==ETC2PACKAGE_RGBA1_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS)
+					decompressBlockETC21BitAlpha(block_part1, block_part2,img,alphaimg,width,height,4*x,4*y);
+				else
+					decompressBlockETC2(block_part1, block_part2,img,width,height,4*x,4*y);
+			}
+			//one or two 11-bit alpha channels for R or RG.
+			if(format==ETC2PACKAGE_R_NO_MIPMAPS||format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+			{
+				uint8 alphablock[8];
+				fread(alphablock,1,8,f);
+				decompressBlockAlpha16bit(alphablock,alphaimg,width,height,4*x,4*y);
+			}
+			if(format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+			{
+				uint8 alphablock[8];
+				fread(alphablock,1,8,f);
+				decompressBlockAlpha16bit(alphablock,alphaimg2,width,height,4*x,4*y);
+			}
+		}
+	}
+	if(format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+	{
+		// Interleave the two decoded 16-bit channels into img; the third channel is zero.
+		for(int y=0;y<height;y++)
+		{
+			for(int x=0;x<width;x++)
+			{
+				img[6*(y*width+x)]=alphaimg[2*(y*width+x)];
+				img[6*(y*width+x)+1]=alphaimg[2*(y*width+x)+1];
+				img[6*(y*width+x)+2]=alphaimg2[2*(y*width+x)];
+				img[6*(y*width+x)+3]=alphaimg2[2*(y*width+x)+1];
+				img[6*(y*width+x)+4]=0;
+				img[6*(y*width+x)+5]=0;
+			}
+		}
+		// The second channel now lives in img and the buffer never leaves this
+		// function, so it is released here regardless of whether cropping follows.
+		free(alphaimg2);
+		alphaimg2=NULL;
+	}
+
+	// Ok, and now only write out the active pixels to the .ppm file.
+	// (But only if the active pixels differ from the total pixels)
+
+	if( !(height == active_height && width == active_width) )
+	{
+		if(format==ETC2PACKAGE_RG_NO_MIPMAPS)
+			newimg=(uint8*)malloc(3*active_width*active_height*2);
+		else
+			newimg=(uint8*)malloc(3*active_width*active_height);
+
+		if(!newimg)
+		{
+			printf("Error: could not allocate memory\n");
+			exit(1);
+		}
+
+		if(format==ETC2PACKAGE_RGBA_NO_MIPMAPS||format==ETC2PACKAGE_RGBA1_NO_MIPMAPS||format==ETC2PACKAGE_R_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS)
+		{
+			newalphaimg = (uint8*)malloc(active_width*active_height*2);
+			if(!newalphaimg)
+			{
+				printf("Error: could not allocate memory for alpha\n");
+				exit(1);
+			}
+		}
+
+		// Convert from total area to active area:
+
+		for(yy = 0; yy<active_height; yy++)
+		{
+			for(xx = 0; xx< active_width; xx++)
+			{
+				if(format!=ETC2PACKAGE_R_NO_MIPMAPS&&format!=ETC2PACKAGE_RG_NO_MIPMAPS) 
+				{
+					newimg[ (yy*active_width)*3 + xx*3 + 0 ] = img[ (yy*width)*3 + xx*3 + 0];
+					newimg[ (yy*active_width)*3 + xx*3 + 1 ] = img[ (yy*width)*3 + xx*3 + 1];
+					newimg[ (yy*active_width)*3 + xx*3 + 2 ] = img[ (yy*width)*3 + xx*3 + 2];
+				}
+				else if(format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+				{
+					newimg[ (yy*active_width)*6 + xx*6 + 0 ] = img[ (yy*width)*6 + xx*6 + 0];
+					newimg[ (yy*active_width)*6 + xx*6 + 1 ] = img[ (yy*width)*6 + xx*6 + 1];
+					newimg[ (yy*active_width)*6 + xx*6 + 2 ] = img[ (yy*width)*6 + xx*6 + 2];
+					newimg[ (yy*active_width)*6 + xx*6 + 3 ] = img[ (yy*width)*6 + xx*6 + 3];
+					newimg[ (yy*active_width)*6 + xx*6 + 4 ] = img[ (yy*width)*6 + xx*6 + 4];
+					newimg[ (yy*active_width)*6 + xx*6 + 5 ] = img[ (yy*width)*6 + xx*6 + 5];
+				}
+				if(format==ETC2PACKAGE_R_NO_MIPMAPS) 
+				{
+					newalphaimg[ ((yy*active_width) + xx)*2]   = alphaimg[2*((yy*width) + xx)];
+					newalphaimg[ ((yy*active_width) + xx)*2+1] = alphaimg[2*((yy*width) + xx)+1];
+				}
+				if(format==ETC2PACKAGE_RGBA_NO_MIPMAPS||format==ETC2PACKAGE_RGBA1_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS) 
+				{
+					newalphaimg[ ((yy*active_width) + xx)]   = alphaimg[((yy*width) + xx)];
+				}
+			}
+		}
+
+		free(img);
+		img = newimg;
+		if(format==ETC2PACKAGE_RGBA_NO_MIPMAPS||format==ETC2PACKAGE_RGBA1_NO_MIPMAPS||format==ETC2PACKAGE_R_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS)
+		{
+			free(alphaimg);
+			alphaimg=newalphaimg;
+		}
+		if(format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+		{
+			// The second channel buffer has already been released after interleaving.
+			free(alphaimg);
+			alphaimg = NULL;
+		}
+	}
+
+	fclose(f);
+}
+
+// Writes output file 
+//
+// Stores a decompressed image in `dstfile`.
+//   img, alphaimg   pixel buffers to write; both are released with free()
+//                   before returning (alphaimg only when it is not NULL).
+//   width, height   image size in pixels.
+//
+// The image is first written to the intermediate files "tmp.ppm" and/or
+// "alphaout.pgm" in the current directory, depending on the global `format`.
+// The destination is then produced by renaming tmp.ppm (for .ppm
+// destinations), by writing a TGA file directly (formats with alpha) or by
+// running the external converter "imconv" through system().
+//
+// Commands are built in a fixed 300 byte buffer. A destination name that would
+// not fit is now rejected up front, and snprintf() is used for formatting;
+// the unbounded sprintf() calls used before could write past the buffer.
+//
+// NOTE(review): dstfile is inserted into the shell commands without quoting,
+// so names containing spaces or shell metacharacters are not handled safely.
+// The "del" and "move" commands presumably target the Windows command
+// interpreter -- confirm before relying on this on other platforms.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void writeOutputFile(char *dstfile, uint8* img, uint8* alphaimg, int width, int height) 
+{
+	char str[300];
+	str[0] = '\0';
+
+	// Every command below is a short fixed text (well under 32 characters)
+	// followed by dstfile, so this guarantees that all of them fit into str.
+	if(strlen(dstfile) + 32 > sizeof(str))
+	{
+		printf("Error: destination file name is too long.\n");
+		exit(1);
+	}
+
+	if(format!=ETC2PACKAGE_R_NO_MIPMAPS&&format!=ETC2PACKAGE_RG_NO_MIPMAPS) 
+	{
+		fWritePPM("tmp.ppm",width,height,img,8,false);
+		printf("Saved file tmp.ppm \n\n");
+	}
+	else if(format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+	{
+		fWritePPM("tmp.ppm",width,height,img,16,false);
+	}
+	if(format==ETC2PACKAGE_RGBA_NO_MIPMAPS||format==ETC2PACKAGE_RGBA1_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS)
+		fWritePGM("alphaout.pgm",width,height,alphaimg,false,8);
+	if(format==ETC2PACKAGE_R_NO_MIPMAPS)
+		fWritePGM("alphaout.pgm",width,height,alphaimg,false,16);
+
+	// Delete destination file if it exists
+	if(fileExist(dstfile))
+	{
+		snprintf(str, sizeof(str), "del %s\n",dstfile);
+		system(str);
+	}
+
+	int q = find_pos_of_extension(dstfile);
+	if(!strcmp(&dstfile[q],".ppm")&&format!=ETC2PACKAGE_R_NO_MIPMAPS) 
+	{
+		// Already a .ppm file. Just rename. 
+		snprintf(str, sizeof(str), "move tmp.ppm %s\n",dstfile);
+		printf("Renaming destination file to %s\n",dstfile);
+	}
+	else
+	{
+		// Converting from .ppm to other file format
+		// 
+		// Use your favorite command line image converter program,
+		// for instance Image Magick. Just make sure the syntax can
+		// be written as below:
+		// 
+		// C:\imconv source.ppm dest.jpg
+		//
+		if(format==ETC2PACKAGE_RGBA_NO_MIPMAPS||format==ETC2PACKAGE_RGBA1_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS) 
+		{
+			// Somewhere after version 6.7.1-2 of ImageMagick the following command gives the wrong result due to a bug. 
+			// sprintf(str,"composite -compose CopyOpacity alphaout.pgm tmp.ppm %s\n",dstfile);
+			// Instead we read the file and write a tga.
+
+			printf("Converting destination file from .ppm/.pgm to %s with alpha\n",dstfile);
+			int rw, rh;
+			unsigned char *pixelsRGB;
+			unsigned char *pixelsA;
+			fReadPPM("tmp.ppm", rw, rh, pixelsRGB, 8);
+			fReadPGM("alphaout.pgm", rw, rh, pixelsA, 8);
+			fWriteTGAfromRGBandA(dstfile, rw, rh, pixelsRGB, pixelsA, true);
+			free(pixelsRGB);
+			free(pixelsA);
+			str[0] = '\0'; // Nothing to execute.
+		}
+		else if(format==ETC2PACKAGE_R_NO_MIPMAPS) 
+		{
+			snprintf(str, sizeof(str), "imconv alphaout.pgm %s\n",dstfile);
+			printf("Converting destination file from .pgm to %s\n",dstfile);
+		}
+		else 
+		{
+			snprintf(str, sizeof(str), "imconv tmp.ppm %s\n",dstfile);
+			printf("Converting destination file from .ppm to %s\n",dstfile);
+		}
+	}
+	// Execute system call (skipped when the destination was written directly).
+	if(str[0] != '\0')
+		system(str);
+
+	free(img);
+	if(alphaimg!=NULL)
+		free(alphaimg);
+}
+
+// Decompresses srcfile with uncompressFile and prints the PSNR of the result against the original data (origimg for the color and RG formats, origalpha for the R format). The originals are addressed with the width rounded up to a multiple of 4, the decompressed buffers with the active width. Returns the color PSNR; for the R and RG formats the PSNR values are only printed and 0 is returned.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double calculatePSNRfile(char *srcfile, uint8 *origimg, uint8* origalpha)
+{
+	uint8 *alphaimg, *img;
+	int active_width, active_height;
+	uncompressFile(srcfile,img,alphaimg,active_width,active_height);
+
+	// calculate Mean Square Error (MSE)
+	double MSER=0,MSEG=0,MSEB=0,MSEA, PSNRR,PSNRG,PSNRA;
+	double MSE;
+	double wMSE;
+	double PSNR=0;
+	double wPSNR;
+	double err;
+	MSE = 0;
+	MSEA=0;
+	wMSE = 0;
+	int width=((active_width+3)/4)*4;	// row stride of the original buffers: width padded to a multiple of 4
+	int height=((active_height+3)/4)*4;
+	int numpixels = 0;
+	for(int y=0;y<active_height;y++)
+	{
+		for(int x=0;x<active_width;x++)
+		{
+			if(format!=ETC2PACKAGE_R_NO_MIPMAPS&&format!=ETC2PACKAGE_RG_NO_MIPMAPS) 
+			{
+				//we have regular color channels.. (alphaimg is a decompressed buffer, so its row stride is active_width, not the padded width)
+				if((format != ETC2PACKAGE_RGBA1_NO_MIPMAPS && format != ETC2PACKAGE_sRGBA1_NO_MIPMAPS) || alphaimg[y*active_width + x] > 0)
+				{
+					err = img[y*active_width*3+x*3+0] - origimg[y*width*3+x*3+0];
+					MSE  += ((err * err)/3.0);
+					wMSE += PERCEPTUAL_WEIGHT_R_SQUARED * (err*err);
+					err = img[y*active_width*3+x*3+1] - origimg[y*width*3+x*3+1];
+					MSE  += ((err * err)/3.0);
+					wMSE += PERCEPTUAL_WEIGHT_G_SQUARED * (err*err);
+					err = img[y*active_width*3+x*3+2] - origimg[y*width*3+x*3+2];
+					MSE  += ((err * err)/3.0);
+					wMSE += PERCEPTUAL_WEIGHT_B_SQUARED * (err*err);
+					numpixels++;
+				}
+			}
+			else if(format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+			{
+				int rorig = (origimg[6*(y*width+x)+0]<<8)+origimg[6*(y*width+x)+1];	// 16-bit value, high byte first; 6 bytes per pixel
+				int rnew =  (    img[6*(y*active_width+x)+0]<<8)+    img[6*(y*active_width+x)+1];
+				int gorig = (origimg[6*(y*width+x)+2]<<8)+origimg[6*(y*width+x)+3];
+				int gnew =  (    img[6*(y*active_width+x)+2]<<8)+    img[6*(y*active_width+x)+3];
+				err=rorig-rnew;
+				MSER+=(err*err);
+				err=gorig-gnew;
+				MSEG+=(err*err);
+			}
+			else if(format==ETC2PACKAGE_R_NO_MIPMAPS) 
+			{
+				int aorig = (((int)origalpha[2*(y*width+x)+0])<<8)+origalpha[2*(y*width+x)+1];	// 16-bit value, high byte first; 2 bytes per pixel
+				int anew =  (((int)alphaimg[2*(y*active_width+x)+0])<<8)+alphaimg[2*(y*active_width+x)+1];
+				err=aorig-anew;
+				MSEA+=(err*err);
+			}
+		}
+	}
+	if(format == ETC2PACKAGE_RGBA1_NO_MIPMAPS || format == ETC2PACKAGE_sRGBA1_NO_MIPMAPS)
+	{
+		MSE = MSE / (1.0 * numpixels);	// NOTE(review): numpixels is 0 when every decompressed alpha is 0, which makes the results NaN
+		wMSE = wMSE / (1.0 * numpixels);
+		PSNR = 10*log((1.0*255*255)/MSE)/log(10.0);
+		wPSNR = 10*log((1.0*255*255)/wMSE)/log(10.0);
+		printf("PSNR only calculated on pixels where compressed alpha > 0\n");
+		printf("color PSNR: %lf\nweighted PSNR: %lf\n",PSNR,wPSNR);
+	}
+	else if(format!=ETC2PACKAGE_R_NO_MIPMAPS&&format!=ETC2PACKAGE_RG_NO_MIPMAPS) 
+	{
+		MSE = MSE / (active_width * active_height);
+		wMSE = wMSE / (active_width * active_height);
+		PSNR = 10*log((1.0*255*255)/MSE)/log(10.0);
+		wPSNR = 10*log((1.0*255*255)/wMSE)/log(10.0);
+		if(format == ETC2PACKAGE_RGBA_NO_MIPMAPS || format == ETC2PACKAGE_sRGBA_NO_MIPMAPS)
+			printf("PSNR only calculated on RGB, not on alpha\n");
+		printf("color PSNR: %lf\nweighted PSNR: %lf\n",PSNR,wPSNR);
+	}
+	else if(format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+	{
+		MSER = MSER / (active_width * active_height);
+		MSEG = MSEG / (active_width * active_height);
+		PSNRR = 10*log((1.0*65535*65535)/MSER)/log(10.0);
+		PSNRG = 10*log((1.0*65535*65535)/MSEG)/log(10.0);
+		printf("red PSNR: %lf\ngreen PSNR: %lf\n",PSNRR,PSNRG);
+	}
+	else if(format==ETC2PACKAGE_R_NO_MIPMAPS) 
+	{
+		MSEA = MSEA / (active_width * active_height);
+		PSNRA = 10*log((1.0*65535.0*65535.0)/MSEA)/log(10.0);
+		printf("PSNR: %lf\n",PSNRA);
+	}
+	free(img);	// NOTE(review): alphaimg is not released here - confirm who owns the buffer returned by uncompressFile
+	return PSNR;
+}
+
+//// Exhaustive code starts here.
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precomputes a table that is used when compressing a block exhaustively (perceptually weighted variant). For each of the 8 tables in compressParamsFast, each of the 16 pixels of block and each of the 4 table entries, the green error PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000 * SQUARE(clamped (avg_color[1] + modifier) - pixel green) is added to the matching entry of precalc_err_{UL,UR,LL,LR}_R and stored in precalc_err_{UL,UR,LL,LR}_RG (index table*16 + pixel*4 + entry, pixel counted 0-3 within its area). Returns nonzero when, for at least one table, the sum of per-pixel minimum errors over the upper, lower, left or right half is below best_err.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+inline unsigned int precompute_3bittable_all_subblocksRG_withtest_perceptual1000(uint8 *block,uint8 *avg_color, unsigned int *precalc_err_UL_R, unsigned int *precalc_err_UR_R, unsigned int *precalc_err_LL_R, unsigned int *precalc_err_LR_R,unsigned int *precalc_err_UL_RG, unsigned int *precalc_err_UR_RG, unsigned int *precalc_err_LL_RG, unsigned int *precalc_err_LR_RG, unsigned int best_err)
+{
+	int table;
+	int index;
+	int orig[3],approx[3][4];
+	int x;
+	int intensity_modifier;
+	const int *table_indices;
+
+	int good_enough_to_test;
+	unsigned int err[4];
+	unsigned int err_this_table_upper;
+	unsigned int err_this_table_lower;
+	unsigned int err_this_table_left;
+	unsigned int err_this_table_right;
+
+	// If the error in the red and green component is already larger than best_err for all 8 tables in 
+	// all of upper, lower, left and right, this combination of red and green will never be used in 
+	// the optimal color configuration. Therefore we can avoid testing all the blue colors for this 
+	// combination. 
+	good_enough_to_test = false;	
+
+	for(table=0;table<8;table++)		// try all the 8 tables. 
+	{
+		table_indices = &compressParamsFast[table*4];
+
+		intensity_modifier = table_indices[0];
+		approx[1][0]=CLAMP(0, avg_color[1]+intensity_modifier,255);
+		intensity_modifier = table_indices[1];
+		approx[1][1]=CLAMP(0, avg_color[1]+intensity_modifier,255);
+		intensity_modifier = table_indices[2];
+		approx[1][2]=CLAMP(0, avg_color[1]+intensity_modifier,255);
+		intensity_modifier = table_indices[3];
+		approx[1][3]=CLAMP(0, avg_color[1]+intensity_modifier,255);
+
+		err_this_table_upper = 0;
+		err_this_table_lower = 0;
+		err_this_table_left = 0;
+		err_this_table_right = 0;
+		for(x=0; x<4; x++)	// pixels 0-3: UL tables, counted in the upper and left sums
+		{
+			orig[0]=block[x*4];	// pixels are read at a stride of 4 bytes; only orig[1] is used in this function
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+			for(index=0;index<4;index++)
+			{
+				err[index] = precalc_err_UL_R[table*4*4+x*4+index] 
+					+ PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000 * SQUARE(approx[1][index]-orig[1]);
+				precalc_err_UL_RG[table*4*4+x*4+index] = err[index];
+			}
+			if(err[0] > err[1])	// the three compares leave min(err[0..3]) in err[0]
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_upper+=err[0];
+			err_this_table_left+=err[0];
+		}
+		for(x=4; x<8; x++)	// pixels 4-7: UR tables, counted in the upper and right sums
+		{
+			orig[0]=block[x*4];
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+			for(index=0;index<4;index++)
+			{
+				err[index] = precalc_err_UR_R[table*4*4+(x-4)*4+index] 
+				  + PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000 * SQUARE(approx[1][index]-orig[1]);
+				precalc_err_UR_RG[table*4*4+(x-4)*4+index] = err[index];
+			}
+			if(err[0] > err[1])
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_upper+=err[0];
+			err_this_table_right+=err[0];
+		}
+		for(x=8; x<12; x++)	// pixels 8-11: LL tables, counted in the lower and left sums
+		{
+			orig[0]=block[x*4];
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+
+			for(index=0;index<4;index++)
+			{
+				err[index] = precalc_err_LL_R[table*4*4+(x-8)*4+index]
+					+ PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000 * SQUARE(approx[1][index]-orig[1]);
+				precalc_err_LL_RG[table*4*4+(x-8)*4+index] = err[index];
+			}
+			if(err[0] > err[1])
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_lower+=err[0];
+			err_this_table_left+=err[0];
+		}
+		for(x=12; x<16; x++)	// pixels 12-15: LR tables, counted in the lower and right sums
+		{
+			orig[0]=block[x*4];
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+
+			for(index=0;index<4;index++)
+			{
+				err[index] = precalc_err_LR_R[table*4*4+(x-12)*4+index]
+					+ PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000 * SQUARE(approx[1][index]-orig[1]);
+				precalc_err_LR_RG[table*4*4+(x-12)*4+index] = err[index];
+			}
+			if(err[0] > err[1])
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_lower+=err[0];
+			err_this_table_right+=err[0];
+		}
+		if(err_this_table_upper < best_err)
+			good_enough_to_test = true;
+		if(err_this_table_lower < best_err)
+			good_enough_to_test = true;
+		if(err_this_table_left < best_err)
+			good_enough_to_test = true;
+		if(err_this_table_right < best_err)
+			good_enough_to_test = true;
+	}
+	return good_enough_to_test;
+} 
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precomputes a table that is used when compressing a block exhaustively (unweighted variant). For each of the 8 tables in compressParamsFast, each of the 16 pixels of block and each of the 4 table entries, the green error SQUARE(clamped (avg_color[1] + modifier) - pixel green) is added to the matching entry of precalc_err_{UL,UR,LL,LR}_R and stored in precalc_err_{UL,UR,LL,LR}_RG (index table*16 + pixel*4 + entry, pixel counted 0-3 within its area). Returns nonzero when, for at least one table, the sum of per-pixel minimum errors over the upper, lower, left or right half is below best_err.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+inline int precompute_3bittable_all_subblocksRG_withtest(uint8 *block,uint8 *avg_color, unsigned int *precalc_err_UL_R, unsigned int *precalc_err_UR_R, unsigned int *precalc_err_LL_R, unsigned int *precalc_err_LR_R,unsigned int *precalc_err_UL_RG, unsigned int *precalc_err_UR_RG, unsigned int *precalc_err_LL_RG, unsigned int *precalc_err_LR_RG, unsigned int best_err)
+{
+	int table;
+	int index;
+	int orig[3],approx[3][4];
+	int x;
+	int intensity_modifier;
+	const int *table_indices;
+
+	int good_enough_to_test;
+	unsigned int err[4];
+	unsigned int err_this_table_upper;
+	unsigned int err_this_table_lower;
+	unsigned int err_this_table_left;
+	unsigned int err_this_table_right;
+
+	// If the error in the red and green component is already larger than best_err for all 8 tables in 
+	// all of upper, lower, left and right, this combination of red and green will never be used in 
+	// the optimal color configuration. Therefore we can avoid testing all the blue colors for this 
+	// combination. 
+	good_enough_to_test = false;	
+
+	for(table=0;table<8;table++)		// try all the 8 tables. 
+	{
+		table_indices = &compressParamsFast[table*4];
+
+		intensity_modifier = table_indices[0];
+		approx[1][0]=CLAMP(0, avg_color[1]+intensity_modifier,255);
+		intensity_modifier = table_indices[1];
+		approx[1][1]=CLAMP(0, avg_color[1]+intensity_modifier,255);
+		intensity_modifier = table_indices[2];
+		approx[1][2]=CLAMP(0, avg_color[1]+intensity_modifier,255);
+		intensity_modifier = table_indices[3];
+		approx[1][3]=CLAMP(0, avg_color[1]+intensity_modifier,255);
+
+		err_this_table_upper = 0;
+		err_this_table_lower = 0;
+		err_this_table_left = 0;
+		err_this_table_right = 0;
+		for(x=0; x<4; x++)	// pixels 0-3: UL tables, counted in the upper and left sums
+		{
+			orig[0]=block[x*4];	// pixels are read at a stride of 4 bytes; only orig[1] is used in this function
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+			for(index=0;index<4;index++)
+			{
+				err[index] = precalc_err_UL_R[table*4*4+x*4+index]+SQUARE(approx[1][index]-orig[1]);
+				precalc_err_UL_RG[table*4*4+x*4+index] = err[index];
+			}
+			if(err[0] > err[1])	// the three compares leave min(err[0..3]) in err[0]
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_upper+=err[0];
+			err_this_table_left+=err[0];
+		}
+		for(x=4; x<8; x++)	// pixels 4-7: UR tables, counted in the upper and right sums
+		{
+			orig[0]=block[x*4];
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+			for(index=0;index<4;index++)
+			{
+				err[index] = precalc_err_UR_R[table*4*4+(x-4)*4+index]+SQUARE(approx[1][index]-orig[1]);
+				precalc_err_UR_RG[table*4*4+(x-4)*4+index] = err[index];
+			}
+			if(err[0] > err[1])
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_upper+=err[0];
+			err_this_table_right+=err[0];
+		}
+		for(x=8; x<12; x++)	// pixels 8-11: LL tables, counted in the lower and left sums
+		{
+			orig[0]=block[x*4];
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+
+			for(index=0;index<4;index++)
+			{
+				err[index] = precalc_err_LL_R[table*4*4+(x-8)*4+index]+SQUARE(approx[1][index]-orig[1]);
+				precalc_err_LL_RG[table*4*4+(x-8)*4+index] = err[index];
+			}
+			if(err[0] > err[1])
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_lower+=err[0];
+			err_this_table_left+=err[0];
+		}
+		for(x=12; x<16; x++)	// pixels 12-15: LR tables, counted in the lower and right sums
+		{
+			orig[0]=block[x*4];
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+
+			for(index=0;index<4;index++)
+			{
+				err[index] = precalc_err_LR_R[table*4*4+(x-12)*4+index]+SQUARE(approx[1][index]-orig[1]);
+				precalc_err_LR_RG[table*4*4+(x-12)*4+index] = err[index];
+			}
+			if(err[0] > err[1])
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_lower+=err[0];
+			err_this_table_right+=err[0];
+		}
+		if(err_this_table_upper < best_err)
+			good_enough_to_test = true;
+		if(err_this_table_lower < best_err)
+			good_enough_to_test = true;
+		if(err_this_table_left < best_err)
+			good_enough_to_test = true;
+		if(err_this_table_right < best_err)
+			good_enough_to_test = true;
+	}
+	return good_enough_to_test;
+} 
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precomputes a table that is used when compressing a block exhaustively (perceptually weighted variant, first component only). For each of the 8 tables in compressParamsFast, each of the 16 pixels of block and each of the 4 table entries, PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000 * SQUARE(clamped (avg_color[0] + modifier) - pixel byte 0) is stored in precalc_err_{UL,UR,LL,LR}_R (index table*16 + pixel*4 + entry, pixel counted 0-3 within its area). Returns nonzero when, for at least one table, the sum of per-pixel minimum errors over the upper, lower, left or right half is below best_err.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+inline unsigned int precompute_3bittable_all_subblocksR_with_test_perceptual1000(uint8 *block,uint8 *avg_color, unsigned int *precalc_err_UL_R, unsigned int *precalc_err_UR_R, unsigned int *precalc_err_LL_R, unsigned int *precalc_err_LR_R, unsigned int best_err)
+{
+	int table;
+	int index;
+	int orig[3],approx[3][4];
+	int x;
+	int intensity_modifier;
+	const int *table_indices;
+
+	unsigned int err[4];
+	unsigned int err_this_table_upper;
+	unsigned int err_this_table_lower;
+	unsigned int err_this_table_left;
+	unsigned int err_this_table_right;
+
+	int good_enough_to_test;
+
+	good_enough_to_test = false;
+
+	for(table=0;table<8;table++)		// try all the 8 tables. 
+	{
+		err_this_table_upper = 0;
+		err_this_table_lower = 0;
+		err_this_table_left = 0;
+		err_this_table_right = 0;
+
+		table_indices = &compressParamsFast[table*4];
+
+		intensity_modifier = table_indices[0];
+		approx[0][0]=CLAMP(0, avg_color[0]+intensity_modifier,255);
+		intensity_modifier = table_indices[1];
+		approx[0][1]=CLAMP(0, avg_color[0]+intensity_modifier,255);
+		intensity_modifier = table_indices[2];
+		approx[0][2]=CLAMP(0, avg_color[0]+intensity_modifier,255);
+		intensity_modifier = table_indices[3];
+		approx[0][3]=CLAMP(0, avg_color[0]+intensity_modifier,255);
+
+		for(x=0; x<4; x++)	// pixels 0-3: UL table, counted in the upper and left sums
+		{
+			orig[0]=block[x*4];	// pixels are read at a stride of 4 bytes; only orig[0] is used in this function
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+			for(index=0;index<4;index++)
+			{
+				err[index]=PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(approx[0][index]-orig[0]);
+				precalc_err_UL_R[table*4*4+x*4+index]=err[index];
+			}
+			if(err[0] > err[1])	// the three compares leave min(err[0..3]) in err[0]
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_upper+=err[0];
+			err_this_table_left+=err[0];
+		}
+		for(x=4; x<8; x++)	// pixels 4-7: UR table, counted in the upper and right sums
+		{
+			orig[0]=block[x*4];
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+			for(index=0;index<4;index++)
+			{
+				err[index]=PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(approx[0][index]-orig[0]);
+				precalc_err_UR_R[table*4*4+(x-4)*4+index]=err[index];
+			}
+			if(err[0] > err[1])
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_upper+=err[0];
+			err_this_table_right+=err[0];
+		}
+		for(x=8; x<12; x++)	// pixels 8-11: LL table, counted in the lower and left sums
+		{
+			orig[0]=block[x*4];
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+
+			for(index=0;index<4;index++)
+			{
+				err[index]=PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(approx[0][index]-orig[0]);
+				precalc_err_LL_R[table*4*4+(x-8)*4+index]=err[index];
+			}
+			if(err[0] > err[1])
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_lower+=err[0];
+			err_this_table_left+=err[0];
+			 
+		}
+		for(x=12; x<16; x++)	// pixels 12-15: LR table, counted in the lower and right sums
+		{
+			orig[0]=block[x*4];
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+
+			for(index=0;index<4;index++)
+			{
+				err[index]=PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(approx[0][index]-orig[0]);
+				precalc_err_LR_R[table*4*4+(x-12)*4+index]=err[index];
+			}
+			if(err[0] > err[1])
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_lower+=err[0];
+			err_this_table_right+=err[0];
+		}
+		if(err_this_table_upper < best_err)
+			good_enough_to_test = true;
+		if(err_this_table_lower < best_err)
+			good_enough_to_test = true;
+		if(err_this_table_left < best_err)
+			good_enough_to_test = true;
+		if(err_this_table_right < best_err)
+			good_enough_to_test = true;
+	}
+	return good_enough_to_test;
+} 
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precomputes a table that is used when compressing a block exhaustively (unweighted variant, first component only). For each of the 8 tables in compressParamsFast, each of the 16 pixels of block and each of the 4 table entries, SQUARE(clamped (avg_color[0] + modifier) - pixel byte 0) is stored in precalc_err_{UL,UR,LL,LR}_R (index table*16 + pixel*4 + entry, pixel counted 0-3 within its area). Returns nonzero when, for at least one table, the sum of per-pixel minimum errors over the upper, lower, left or right half is below best_err.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+inline int precompute_3bittable_all_subblocksR_with_test(uint8 *block,uint8 *avg_color, unsigned int *precalc_err_UL_R, unsigned int *precalc_err_UR_R, unsigned int *precalc_err_LL_R, unsigned int *precalc_err_LR_R, unsigned int best_err)
+{
+	int table;
+	int index;
+	int orig[3],approx[3][4];
+	int x;
+	int intensity_modifier;
+	const int *table_indices;
+
+	unsigned int err[4];
+	unsigned int err_this_table_upper;
+	unsigned int err_this_table_lower;
+	unsigned int err_this_table_left;
+	unsigned int err_this_table_right;
+
+	int good_enough_to_test;
+
+	good_enough_to_test = false;
+
+	for(table=0;table<8;table++)		// try all the 8 tables. 
+	{
+		err_this_table_upper = 0;
+		err_this_table_lower = 0;
+		err_this_table_left = 0;
+		err_this_table_right = 0;
+
+		table_indices = &compressParamsFast[table*4];
+
+		intensity_modifier = table_indices[0];
+		approx[0][0]=CLAMP(0, avg_color[0]+intensity_modifier,255);
+		intensity_modifier = table_indices[1];
+		approx[0][1]=CLAMP(0, avg_color[0]+intensity_modifier,255);
+		intensity_modifier = table_indices[2];
+		approx[0][2]=CLAMP(0, avg_color[0]+intensity_modifier,255);
+		intensity_modifier = table_indices[3];
+		approx[0][3]=CLAMP(0, avg_color[0]+intensity_modifier,255);
+
+		for(x=0; x<4; x++)	// pixels 0-3: UL table, counted in the upper and left sums
+		{
+			orig[0]=block[x*4];	// pixels are read at a stride of 4 bytes; only orig[0] is used in this function
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+			for(index=0;index<4;index++)
+			{
+				err[index]=SQUARE(approx[0][index]-orig[0]);
+				precalc_err_UL_R[table*4*4+x*4+index]=err[index];
+			}
+			if(err[0] > err[1])	// the three compares leave min(err[0..3]) in err[0]
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_upper+=err[0];
+			err_this_table_left+=err[0];
+		}
+		for(x=4; x<8; x++)	// pixels 4-7: UR table, counted in the upper and right sums
+		{
+			orig[0]=block[x*4];
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+			for(index=0;index<4;index++)
+			{
+				err[index]=SQUARE(approx[0][index]-orig[0]);
+				precalc_err_UR_R[table*4*4+(x-4)*4+index]=err[index];
+			}
+			if(err[0] > err[1])
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_upper+=err[0];
+			err_this_table_right+=err[0];
+		}
+		for(x=8; x<12; x++)	// pixels 8-11: LL table, counted in the lower and left sums
+		{
+			orig[0]=block[x*4];
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+
+			for(index=0;index<4;index++)
+			{
+				err[index]=SQUARE(approx[0][index]-orig[0]);
+				precalc_err_LL_R[table*4*4+(x-8)*4+index]=err[index];
+			}
+			if(err[0] > err[1])
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_lower+=err[0];
+			err_this_table_left+=err[0];
+			 
+		}
+		for(x=12; x<16; x++)	// pixels 12-15: LR table, counted in the lower and right sums
+		{
+			orig[0]=block[x*4];
+			orig[1]=block[x*4+1];
+			orig[2]=block[x*4+2];
+
+			for(index=0;index<4;index++)
+			{
+				err[index]=SQUARE(approx[0][index]-orig[0]);
+				precalc_err_LR_R[table*4*4+(x-12)*4+index]=err[index];
+			}
+			if(err[0] > err[1])
+				err[0] = err[1];
+			if(err[2] > err[3])
+				err[2] = err[3];
+			if(err[0] > err[2])
+				err[0] = err[2];
+			err_this_table_lower+=err[0];
+			err_this_table_right+=err[0];
+		}
+		if(err_this_table_upper < best_err)
+			good_enough_to_test = true;
+		if(err_this_table_lower < best_err)
+			good_enough_to_test = true;
+		if(err_this_table_left < best_err)
+			good_enough_to_test = true;
+		if(err_this_table_right < best_err)
+			good_enough_to_test = true;
+	}
+	return good_enough_to_test;
+} 
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Tries all index-tables, used when compressing a block exhaustively. For each of the 8 tables (unrolled through ONE_TABLE_3) the third component color_quant1[2] plus each table modifier is clamped through clamp_table_plus_255, its squared error against byte 2 of every pixel (square_table) is added to the precalculated errors in precalc_err_{UL,UR,LL,LR}_RG, and the per-pixel minimum is summed per half. err_upper, err_lower, err_left and err_right receive the smallest sums found over the tables that pass the best_err test; they stay at 3*255*255*16 when no table passes. The helper macros are defined inside the function and are not #undef'ed. NOTE(review): square_table is indexed with a possibly negative difference - presumably it points into the middle of a larger array; confirm at its definition.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+inline void tryalltables_3bittable_all_subblocks_using_precalc(uint8 *block_2x2,uint8 *color_quant1, unsigned int *precalc_err_UL_RG, unsigned int *precalc_err_UR_RG, unsigned int *precalc_err_LL_RG, unsigned int *precalc_err_LR_RG, unsigned int &err_upper, unsigned int &err_lower, unsigned int &err_left, unsigned int &err_right, unsigned int best_err)
+{
+	unsigned int err_this_table_upper;
+	unsigned int err_this_table_lower;
+	unsigned int err_this_table_left;
+	unsigned int err_this_table_right;
+	int orig[3],approx[4];
+	int err[4];
+	err_upper = 3*255*255*16;	// start value that the per-table sums are min-reduced against
+	err_lower = 3*255*255*16;	
+	err_left = 3*255*255*16;	
+	err_right = 3*255*255*16;	
+
+#define ONE_PIXEL_UL(table_nbr,xx)\
+			orig[0]=block_2x2[xx*4];\
+			orig[1]=block_2x2[xx*4+1];\
+			orig[2]=block_2x2[xx*4+2];\
+			/* unrolled loop for(index=0;index<4;index++)*/\
+				err[0]=precalc_err_UL_RG[table_nbr*4*4+xx*4+0] + square_table[approx[0]-orig[2]];\
+				err[1]=precalc_err_UL_RG[table_nbr*4*4+xx*4+1] + square_table[approx[1]-orig[2]];\
+				err[2]=precalc_err_UL_RG[table_nbr*4*4+xx*4+2] + square_table[approx[2]-orig[2]];\
+				err[3]=precalc_err_UL_RG[table_nbr*4*4+xx*4+3] + square_table[approx[3]-orig[2]];\
+			/* end unrolled loop*/\
+			if(err[0] > err[1])\
+				err[0] = err[1];\
+			if(err[2] > err[3])\
+				err[2] = err[3];\
+			if(err[0] > err[2])\
+				err[0] = err[2];\
+			err_this_table_upper+=err[0];\
+			err_this_table_left+=err[0];\
+
+#define ONE_PIXEL_UR(table_nbr,xx)\
+			orig[0]=block_2x2[xx*4];\
+			orig[1]=block_2x2[xx*4+1];\
+			orig[2]=block_2x2[xx*4+2];\
+			/* unrolled loop for(index=0;index<4;index++)*/\
+				err[0]=precalc_err_UR_RG[table_nbr*4*4+(xx-4)*4+0] + square_table[approx[0]-orig[2]];\
+ 				err[1]=precalc_err_UR_RG[table_nbr*4*4+(xx-4)*4+1] + square_table[approx[1]-orig[2]];\
+ 				err[2]=precalc_err_UR_RG[table_nbr*4*4+(xx-4)*4+2] + square_table[approx[2]-orig[2]];\
+ 				err[3]=precalc_err_UR_RG[table_nbr*4*4+(xx-4)*4+3] + square_table[approx[3]-orig[2]];\
+			/* end unrolled loop */\
+ 			if(err[0] > err[1])\
+ 				err[0] = err[1];\
+ 			if(err[2] > err[3])\
+ 				err[2] = err[3];\
+ 			if(err[0] > err[2])\
+ 				err[0] = err[2];\
+ 			err_this_table_upper+=err[0];\
+ 			err_this_table_right+=err[0];
+
+#define ONE_PIXEL_LL(table_nbr,xx)\
+			orig[0]=block_2x2[xx*4];\
+			orig[1]=block_2x2[xx*4+1];\
+			orig[2]=block_2x2[xx*4+2];\
+			/* unrolled loop for(index=0;index<4;index++)*/\
+				err[0]=precalc_err_LL_RG[table_nbr*4*4+(xx-8)*4+0] + square_table[approx[0]-orig[2]];\
+				err[1]=precalc_err_LL_RG[table_nbr*4*4+(xx-8)*4+1] + square_table[approx[1]-orig[2]];\
+				err[2]=precalc_err_LL_RG[table_nbr*4*4+(xx-8)*4+2] + square_table[approx[2]-orig[2]];\
+				err[3]=precalc_err_LL_RG[table_nbr*4*4+(xx-8)*4+3] + square_table[approx[3]-orig[2]];\
+			/* end unrolled loop*/\
+		 	if(err[0] > err[1])\
+				err[0] = err[1];\
+			if(err[2] > err[3])\
+				err[2] = err[3];\
+			if(err[0] > err[2])\
+				err[0] = err[2];\
+			err_this_table_lower+=err[0];\
+			err_this_table_left+=err[0];\
+
+#define ONE_PIXEL_LR(table_nbr,xx)\
+			orig[0]=block_2x2[xx*4];\
+			orig[1]=block_2x2[xx*4+1];\
+			orig[2]=block_2x2[xx*4+2];\
+			/* unrolled loop for(index=0;index<4;index++)*/\
+				err[0]=precalc_err_LR_RG[table_nbr*4*4+(xx-12)*4+0] + square_table[approx[0]-orig[2]];\
+				err[1]=precalc_err_LR_RG[table_nbr*4*4+(xx-12)*4+1] + square_table[approx[1]-orig[2]];\
+				err[2]=precalc_err_LR_RG[table_nbr*4*4+(xx-12)*4+2] + square_table[approx[2]-orig[2]];\
+				err[3]=precalc_err_LR_RG[table_nbr*4*4+(xx-12)*4+3] + square_table[approx[3]-orig[2]];\
+			/* end unrolled loop*/\
+			if(err[0] > err[1])\
+				err[0] = err[1];\
+			if(err[2] > err[3])\
+				err[2] = err[3];\
+			if(err[0] > err[2])\
+				err[0] = err[2];\
+			err_this_table_lower+=err[0];\
+			err_this_table_right+=err[0];\
+
+#define ONE_TABLE_3(table_nbr)\
+		err_this_table_upper = 0;\
+		err_this_table_lower = 0;\
+		err_this_table_left = 0;\
+		err_this_table_right = 0;\
+		approx[0]=clamp_table_plus_255[color_quant1[2]+compressParamsFast[table_nbr*4+0]+255];\
+		approx[1]=clamp_table_plus_255[color_quant1[2]+compressParamsFast[table_nbr*4+1]+255];\
+		approx[2]=clamp_table_plus_255[color_quant1[2]+compressParamsFast[table_nbr*4+2]+255];\
+		approx[3]=clamp_table_plus_255[color_quant1[2]+compressParamsFast[table_nbr*4+3]+255];\
+		/* unroll loop for(xx=0; xx<4; xx++) */\
+			ONE_PIXEL_UL(table_nbr,0)\
+			ONE_PIXEL_UL(table_nbr,1)\
+			ONE_PIXEL_UL(table_nbr,2)\
+			ONE_PIXEL_UL(table_nbr,3)\
+		/* end unroll loop */\
+		/* unroll loop for(xx=12; xx<16; xx++) */\
+			ONE_PIXEL_LR(table_nbr,12)\
+			ONE_PIXEL_LR(table_nbr,13)\
+			ONE_PIXEL_LR(table_nbr,14)\
+			ONE_PIXEL_LR(table_nbr,15)\
+		/* end unroll loop */\
+		/* If error in the top left 2x2 pixel area is already larger than the best error, and */\
+		/* the same is true for the bottom right 2x2 pixel area, this combination of table and color */\
+		/* can never be part of an optimal solution and therefore we do not need to test the other */\
+		/* two 2x2 pixel areas (at this point upper holds only UL and lower holds only LR) */\
+		if((err_this_table_upper<best_err)||(err_this_table_lower<best_err))\
+		{\
+			/* unroll loop for(xx=4; xx<8; xx++) */\
+ 				ONE_PIXEL_UR(table_nbr,4)\
+ 				ONE_PIXEL_UR(table_nbr,5)\
+ 				ONE_PIXEL_UR(table_nbr,6)\
+ 				ONE_PIXEL_UR(table_nbr,7)\
+			/* end unroll loop */\
+			/* unroll loop for(xx=8; xx<12; xx++) */\
+				ONE_PIXEL_LL(table_nbr,8)\
+				ONE_PIXEL_LL(table_nbr,9)\
+				ONE_PIXEL_LL(table_nbr,10)\
+				ONE_PIXEL_LL(table_nbr,11)\
+			/* end unroll loop */\
+			if(err_this_table_upper<err_upper)\
+				err_upper = err_this_table_upper;\
+			if(err_this_table_lower<err_lower)\
+				err_lower = err_this_table_lower;\
+			if(err_this_table_left<err_left)\
+				err_left = err_this_table_left;\
+			if(err_this_table_right<err_right)\
+				err_right = err_this_table_right;\
+		}\
+
+	/*unroll loop for(table_nbr=0;table_nbr<8;table_nbr++)*/
+		ONE_TABLE_3(0);
+		ONE_TABLE_3(1);
+		ONE_TABLE_3(2);
+		ONE_TABLE_3(3);
+		ONE_TABLE_3(4);
+		ONE_TABLE_3(5);
+		ONE_TABLE_3(6);
+		ONE_TABLE_3(7);
+	/*end unroll loop*/
+} 
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Tries all index-tables, used when compressing a block exhaustively using perceptual error measure
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+inline void tryalltables_3bittable_all_subblocks_using_precalc_perceptual1000(uint8 *block_2x2,uint8 *color_quant1, unsigned int *precalc_err_UL_RG, unsigned int *precalc_err_UR_RG, unsigned int *precalc_err_LL_RG, unsigned int *precalc_err_LR_RG, unsigned int &err_upper, unsigned int &err_lower, unsigned int &err_left, unsigned int &err_right, unsigned int best_err)
+{
+	unsigned int err_this_table_upper;
+	unsigned int err_this_table_lower;
+	unsigned int err_this_table_left;
+	unsigned int err_this_table_right;
+	int orig[3],approx[4];
+	int err[4];
+	err_upper = MAXERR1000;	
+	err_lower = MAXERR1000;	
+	err_left = MAXERR1000;	
+	err_right =MAXERR1000;	
+
+#define ONE_PIXEL_UL_PERCEP(table_nbr,xx)\
+			orig[0]=block_2x2[xx*4];\
+			orig[1]=block_2x2[xx*4+1];\
+			orig[2]=block_2x2[xx*4+2];\
+			/* unrolled loop for(index=0;index<4;index++)*/\
+				err[0]=precalc_err_UL_RG[table_nbr*4*4+xx*4+0] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[0]-orig[2]];\
+				err[1]=precalc_err_UL_RG[table_nbr*4*4+xx*4+1] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[1]-orig[2]];\
+				err[2]=precalc_err_UL_RG[table_nbr*4*4+xx*4+2] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[2]-orig[2]];\
+				err[3]=precalc_err_UL_RG[table_nbr*4*4+xx*4+3] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[3]-orig[2]];\
+			/* end unrolled loop*/\
+			if(err[0] > err[1])\
+				err[0] = err[1];\
+			if(err[2] > err[3])\
+				err[2] = err[3];\
+			if(err[0] > err[2])\
+				err[0] = err[2];\
+			err_this_table_upper+=err[0];\
+			err_this_table_left+=err[0];\
+
+#define ONE_PIXEL_UR_PERCEP(table_nbr,xx)\
+			orig[0]=block_2x2[xx*4];\
+			orig[1]=block_2x2[xx*4+1];\
+			orig[2]=block_2x2[xx*4+2];\
+			/* unrolled loop for(index=0;index<4;index++)*/\
+				err[0]=precalc_err_UR_RG[table_nbr*4*4+(xx-4)*4+0] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[0]-orig[2]];\
+ 				err[1]=precalc_err_UR_RG[table_nbr*4*4+(xx-4)*4+1] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[1]-orig[2]];\
+ 				err[2]=precalc_err_UR_RG[table_nbr*4*4+(xx-4)*4+2] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[2]-orig[2]];\
+ 				err[3]=precalc_err_UR_RG[table_nbr*4*4+(xx-4)*4+3] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[3]-orig[2]];\
+			/* end unrolled loop */\
+ 			if(err[0] > err[1])\
+ 				err[0] = err[1];\
+ 			if(err[2] > err[3])\
+ 				err[2] = err[3];\
+ 			if(err[0] > err[2])\
+ 				err[0] = err[2];\
+ 			err_this_table_upper+=err[0];\
+ 			err_this_table_right+=err[0];
+
+#define ONE_PIXEL_LL_PERCEP(table_nbr,xx)\
+			orig[0]=block_2x2[xx*4];\
+			orig[1]=block_2x2[xx*4+1];\
+			orig[2]=block_2x2[xx*4+2];\
+			/* unrolled loop for(index=0;index<4;index++)*/\
+				err[0]=precalc_err_LL_RG[table_nbr*4*4+(xx-8)*4+0] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[0]-orig[2]];\
+				err[1]=precalc_err_LL_RG[table_nbr*4*4+(xx-8)*4+1] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[1]-orig[2]];\
+				err[2]=precalc_err_LL_RG[table_nbr*4*4+(xx-8)*4+2] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[2]-orig[2]];\
+				err[3]=precalc_err_LL_RG[table_nbr*4*4+(xx-8)*4+3] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[3]-orig[2]];\
+			/* end unrolled loop*/\
+		 	if(err[0] > err[1])\
+				err[0] = err[1];\
+			if(err[2] > err[3])\
+				err[2] = err[3];\
+			if(err[0] > err[2])\
+				err[0] = err[2];\
+			err_this_table_lower+=err[0];\
+			err_this_table_left+=err[0];\
+
+#define ONE_PIXEL_LR_PERCEP(table_nbr,xx)\
+			orig[0]=block_2x2[xx*4];\
+			orig[1]=block_2x2[xx*4+1];\
+			orig[2]=block_2x2[xx*4+2];\
+			/* unrolled loop for(index=0;index<4;index++)*/\
+				err[0]=precalc_err_LR_RG[table_nbr*4*4+(xx-12)*4+0] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[0]-orig[2]];\
+				err[1]=precalc_err_LR_RG[table_nbr*4*4+(xx-12)*4+1] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[1]-orig[2]];\
+				err[2]=precalc_err_LR_RG[table_nbr*4*4+(xx-12)*4+2] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[2]-orig[2]];\
+				err[3]=precalc_err_LR_RG[table_nbr*4*4+(xx-12)*4+3] + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[approx[3]-orig[2]];\
+			/* end unrolled loop*/\
+			if(err[0] > err[1])\
+				err[0] = err[1];\
+			if(err[2] > err[3])\
+				err[2] = err[3];\
+			if(err[0] > err[2])\
+				err[0] = err[2];\
+			err_this_table_lower+=err[0];\
+			err_this_table_right+=err[0];\
+
+#define ONE_TABLE_3_PERCEP(table_nbr)\
+		err_this_table_upper = 0;\
+		err_this_table_lower = 0;\
+		err_this_table_left = 0;\
+		err_this_table_right = 0;\
+		approx[0]=clamp_table_plus_255[color_quant1[2]+compressParamsFast[table_nbr*4+0]+255];\
+		approx[1]=clamp_table_plus_255[color_quant1[2]+compressParamsFast[table_nbr*4+1]+255];\
+		approx[2]=clamp_table_plus_255[color_quant1[2]+compressParamsFast[table_nbr*4+2]+255];\
+		approx[3]=clamp_table_plus_255[color_quant1[2]+compressParamsFast[table_nbr*4+3]+255];\
+		/* unroll loop for(xx=0; xx<4; xx++) */\
+			ONE_PIXEL_UL_PERCEP(table_nbr,0)\
+			ONE_PIXEL_UL_PERCEP(table_nbr,1)\
+			ONE_PIXEL_UL_PERCEP(table_nbr,2)\
+			ONE_PIXEL_UL_PERCEP(table_nbr,3)\
+		/* end unroll loop */\
+		/* unroll loop for(xx=12; xx<16; xx++) */\
+			ONE_PIXEL_LR_PERCEP(table_nbr,12)\
+			ONE_PIXEL_LR_PERCEP(table_nbr,13)\
+			ONE_PIXEL_LR_PERCEP(table_nbr,14)\
+			ONE_PIXEL_LR_PERCEP(table_nbr,15)\
+		/* end unroll loop */\
+		/* If error in the top left 2x2 pixel area is already larger than the best error, and */\
+		/* The same is true for the bottom right 2x2 pixel area, this combination of table and color */\
+		/* can never be part of an optimal solution and therefore we do not need to test the other */\
+		/* two 2x2 pixel areas */\
+		if((err_this_table_upper<best_err)||(err_this_table_lower<best_err))\
+		{\
+			/* unroll loop for(xx=4; xx<8; xx++) */\
+ 				ONE_PIXEL_UR_PERCEP(table_nbr,4)\
+ 				ONE_PIXEL_UR_PERCEP(table_nbr,5)\
+ 				ONE_PIXEL_UR_PERCEP(table_nbr,6)\
+ 				ONE_PIXEL_UR_PERCEP(table_nbr,7)\
+			/* end unroll loop */\
+			/* unroll loop for(xx=8; xx<12; xx++) */\
+				ONE_PIXEL_LL_PERCEP(table_nbr,8)\
+				ONE_PIXEL_LL_PERCEP(table_nbr,9)\
+				ONE_PIXEL_LL_PERCEP(table_nbr,10)\
+				ONE_PIXEL_LL_PERCEP(table_nbr,11)\
+			/* end unroll loop */\
+			if(err_this_table_upper<err_upper)\
+				err_upper = err_this_table_upper;\
+			if(err_this_table_lower<err_lower)\
+				err_lower = err_this_table_lower;\
+			if(err_this_table_left<err_left)\
+				err_left = err_this_table_left;\
+			if(err_this_table_right<err_right)\
+				err_right = err_this_table_right;\
+		}\
+
+	/*unroll loop for(table_nbr=0;table_nbr<8;table_nbr++)*/
+		ONE_TABLE_3_PERCEP(0);
+		ONE_TABLE_3_PERCEP(1);
+		ONE_TABLE_3_PERCEP(2);
+		ONE_TABLE_3_PERCEP(3);
+		ONE_TABLE_3_PERCEP(4);
+		ONE_TABLE_3_PERCEP(5);
+		ONE_TABLE_3_PERCEP(6);
+		ONE_TABLE_3_PERCEP(7);
+	/*end unroll loop*/
+} 
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Compresses one 4x4 block in ETC1 "individual" mode (two independent 4-bit-per-channel
+// base colors) by exhaustive search, using the perceptual error metric.
+//
+//   img              source pixels, 3 bytes per pixel, rows of `width` pixels
+//   width, height    image dimensions in pixels (height is only forwarded to helpers)
+//   startx, starty   top-left corner of the 4x4 block inside img
+//   compressed1      receives bits 63..32 of the block (base colors, tables, diff bit, flip bit)
+//   compressed2      receives bits 31..0 of the block (pixel indices)
+//   total_best_err   error of the best encoding already known to the caller; used as a
+//                    pruning bound. Taken by value, so the caller's copy is not modified.
+//
+// Returns the sum of the two half-block errors for the orientation that was selected.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockIndividualExhaustivePerceptual(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2, unsigned int total_best_err)
+{
+	uint8 color_quant1[3], color_quant2[3];
+
+	int enc_color1[3];
+	int best_enc_color1[3], best_enc_color2[3];
+
+	unsigned int best_pixel_indices1_MSB=0;
+	unsigned int best_pixel_indices1_LSB=0;
+	unsigned int best_pixel_indices2_MSB=0;
+	unsigned int best_pixel_indices2_LSB=0;
+
+	// Errors of the candidate color currently being tested, one per half-block.
+	unsigned int err_upper, err_lower;
+	unsigned int err_left, err_right;
+
+	// Best error found so far for each half-block, and the 4-bit color that produced it.
+	unsigned int best_err_upper = MAXERR1000;
+	unsigned int best_err_lower = MAXERR1000;
+	unsigned int best_err_left = MAXERR1000;
+	unsigned int best_err_right = MAXERR1000;
+
+	int best_upper_col[3];
+	int best_lower_col[3];
+	int best_left_col[3];
+	int best_right_col[3];
+
+	unsigned int best_table1=0, best_table2=0;
+
+	// Partial errors per 2x2 quadrant: first for the first channel only, then for the first two.
+	unsigned int precalc_err_UL_R[8*4*4];
+	unsigned int precalc_err_UR_R[8*4*4];
+	unsigned int precalc_err_LL_R[8*4*4];
+	unsigned int precalc_err_LR_R[8*4*4];
+
+	unsigned int precalc_err_UL_RG[8*4*4];
+	unsigned int precalc_err_UR_RG[8*4*4];
+	unsigned int precalc_err_LL_RG[8*4*4];
+	unsigned int precalc_err_LR_RG[8*4*4];
+
+	uint8 block_2x2[4*4*4];
+
+	unsigned int best_err;
+	int best_flip;
+
+	// Reshuffle the pixels so that the top left 2x2 pixels come first, then the top right,
+	// bottom left and bottom right 2x2 areas. Each pixel is padded to 4 bytes.
+	// Inside a quadrant the x coordinate is the outer loop and y the inner one.
+	int count = 0;
+	for(int quad = 0; quad < 4; quad++)
+	{
+		const int x0 = (quad & 1) * 2;
+		const int y0 = (quad >> 1) * 2;
+		for(int xx = x0; xx < x0+2; xx++)
+		{
+			for(int yy = y0; yy < y0+2; yy++)
+			{
+				const int src = ((starty+yy)*width+(startx+xx))*3;
+				block_2x2[count*4]   = img[src];
+				block_2x2[count*4+1] = img[src+1];
+				block_2x2[count*4+2] = img[src+2];
+				block_2x2[count*4+3] = 0;
+				count++;
+			}
+		}
+	}
+
+	// Seed the search with the average-color encoder. test1/test2 receive its block words,
+	// which are not used here; only the errors, colors and flip state are kept.
+	unsigned int test1, test2;
+	best_err = (unsigned int)compressBlockOnlyIndividualAveragePerceptual1000(img, width, height, startx, starty, test1, test2, best_enc_color1, best_enc_color2, best_flip, best_err_upper, best_err_lower, best_err_left, best_err_right, best_upper_col, best_lower_col, best_left_col, best_right_col);
+	if(best_err < total_best_err)
+		total_best_err = best_err;
+
+	// Try every 4-bit color, one channel at a time. The precompute helpers' return value is
+	// used as a "worth continuing" flag (presumably false when the partial error already
+	// exceeds total_best_err -- confirm against their definitions).
+	for(enc_color1[0]=0; enc_color1[0]<16; enc_color1[0]++)
+	{
+		color_quant1[0] = enc_color1[0] << 4 | (enc_color1[0]);
+		if(!precompute_3bittable_all_subblocksR_with_test_perceptual1000(block_2x2, color_quant1, precalc_err_UL_R, precalc_err_UR_R, precalc_err_LL_R, precalc_err_LR_R, total_best_err))
+			continue;
+
+		for(enc_color1[1]=0; enc_color1[1]<16; enc_color1[1]++)
+		{
+			color_quant1[1] = enc_color1[1] << 4 | (enc_color1[1]);
+			if(!precompute_3bittable_all_subblocksRG_withtest_perceptual1000(block_2x2, color_quant1, precalc_err_UL_R, precalc_err_UR_R, precalc_err_LL_R, precalc_err_LR_R, precalc_err_UL_RG, precalc_err_UR_RG, precalc_err_LL_RG, precalc_err_LR_RG, total_best_err))
+				continue;
+
+			// Set when any half-block improved, i.e. when the pruning bound may tighten.
+			bool needtest = false;
+			for(enc_color1[2]=0; enc_color1[2]<16; enc_color1[2]++)
+			{
+				color_quant1[2] = enc_color1[2] << 4 | (enc_color1[2]);
+				tryalltables_3bittable_all_subblocks_using_precalc_perceptual1000(block_2x2, color_quant1, precalc_err_UL_RG, precalc_err_UR_RG, precalc_err_LL_RG, precalc_err_LR_RG, err_upper, err_lower, err_left, err_right, total_best_err);
+				if(err_upper<best_err_upper)
+				{
+					best_err_upper = err_upper;
+					best_upper_col[0] = enc_color1[0];
+					best_upper_col[1] = enc_color1[1];
+					best_upper_col[2] = enc_color1[2];
+					needtest = true;
+				}
+				if(err_lower<best_err_lower)
+				{
+					best_err_lower = err_lower;
+					best_lower_col[0] = enc_color1[0];
+					best_lower_col[1] = enc_color1[1];
+					best_lower_col[2] = enc_color1[2];
+					needtest = true;
+				}
+				if(err_left<best_err_left)
+				{
+					best_err_left = err_left;
+					best_left_col[0] = enc_color1[0];
+					best_left_col[1] = enc_color1[1];
+					best_left_col[2] = enc_color1[2];
+					needtest = true;
+				}
+				if(err_right<best_err_right)
+				{
+					best_err_right = err_right;
+					best_right_col[0] = enc_color1[0];
+					best_right_col[1] = enc_color1[1];
+					best_right_col[2] = enc_color1[2];
+					needtest = true;
+				}
+			}
+			if(needtest)
+			{
+				// The block error is the better of the two possible splits.
+				const unsigned int err_lying = best_err_upper+best_err_lower;
+				const unsigned int err_standing = best_err_left+best_err_right;
+				best_err = (err_lying < err_standing) ? err_lying : err_standing;
+				if(best_err < total_best_err)
+					total_best_err = best_err;
+			}
+		}
+	}
+
+	// Pick the orientation: flip = 1 splits the block into upper/lower halves,
+	// flip = 0 into left/right halves.
+	const bool use_upper_lower = (best_err_upper+best_err_lower < best_err_left+best_err_right);
+	const int *first_col = use_upper_lower ? best_upper_col : best_left_col;
+	const int *second_col = use_upper_lower ? best_lower_col : best_right_col;
+	best_flip = use_upper_lower ? 1 : 0;
+	best_err = use_upper_lower ? (best_err_upper+best_err_lower) : (best_err_left+best_err_right);
+	for(int c = 0; c < 3; c++)
+	{
+		best_enc_color1[c] = first_col[c];
+		best_enc_color2[c] = second_col[c];
+		// Expand the 4-bit colors to 8 bits by replicating the nibble.
+		color_quant1[c] = best_enc_color1[c] << 4 | (best_enc_color1[c]);
+		color_quant2[c] = best_enc_color2[c] << 4 | (best_enc_color2[c]);
+	}
+
+	// Find the table and pixel indices for each half-block with its chosen base color.
+	if(best_flip == 0)
+	{
+		tryalltables_3bittable2x4percep1000(img,width,height,startx,starty,color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+		tryalltables_3bittable2x4percep1000(img,width,height,startx+2,starty,color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+	}
+	else
+	{
+		tryalltables_3bittable4x2percep1000(img,width,height,startx,starty,color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+		tryalltables_3bittable4x2percep1000(img,width,height,startx,starty+2,color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+	}
+
+	//     ETC1_RGB8_OES:
+	// 
+	//     a) bit layout in bits 63 through 32 if diffbit = 0
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+	//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	//     
+	//     b) bit layout in bits 63 through 32 if diffbit = 1
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	// 
+	//     c) bit layout in bits 31 through 0 (in both cases)
+	// 
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
+	//      --------------------------------------------------------------------------------------------------
+	//     |       most significant pixel index bits       |         least significant pixel index bits       |  
+	//     | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
+	//      --------------------------------------------------------------------------------------------------      
+
+	// This function emits layout a) (two 4-bit base colors per channel), which is selected by
+	// diff bit = 0. Write it as a real 1-bit field: a zero-width field must not be passed to
+	// the bit-packing macro, and a set diff bit would make the block decode as layout b).
+	const int diffbit = 0;
+	compressed1 = 0;
+	PUTBITSHIGH( compressed1, diffbit,            1, 33);
+	PUTBITSHIGH( compressed1, best_enc_color1[0], 4, 63);
+	PUTBITSHIGH( compressed1, best_enc_color1[1], 4, 55);
+	PUTBITSHIGH( compressed1, best_enc_color1[2], 4, 47);
+	PUTBITSHIGH( compressed1, best_enc_color2[0], 4, 59);
+	PUTBITSHIGH( compressed1, best_enc_color2[1], 4, 51);
+	PUTBITSHIGH( compressed1, best_enc_color2[2], 4, 43);
+	PUTBITSHIGH( compressed1, best_table1,        3, 39);
+	PUTBITSHIGH( compressed1, best_table2,        3, 36);
+	PUTBITSHIGH( compressed1, best_flip,          1, 32);
+
+	if(best_flip == 0)
+	{
+		// Left/right split: each half contributes 8 index bits to the MSB and LSB planes.
+		compressed2 = 0;
+		PUTBITS( compressed2, (best_pixel_indices1_MSB     ), 8, 23);
+		PUTBITS( compressed2, (best_pixel_indices2_MSB     ), 8, 31);
+		PUTBITS( compressed2, (best_pixel_indices1_LSB     ), 8, 7);
+		PUTBITS( compressed2, (best_pixel_indices2_LSB     ), 8, 15);
+	}
+	else
+	{
+		// Upper/lower split: the second half's indices are merged in shifted up by 2 bits.
+		best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+		best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);
+		compressed2 = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+	}
+
+	return best_err;
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Compresses one 4x4 block in ETC1 "individual" mode (two independent 4-bit-per-channel
+// base colors) by exhaustive search, using the non-perceptual variants of the error helpers.
+//
+//   img              source pixels, 3 bytes per pixel, rows of `width` pixels
+//   width, height    image dimensions in pixels (height is only forwarded to helpers)
+//   startx, starty   top-left corner of the 4x4 block inside img
+//   compressed1      receives bits 63..32 of the block (base colors, tables, diff bit, flip bit)
+//   compressed2      receives bits 31..0 of the block (pixel indices)
+//   total_best_err   error of the best encoding already known to the caller; used as a
+//                    pruning bound. Taken by value, so the caller's copy is not modified.
+//
+// Returns the sum of the two half-block errors for the orientation that was selected.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockIndividualExhaustive(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2, unsigned int total_best_err)
+{
+	uint8 color_quant1[3], color_quant2[3];
+
+	int enc_color1[3];
+	int best_enc_color1[3], best_enc_color2[3];
+
+	unsigned int best_pixel_indices1_MSB=0;
+	unsigned int best_pixel_indices1_LSB=0;
+	unsigned int best_pixel_indices2_MSB=0;
+	unsigned int best_pixel_indices2_LSB=0;
+
+	// Errors of the candidate color currently being tested, one per half-block.
+	unsigned int err_upper, err_lower;
+	unsigned int err_left, err_right;
+
+	// Best error found so far for each half-block, and the 4-bit color that produced it.
+	unsigned int best_err_upper = 255*255*16*3;
+	unsigned int best_err_lower = 255*255*16*3;
+	unsigned int best_err_left = 255*255*16*3;
+	unsigned int best_err_right = 255*255*16*3;
+
+	int best_upper_col[3];
+	int best_lower_col[3];
+	int best_left_col[3];
+	int best_right_col[3];
+
+	unsigned int best_table1=0, best_table2=0;
+
+	// Partial errors per 2x2 quadrant: first for the first channel only, then for the first two.
+	unsigned int precalc_err_UL_R[8*4*4];
+	unsigned int precalc_err_UR_R[8*4*4];
+	unsigned int precalc_err_LL_R[8*4*4];
+	unsigned int precalc_err_LR_R[8*4*4];
+
+	unsigned int precalc_err_UL_RG[8*4*4];
+	unsigned int precalc_err_UR_RG[8*4*4];
+	unsigned int precalc_err_LL_RG[8*4*4];
+	unsigned int precalc_err_LR_RG[8*4*4];
+
+	uint8 block_2x2[4*4*4];
+
+	unsigned int best_err;
+	int best_flip;
+
+	// Reshuffle the pixels so that the top left 2x2 pixels come first, then the top right,
+	// bottom left and bottom right 2x2 areas. Each pixel is padded to 4 bytes.
+	// Inside a quadrant the x coordinate is the outer loop and y the inner one.
+	int count = 0;
+	for(int quad = 0; quad < 4; quad++)
+	{
+		const int x0 = (quad & 1) * 2;
+		const int y0 = (quad >> 1) * 2;
+		for(int xx = x0; xx < x0+2; xx++)
+		{
+			for(int yy = y0; yy < y0+2; yy++)
+			{
+				const int src = ((starty+yy)*width+(startx+xx))*3;
+				block_2x2[count*4]   = img[src];
+				block_2x2[count*4+1] = img[src+1];
+				block_2x2[count*4+2] = img[src+2];
+				block_2x2[count*4+3] = 0;
+				count++;
+			}
+		}
+	}
+
+	// Seed the search with the average-color encoder. test1/test2 receive its block words,
+	// which are not used here; only the errors, colors and flip state are kept.
+	unsigned int test1, test2;
+	best_err = (unsigned int)compressBlockOnlyIndividualAverage(img, width, height, startx, starty, test1, test2, best_enc_color1, best_enc_color2, best_flip, best_err_upper, best_err_lower, best_err_left, best_err_right, best_upper_col, best_lower_col, best_left_col, best_right_col);
+	if(best_err < total_best_err)
+		total_best_err = best_err;
+
+	// Try every 4-bit color, one channel at a time. The precompute helpers' return value is
+	// used as a "worth continuing" flag (presumably false when the partial error already
+	// exceeds total_best_err -- confirm against their definitions).
+	for(enc_color1[0]=0; enc_color1[0]<16; enc_color1[0]++)
+	{
+		color_quant1[0] = enc_color1[0] << 4 | (enc_color1[0]);
+		if(!precompute_3bittable_all_subblocksR_with_test(block_2x2, color_quant1, precalc_err_UL_R, precalc_err_UR_R, precalc_err_LL_R, precalc_err_LR_R, total_best_err))
+			continue;
+
+		for(enc_color1[1]=0; enc_color1[1]<16; enc_color1[1]++)
+		{
+			color_quant1[1] = enc_color1[1] << 4 | (enc_color1[1]);
+			if(!precompute_3bittable_all_subblocksRG_withtest(block_2x2, color_quant1, precalc_err_UL_R, precalc_err_UR_R, precalc_err_LL_R, precalc_err_LR_R, precalc_err_UL_RG, precalc_err_UR_RG, precalc_err_LL_RG, precalc_err_LR_RG, total_best_err))
+				continue;
+
+			// Set when any half-block improved, i.e. when the pruning bound may tighten.
+			bool needtest = false;
+			for(enc_color1[2]=0; enc_color1[2]<16; enc_color1[2]++)
+			{
+				color_quant1[2] = enc_color1[2] << 4 | (enc_color1[2]);
+				tryalltables_3bittable_all_subblocks_using_precalc(block_2x2, color_quant1, precalc_err_UL_RG, precalc_err_UR_RG, precalc_err_LL_RG, precalc_err_LR_RG, err_upper, err_lower, err_left, err_right, total_best_err);
+				if(err_upper<best_err_upper)
+				{
+					best_err_upper = err_upper;
+					best_upper_col[0] = enc_color1[0];
+					best_upper_col[1] = enc_color1[1];
+					best_upper_col[2] = enc_color1[2];
+					needtest = true;
+				}
+				if(err_lower<best_err_lower)
+				{
+					best_err_lower = err_lower;
+					best_lower_col[0] = enc_color1[0];
+					best_lower_col[1] = enc_color1[1];
+					best_lower_col[2] = enc_color1[2];
+					needtest = true;
+				}
+				if(err_left<best_err_left)
+				{
+					best_err_left = err_left;
+					best_left_col[0] = enc_color1[0];
+					best_left_col[1] = enc_color1[1];
+					best_left_col[2] = enc_color1[2];
+					needtest = true;
+				}
+				if(err_right<best_err_right)
+				{
+					best_err_right = err_right;
+					best_right_col[0] = enc_color1[0];
+					best_right_col[1] = enc_color1[1];
+					best_right_col[2] = enc_color1[2];
+					needtest = true;
+				}
+			}
+			if(needtest)
+			{
+				// The block error is the better of the two possible splits.
+				const unsigned int err_lying = best_err_upper+best_err_lower;
+				const unsigned int err_standing = best_err_left+best_err_right;
+				best_err = (err_lying < err_standing) ? err_lying : err_standing;
+				if(best_err < total_best_err)
+					total_best_err = best_err;
+			}
+		}
+	}
+
+	// Pick the orientation: flip = 1 splits the block into upper/lower halves,
+	// flip = 0 into left/right halves.
+	const bool use_upper_lower = (best_err_upper+best_err_lower < best_err_left+best_err_right);
+	const int *first_col = use_upper_lower ? best_upper_col : best_left_col;
+	const int *second_col = use_upper_lower ? best_lower_col : best_right_col;
+	best_flip = use_upper_lower ? 1 : 0;
+	best_err = use_upper_lower ? (best_err_upper+best_err_lower) : (best_err_left+best_err_right);
+	for(int c = 0; c < 3; c++)
+	{
+		best_enc_color1[c] = first_col[c];
+		best_enc_color2[c] = second_col[c];
+		// Expand the 4-bit colors to 8 bits by replicating the nibble.
+		color_quant1[c] = best_enc_color1[c] << 4 | (best_enc_color1[c]);
+		color_quant2[c] = best_enc_color2[c] << 4 | (best_enc_color2[c]);
+	}
+
+	// Find the table and pixel indices for each half-block with its chosen base color.
+	if(best_flip == 0)
+	{
+		tryalltables_3bittable2x4(img,width,height,startx,starty,color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+		tryalltables_3bittable2x4(img,width,height,startx+2,starty,color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+	}
+	else
+	{
+		tryalltables_3bittable4x2(img,width,height,startx,starty,color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+		tryalltables_3bittable4x2(img,width,height,startx,starty+2,color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+	}
+
+	//     ETC1_RGB8_OES:
+	// 
+	//     a) bit layout in bits 63 through 32 if diffbit = 0
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+	//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	//     
+	//     b) bit layout in bits 63 through 32 if diffbit = 1
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	// 
+	//     c) bit layout in bits 31 through 0 (in both cases)
+	// 
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
+	//      --------------------------------------------------------------------------------------------------
+	//     |       most significant pixel index bits       |         least significant pixel index bits       |  
+	//     | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
+	//      --------------------------------------------------------------------------------------------------      
+
+	// This function emits layout a) (two 4-bit base colors per channel), which is selected by
+	// diff bit = 0. Write it as a real 1-bit field: a zero-width field must not be passed to
+	// the bit-packing macro, and a set diff bit would make the block decode as layout b).
+	const int diffbit = 0;
+	compressed1 = 0;
+	PUTBITSHIGH( compressed1, diffbit,            1, 33);
+	PUTBITSHIGH( compressed1, best_enc_color1[0], 4, 63);
+	PUTBITSHIGH( compressed1, best_enc_color1[1], 4, 55);
+	PUTBITSHIGH( compressed1, best_enc_color1[2], 4, 47);
+	PUTBITSHIGH( compressed1, best_enc_color2[0], 4, 59);
+	PUTBITSHIGH( compressed1, best_enc_color2[1], 4, 51);
+	PUTBITSHIGH( compressed1, best_enc_color2[2], 4, 43);
+	PUTBITSHIGH( compressed1, best_table1,        3, 39);
+	PUTBITSHIGH( compressed1, best_table2,        3, 36);
+	PUTBITSHIGH( compressed1, best_flip,          1, 32);
+
+	if(best_flip == 0)
+	{
+		// Left/right split: each half contributes 8 index bits to the MSB and LSB planes.
+		compressed2 = 0;
+		PUTBITS( compressed2, (best_pixel_indices1_MSB     ), 8, 23);
+		PUTBITS( compressed2, (best_pixel_indices2_MSB     ), 8, 31);
+		PUTBITS( compressed2, (best_pixel_indices1_LSB     ), 8, 7);
+		PUTBITS( compressed2, (best_pixel_indices2_LSB     ), 8, 15);
+	}
+	else
+	{
+		// Upper/lower split: the second half's indices are merged in shifted up by 2 bits.
+		best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+		best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);
+		compressed2 = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+	}
+
+	return best_err;
+}
+#endif
+ 
+#if EXHAUSTIVE_CODE_ACTIVE
+// Compresses the differential mode exhaustively (perceptual error metric).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockDifferentialExhaustivePerceptual(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2, unsigned int best_error_so_far)
+{
+	unsigned int best_err_norm_diff = MAXERR1000;
+	unsigned int best_err_norm_444 = MAXERR1000;
+	unsigned int best_err_flip_diff = MAXERR1000;
+	unsigned int best_err_flip_444 = MAXERR1000;
+	uint8 color_quant1[3], color_quant2[3];
+
+	int enc_color1[3], enc_color2[3], diff[3];
+	int best_enc_color1[3], best_enc_color2[3];
+	signed char bytediff[3];
+	
+	unsigned int best_pixel_indices1_MSB=0;
+	unsigned int best_pixel_indices1_LSB=0;
+	unsigned int best_pixel_indices2_MSB=0;
+	unsigned int best_pixel_indices2_LSB=0;
+	unsigned int pixel_indices1_MSB=0;
+	unsigned int pixel_indices1_LSB=0;
+	unsigned int pixel_indices2_MSB=0;
+
+	unsigned int *err_upper, *err_lower;
+	unsigned int *err_left, *err_right;
+
+	unsigned int pixel_indices2_LSB=0;
+
+	unsigned int table1=0, table2=0;
+	unsigned int best_table1=0, best_table2=0;
+
+	unsigned int precalc_err_UL_R[8*4*4];
+	unsigned int precalc_err_UR_R[8*4*4];
+	unsigned int precalc_err_LL_R[8*4*4];
+	unsigned int precalc_err_LR_R[8*4*4];
+
+	unsigned int precalc_err_UL_RG[8*4*4];
+	unsigned int precalc_err_UR_RG[8*4*4];
+	unsigned int precalc_err_LL_RG[8*4*4];
+	unsigned int precalc_err_LR_RG[8*4*4];
+
+	unsigned int best_error_using_diff_mode;
+
+	int diffbit;
+	uint8 block_2x2[4*4*4];
+
+	unsigned int error, error_lying, error_standing;
+	unsigned int *err_lower_adr;
+	int best_flip;
+	unsigned int *err_right_adr;
+
+	int xx,yy,count = 0;
+
+	// Reshuffle pixels so that the top left 2x2 pixels arrive first, then the top right 2x2 pixels etc. Also use 4 bytes per pixel so that each pixel occupies one 32-bit word.
+	for(xx = 0; xx<2; xx++)
+	{
+		for(yy=0; yy<2; yy++)
+		{
+			block_2x2[(count)*4] = img[((starty+yy)*width+(startx+xx))*3];
+			block_2x2[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block_2x2[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block_2x2[(count)*4+3] = 0;
+			count++;
+		}
+	}
+	for(xx = 2; xx<4; xx++)
+	{
+		for(yy=0; yy<2; yy++)
+		{
+			block_2x2[(count)*4] = img[((starty+yy)*width+(startx+xx))*3];
+			block_2x2[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block_2x2[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block_2x2[(count)*4+3] = 0;
+			count++;
+		}
+	}
+	for(xx = 0; xx<2; xx++)
+	{
+		for(yy=2; yy<4; yy++)
+		{
+			block_2x2[(count)*4] = img[((starty+yy)*width+(startx+xx))*3];
+			block_2x2[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block_2x2[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block_2x2[(count)*4+3] = 0;
+			count++;
+		}
+	}
+	for(xx = 2; xx<4; xx++)
+	{
+		for(yy=2; yy<4; yy++)
+		{
+			block_2x2[(count)*4] = img[((starty+yy)*width+(startx+xx))*3];
+			block_2x2[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block_2x2[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block_2x2[(count)*4+3] = 0;
+			count++;
+		}
+	}
+
+ 	unsigned int test1, test2;
+	best_error_using_diff_mode = compressBlockOnlyDiffFlipAveragePerceptual1000(img, width, height, startx, starty, test1, test2);
+	if(best_error_using_diff_mode < best_error_so_far)
+		best_error_so_far = best_error_using_diff_mode;
+
+	// Decode the parameters so that we have a worst case color pair and a flip status
+	best_flip = test1 & 1;
+	best_enc_color1[0] = GETBITSHIGH( test1, 5, 63);
+	best_enc_color1[1] = GETBITSHIGH( test1, 5, 55);
+	best_enc_color1[2] = GETBITSHIGH( test1, 5, 47);
+	bytediff[0] = GETBITSHIGH( test1, 3, 58);
+	bytediff[1] = GETBITSHIGH( test1, 3, 50);
+	bytediff[2] = GETBITSHIGH( test1, 3, 42);
+	bytediff[0] = (bytediff[0] << 5);
+	bytediff[1] = (bytediff[1] << 5);
+	bytediff[2] = (bytediff[2] << 5);
+	bytediff[0] = bytediff[0] >> 5;
+	bytediff[1] = bytediff[1] >> 5;
+	bytediff[2] = bytediff[2] >> 5;
+	best_enc_color2[0]= best_enc_color1[0] + bytediff[0];
+	best_enc_color2[1]= best_enc_color1[1] + bytediff[1];
+	best_enc_color2[2]= best_enc_color1[2] + bytediff[2];
+
+	// allocate memory for errors:
+	err_upper = (unsigned int*) malloc(32*32*32*sizeof(unsigned int));
+	if(!err_upper){printf("Out of memory allocating \n");exit(1);}
+	err_lower = (unsigned int*) malloc(32*32*32*sizeof(unsigned int));
+	if(!err_lower){printf("Out of memory allocating \n");exit(1);}
+	err_left = (unsigned int*) malloc(32*32*32*sizeof(unsigned int));
+	if(!err_left){printf("Out of memory allocating \n");exit(1);}
+	err_right = (unsigned int*) malloc(32*32*32*sizeof(unsigned int));
+	if(!err_right){printf("Out of memory allocating \n");exit(1);}
+
+	int q;
+	// Calculate all errors
+	for(enc_color1[0]=0; enc_color1[0]<32; enc_color1[0]++)
+	{
+		color_quant1[0] = enc_color1[0] << 3 | (enc_color1[0] >> 2);
+		if(precompute_3bittable_all_subblocksR_with_test_perceptual1000(block_2x2, color_quant1, precalc_err_UL_R, precalc_err_UR_R, precalc_err_LL_R, precalc_err_LR_R, best_error_so_far))
+		{
+			for(enc_color1[1]=0; enc_color1[1]<32; enc_color1[1]++)
+			{
+				color_quant1[1] = enc_color1[1] << 3 | (enc_color1[1] >> 2);
+				if(precompute_3bittable_all_subblocksRG_withtest_perceptual1000(block_2x2, color_quant1, precalc_err_UL_R, precalc_err_UR_R, precalc_err_LL_R, precalc_err_LR_R, precalc_err_UL_RG, precalc_err_UR_RG, precalc_err_LL_RG, precalc_err_LR_RG, best_error_so_far))
+				{
+					for(enc_color1[2]=0; enc_color1[2]<32; enc_color1[2]++)
+					{
+ 						color_quant1[2] = enc_color1[2] << 3 | (enc_color1[2] >> 2);
+						tryalltables_3bittable_all_subblocks_using_precalc_perceptual1000(block_2x2, color_quant1, precalc_err_UL_RG, precalc_err_UR_RG, precalc_err_LL_RG, precalc_err_LR_RG, err_upper[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]], err_lower[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]], err_left[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]], err_right[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]], best_error_so_far);
+					}
+				}
+				else
+				{
+					for(q=0;q<32;q++)
+					{
+						err_upper[32*32*enc_color1[0]+32*enc_color1[1]+q] = MAXERR1000;
+						err_lower[32*32*enc_color1[0]+32*enc_color1[1]+q] = MAXERR1000;
+						err_left[32*32*enc_color1[0]+32*enc_color1[1]+q] = MAXERR1000;
+						err_right[32*32*enc_color1[0]+32*enc_color1[1]+q] = MAXERR1000;
+					}
+				}
+			}
+		}
+		else
+		{
+			for(q=0;q<32*32;q++)
+			{
+				err_upper[32*32*enc_color1[0]+q] = MAXERR1000;
+				err_lower[32*32*enc_color1[0]+q] = MAXERR1000;
+				err_left[32*32*enc_color1[0]+q] = MAXERR1000;
+				err_right[32*32*enc_color1[0]+q] = MAXERR1000;
+			}
+		}
+	}
+	for(enc_color1[0]=0; enc_color1[0]<32; enc_color1[0]++)
+	{
+		for(enc_color1[1]=0; enc_color1[1]<32; enc_color1[1]++)
+		{
+			for(enc_color1[2]=0; enc_color1[2]<4; enc_color1[2]++)
+			{
+				error_lying = err_upper[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]];
+				error_standing = err_left[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]];
+				if(error_lying < best_error_so_far || error_standing < best_error_so_far)
+				{
+					for(enc_color2[0]=JAS_MAX(0,enc_color1[0]-4); enc_color2[0]<JAS_MIN(enc_color1[0]+4,32); enc_color2[0]++)
+					{
+						for(enc_color2[1]=JAS_MAX(0,enc_color1[1]-4); enc_color2[1]<JAS_MIN(enc_color1[1]+4,32); enc_color2[1]++)
+						{
+							err_lower_adr = &err_lower[32*32*enc_color2[0]+32*enc_color2[1]];
+							err_right_adr = &err_right[32*32*enc_color2[0]+32*enc_color2[1]];
+							for(enc_color2[2]=JAS_MAX(0,enc_color1[2]-4); enc_color2[2]<JAS_MIN(enc_color1[2]+4,32); enc_color2[2]++)
+							{
+								error = error_lying+err_lower_adr[enc_color2[2]];
+								if(error<best_error_so_far)
+								{
+									best_flip = 1;
+									best_error_so_far = error;
+									best_error_using_diff_mode = error;
+									best_enc_color1[0] = enc_color1[0];
+									best_enc_color1[1] = enc_color1[1];
+									best_enc_color1[2] = enc_color1[2];
+									best_enc_color2[0] = enc_color2[0];
+									best_enc_color2[1] = enc_color2[1];
+									best_enc_color2[2] = enc_color2[2];
+								}
+								error = error_standing+err_right_adr[enc_color2[2]];
+								if(error<best_error_so_far)
+								{
+									best_flip = 0;
+									best_error_so_far = error;
+									best_error_using_diff_mode = error;
+									best_enc_color1[0] = enc_color1[0];
+									best_enc_color1[1] = enc_color1[1];
+									best_enc_color1[2] = enc_color1[2];
+									best_enc_color2[0] = enc_color2[0];
+									best_enc_color2[1] = enc_color2[1];
+									best_enc_color2[2] = enc_color2[2];
+								}
+							}
+						}
+					}
+				}
+			}
+			for(enc_color1[2]=4; enc_color1[2]<28; enc_color1[2]++)
+			{
+				error_lying = err_upper[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]];
+				error_standing = err_left[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]];
+				if(error_lying < best_error_so_far || error_standing < best_error_so_far)
+				{
+					for(enc_color2[0]=JAS_MAX(0,enc_color1[0]-4); enc_color2[0]<JAS_MIN(enc_color1[0]+4,32); enc_color2[0]++)
+					{
+						for(enc_color2[1]=JAS_MAX(0,enc_color1[1]-4); enc_color2[1]<JAS_MIN(enc_color1[1]+4,32); enc_color2[1]++)
+						{
+							err_lower_adr = &err_lower[32*32*enc_color2[0]+32*enc_color2[1]];
+							err_right_adr = &err_right[32*32*enc_color2[0]+32*enc_color2[1]];
+							// since enc_color1[2] is between 4 and 27 here, enc_color2[2] stays within 0..30 and we do not need to clamp the loop on the next line
+							for(enc_color2[2]=enc_color1[2]-4; enc_color2[2]<enc_color1[2]+4; enc_color2[2]++)
+							{
+								error = error_lying+err_lower_adr[enc_color2[2]];
+								if(error<best_error_so_far)
+								{
+									best_flip = 1;
+									best_error_so_far = error;
+									best_error_using_diff_mode = error;
+									best_enc_color1[0] = enc_color1[0];
+									best_enc_color1[1] = enc_color1[1];
+									best_enc_color1[2] = enc_color1[2];
+									best_enc_color2[0] = enc_color2[0];
+									best_enc_color2[1] = enc_color2[1];
+									best_enc_color2[2] = enc_color2[2];
+								}
+								error = error_standing+err_right_adr[enc_color2[2]];
+								if(error<best_error_so_far)
+								{
+									best_flip = 0;
+									best_error_so_far = error;
+									best_error_using_diff_mode = error;
+									best_enc_color1[0] = enc_color1[0];
+									best_enc_color1[1] = enc_color1[1];
+									best_enc_color1[2] = enc_color1[2];
+									best_enc_color2[0] = enc_color2[0];
+									best_enc_color2[1] = enc_color2[1];
+									best_enc_color2[2] = enc_color2[2];
+								}
+							}
+						}
+					}
+				}
+			}
+			for(enc_color1[2]=28; enc_color1[2]<32; enc_color1[2]++)
+			{
+				error_lying = err_upper[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]];
+				error_standing = err_left[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]];
+				if(error_lying < best_error_so_far || error_standing < best_error_so_far)
+				{
+					for(enc_color2[0]=JAS_MAX(0,enc_color1[0]-4); enc_color2[0]<JAS_MIN(enc_color1[0]+4,32); enc_color2[0]++)
+					{
+						for(enc_color2[1]=JAS_MAX(0,enc_color1[1]-4); enc_color2[1]<JAS_MIN(enc_color1[1]+4,32); enc_color2[1]++)
+						{
+							err_lower_adr = &err_lower[32*32*enc_color2[0]+32*enc_color2[1]];
+							err_right_adr = &err_right[32*32*enc_color2[0]+32*enc_color2[1]];
+							for(enc_color2[2]=JAS_MAX(0,enc_color1[2]-4); enc_color2[2]<JAS_MIN(enc_color1[2]+4,32); enc_color2[2]++)
+							{
+								error = error_lying+err_lower_adr[enc_color2[2]];
+								if(error<best_error_so_far)
+								{
+									best_flip = 1;
+									best_error_so_far = error;
+									best_error_using_diff_mode = error;
+									best_enc_color1[0] = enc_color1[0];
+									best_enc_color1[1] = enc_color1[1];
+									best_enc_color1[2] = enc_color1[2];
+									best_enc_color2[0] = enc_color2[0];
+									best_enc_color2[1] = enc_color2[1];
+									best_enc_color2[2] = enc_color2[2];
+								}
+								error = error_standing+err_right_adr[enc_color2[2]];
+								if(error<best_error_so_far)
+								{
+									best_flip = 0;
+									best_error_so_far = error;
+									best_error_using_diff_mode = error;
+									best_enc_color1[0] = enc_color1[0];
+									best_enc_color1[1] = enc_color1[1];
+									best_enc_color1[2] = enc_color1[2];
+									best_enc_color2[0] = enc_color2[0];
+									best_enc_color2[1] = enc_color2[1];
+									best_enc_color2[2] = enc_color2[2];
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+ 	}
+
+ 	free(err_upper);
+ 	free(err_lower);
+ 	free(err_left);
+ 	free(err_right);
+
+	color_quant1[0] = best_enc_color1[0] << 3 | (best_enc_color1[0] >> 2);
+	color_quant1[1] = best_enc_color1[1] << 3 | (best_enc_color1[1] >> 2);
+	color_quant1[2] = best_enc_color1[2] << 3 | (best_enc_color1[2] >> 2);
+	if(best_flip == 0)
+		tryalltables_3bittable2x4percep1000(img,width,height,startx,starty,color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+	else
+		tryalltables_3bittable4x2percep1000(img,width,height,startx,starty,color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+
+	color_quant2[0] = best_enc_color2[0] << 3 | (best_enc_color2[0] >> 2);
+	color_quant2[1] = best_enc_color2[1] << 3 | (best_enc_color2[1] >> 2);
+	color_quant2[2] = best_enc_color2[2] << 3 | (best_enc_color2[2] >> 2);
+	if(best_flip == 0)
+		tryalltables_3bittable2x4percep1000(img,width,height,startx+2,starty,color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+	else
+		tryalltables_3bittable4x2percep1000(img,width,height,startx,starty+2,color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+	diff[0] = best_enc_color2[0]-best_enc_color1[0];	
+	diff[1] = best_enc_color2[1]-best_enc_color1[1];	
+	diff[2] = best_enc_color2[2]-best_enc_color1[2];
+
+	//     ETC1_RGB8_OES:
+	// 
+	//     a) bit layout in bits 63 through 32 if diffbit = 0
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+	//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	//     
+	//     b) bit layout in bits 63 through 32 if diffbit = 1
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	// 
+	//     c) bit layout in bits 31 through 0 (in both cases)
+	// 
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
+	//      --------------------------------------------------------------------------------------------------
+	//     |       most significant pixel index bits       |         least significant pixel index bits       |  
+	//     | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
+	//      --------------------------------------------------------------------------------------------------      
+
+	diffbit = 1;
+	compressed1 = 0;
+	PUTBITSHIGH( compressed1, diffbit,		      1, 33);
+ 	PUTBITSHIGH( compressed1, best_enc_color1[0], 5, 63);
+ 	PUTBITSHIGH( compressed1, best_enc_color1[1], 5, 55);
+ 	PUTBITSHIGH( compressed1, best_enc_color1[2], 5, 47);
+ 	PUTBITSHIGH( compressed1, diff[0],            3, 58);
+ 	PUTBITSHIGH( compressed1, diff[1],            3, 50);
+ 	PUTBITSHIGH( compressed1, diff[2],            3, 42);
+ 	PUTBITSHIGH( compressed1, best_table1,	      3, 39);
+ 	PUTBITSHIGH( compressed1, best_table2,	      3, 36);
+ 	PUTBITSHIGH( compressed1, best_flip,		  1, 32);
+
+	if(best_flip == 0)
+	{
+		compressed2 = 0;
+		PUTBITS( compressed2, (best_pixel_indices1_MSB     ), 8, 23);
+		PUTBITS( compressed2, (best_pixel_indices2_MSB     ), 8, 31);
+		PUTBITS( compressed2, (best_pixel_indices1_LSB     ), 8, 7);
+		PUTBITS( compressed2, (best_pixel_indices2_LSB     ), 8, 15);
+	}
+	else
+	{
+		best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2);
+		best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);		
+		compressed2 = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+	}
+	return best_error_using_diff_mode;
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Compresses the differential mode exhaustively: every 5-bit base color for the first half-block is paired with every second-half color whose per-channel difference lies in [-4,+3], for both flip orientations (with pruning against the best error known so far). The encoded block is written to compressed1 (bits 63..32) and compressed2 (bits 31..0) and the best differential error found is returned. previous_best_err only tightens the pruning bound; the return value is not clamped to it.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockDifferentialExhaustive(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2, unsigned int previous_best_err)
+{
+	unsigned int best_err_norm_diff = 255*255*16*3; // NOTE(review): this and the three best_err_* locals below are never referenced again in this function
+	unsigned int best_err_norm_444 = 255*255*16*3;
+	unsigned int best_err_flip_diff = 255*255*16*3;
+	unsigned int best_err_flip_444 = 255*255*16*3;
+	uint8 color_quant1[3], color_quant2[3];
+
+	int enc_color1[3], enc_color2[3], diff[3]; // 5-bit-per-channel candidate colors for the two half-blocks, and the per-channel difference of the winning pair
+	int best_enc_color1[3], best_enc_color2[3]; // best color pair found so far
+	
+	int min_error=255*255*8*3; // NOTE(review): never referenced again in this function
+	unsigned int best_pixel_indices1_MSB=0;
+	unsigned int best_pixel_indices1_LSB=0;
+	unsigned int best_pixel_indices2_MSB=0;
+	unsigned int best_pixel_indices2_LSB=0;
+	unsigned int pixel_indices1_MSB=0; // NOTE(review): the pixel_indices* locals (without the best_ prefix) are never referenced again in this function
+	unsigned int pixel_indices1_LSB=0;
+	unsigned int pixel_indices2_MSB=0;
+
+	unsigned int *err_upper, *err_lower; // heap tables (32*32*32 entries), indexed by 32*32*r + 32*g + b of a 5-bit color; combined for the flip=1 candidates
+	unsigned int *err_left, *err_right; // same layout; combined for the flip=0 candidates
+
+	unsigned int pixel_indices2_LSB=0;
+
+	unsigned int table1=0, table2=0; // NOTE(review): never referenced again in this function
+	unsigned int best_table1=0, best_table2=0;
+	// Scratch tables handed to the precompute_*/tryalltables_* helpers below; their contents are defined by those helpers (not visible here).
+	unsigned int precalc_err_UL_R[8*4*4];
+	unsigned int precalc_err_UR_R[8*4*4];
+	unsigned int precalc_err_LL_R[8*4*4];
+	unsigned int precalc_err_LR_R[8*4*4];
+
+	unsigned int precalc_err_UL_RG[8*4*4];
+	unsigned int precalc_err_UR_RG[8*4*4];
+	unsigned int precalc_err_LL_RG[8*4*4];
+	unsigned int precalc_err_LR_RG[8*4*4];
+
+	int diffbit;
+	uint8 block_2x2[4*4*4]; // 16 pixels x 4 bytes (R, G, B, 0), regrouped by 2x2 quadrant
+
+	unsigned int error, error_lying, error_standing, best_err, total_best_err; // best_err: best differential error found; total_best_err: min(best_err, previous_best_err), used for pruning
+	unsigned int *err_lower_adr;
+	int best_flip;
+	unsigned int *err_right_adr;
+
+	int xx,yy,count = 0;
+
+	// Reshuffle pixels so that the top left 2x2 pixels arrive first, then the top right 2x2 pixels etc. Also use 4 bytes per pixel (the fourth is zeroed) so that each pixel occupies one 32-bit word.
+	for(xx = 0; xx<2; xx++) // top-left quadrant; x is the outer loop, so pixels are stored column by column within each quadrant
+	{
+		for(yy=0; yy<2; yy++)
+		{
+			block_2x2[(count)*4] = img[((starty+yy)*width+(startx+xx))*3]; // img is indexed as packed 3-byte RGB pixels in rows of `width` pixels
+			block_2x2[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block_2x2[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block_2x2[(count)*4+3] = 0;
+			count++;
+		}
+	}
+	for(xx = 2; xx<4; xx++) // top-right quadrant
+	{
+		for(yy=0; yy<2; yy++)
+		{
+			block_2x2[(count)*4] = img[((starty+yy)*width+(startx+xx))*3];
+			block_2x2[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block_2x2[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block_2x2[(count)*4+3] = 0;
+			count++;
+		}
+	}
+	for(xx = 0; xx<2; xx++) // bottom-left quadrant
+	{
+		for(yy=2; yy<4; yy++)
+		{
+			block_2x2[(count)*4] = img[((starty+yy)*width+(startx+xx))*3];
+			block_2x2[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block_2x2[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block_2x2[(count)*4+3] = 0;
+			count++;
+		}
+	}
+	for(xx = 2; xx<4; xx++) // bottom-right quadrant
+	{
+		for(yy=2; yy<4; yy++)
+		{
+			block_2x2[(count)*4] = img[((starty+yy)*width+(startx+xx))*3];
+			block_2x2[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block_2x2[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block_2x2[(count)*4+3] = 0;
+			count++;
+		}
+	}
+
+	// Seed the search with the result of compressBlockOnlyDiffFlipAverage; best_enc_color1/2 and best_flip are passed to it and are presumably filled in (they are read later without any other initialization). test1/test2 are not used afterwards.
+	unsigned int test1, test2;
+	best_err = (unsigned int)compressBlockOnlyDiffFlipAverage(img, width, height, startx, starty, test1, test2, best_enc_color1, best_enc_color2, best_flip);
+	if(previous_best_err < best_err) // total_best_err = min(previous_best_err, best_err): the bound used for pruning below
+		total_best_err = previous_best_err;
+	else
+		total_best_err = best_err;
+
+	// allocate memory for errors: four tables with one entry per 5-bit RGB color (32*32*32); the process exits if an allocation fails
+	err_upper = (unsigned int*) malloc(32*32*32*sizeof(unsigned int));
+	if(!err_upper){printf("Out of memory allocating \n");exit(1);}
+	err_lower = (unsigned int*) malloc(32*32*32*sizeof(unsigned int));
+	if(!err_lower){printf("Out of memory allocating \n");exit(1);}
+	err_left = (unsigned int*) malloc(32*32*32*sizeof(unsigned int));
+	if(!err_left){printf("Out of memory allocating \n");exit(1);}
+	err_right = (unsigned int*) malloc(32*32*32*sizeof(unsigned int));
+	if(!err_right){printf("Out of memory allocating \n");exit(1);}
+
+	int q;
+	// Calculate all errors: fill the four tables for every 5-bit color. When a *_with_test / *_withtest helper returns false (presumably: the partial color cannot beat total_best_err -- confirm against the helpers), the affected entries are set to the sentinel 255*255*16*3 instead.
+	for(enc_color1[0]=0; enc_color1[0]<32; enc_color1[0]++)
+	{
+		color_quant1[0] = enc_color1[0] << 3 | (enc_color1[0] >> 2); // expand 5 bits to 8 by replicating the top 3 bits into the low bits
+		if(precompute_3bittable_all_subblocksR_with_test(block_2x2, color_quant1, precalc_err_UL_R, precalc_err_UR_R, precalc_err_LL_R, precalc_err_LR_R, total_best_err))
+		{
+			for(enc_color1[1]=0; enc_color1[1]<32; enc_color1[1]++)
+			{
+				color_quant1[1] = enc_color1[1] << 3 | (enc_color1[1] >> 2);
+				if(precompute_3bittable_all_subblocksRG_withtest(block_2x2, color_quant1, precalc_err_UL_R, precalc_err_UR_R, precalc_err_LL_R, precalc_err_LR_R, precalc_err_UL_RG, precalc_err_UR_RG, precalc_err_LL_RG, precalc_err_LR_RG, total_best_err))
+				{
+					for(enc_color1[2]=0; enc_color1[2]<32; enc_color1[2]++)
+					{
+ 						color_quant1[2] = enc_color1[2] << 3 | (enc_color1[2] >> 2);
+						tryalltables_3bittable_all_subblocks_using_precalc(block_2x2, color_quant1, precalc_err_UL_RG, precalc_err_UR_RG, precalc_err_LL_RG, precalc_err_LR_RG, err_upper[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]], err_lower[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]], err_left[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]], err_right[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]], total_best_err);
+					}
+				}
+				else // helper returned false for this (r,g): fill all 32 blue entries with the sentinel error
+				{
+					for(q=0;q<32;q++)
+					{
+						err_upper[32*32*enc_color1[0]+32*enc_color1[1]+q] = 255*255*16*3;
+						err_lower[32*32*enc_color1[0]+32*enc_color1[1]+q] = 255*255*16*3;
+						err_left[32*32*enc_color1[0]+32*enc_color1[1]+q] = 255*255*16*3;
+						err_right[32*32*enc_color1[0]+32*enc_color1[1]+q] = 255*255*16*3;
+					}
+				}
+			}
+		}
+		else // helper returned false for this r: fill all 32*32 (g,b) entries with the sentinel error
+		{
+			for(q=0;q<32*32;q++)
+			{
+				err_upper[32*32*enc_color1[0]+q] = 255*255*16*3;
+				err_lower[32*32*enc_color1[0]+q] = 255*255*16*3;
+				err_left[32*32*enc_color1[0]+q] = 255*255*16*3;
+				err_right[32*32*enc_color1[0]+q] = 255*255*16*3;
+			}
+		}
+	}
+	// Pair search: for each color1, try every color2 whose per-channel difference fits [-4,+3]. The blue loop of color1 is split into three ranges (0..3, 4..27, 28..31) so that the middle range can run the innermost loop without clamping.
+	for(enc_color1[0]=0; enc_color1[0]<32; enc_color1[0]++)
+	{
+		for(enc_color1[1]=0; enc_color1[1]<32; enc_color1[1]++)
+		{
+			for(enc_color1[2]=0; enc_color1[2]<4; enc_color1[2]++) // low blue range: the enc_color2[2] loop is clamped
+			{
+				error_lying = err_upper[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]];
+				error_standing = err_left[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]];
+				if(error_lying < total_best_err || error_standing < total_best_err) // skip color1 when neither orientation is below the pruning bound on its own
+				{
+					for(enc_color2[0]=JAS_MAX(0,enc_color1[0]-4); enc_color2[0]<JAS_MIN(enc_color1[0]+4,32); enc_color2[0]++) // enc_color2 runs over enc_color1-4 .. enc_color1+3, clamped to 0..31
+					{
+						for(enc_color2[1]=JAS_MAX(0,enc_color1[1]-4); enc_color2[1]<JAS_MIN(enc_color1[1]+4,32); enc_color2[1]++)
+						{
+							err_lower_adr = &err_lower[32*32*enc_color2[0]+32*enc_color2[1]]; // row pointers, so the innermost loop only indexes by blue
+							err_right_adr = &err_right[32*32*enc_color2[0]+32*enc_color2[1]];
+							for(enc_color2[2]=JAS_MAX(0,enc_color1[2]-4); enc_color2[2]<JAS_MIN(enc_color1[2]+4,32); enc_color2[2]++)
+							{
+								error = error_lying+err_lower_adr[enc_color2[2]]; // flip=1 candidate: err_upper[color1] + err_lower[color2]
+								if(error<best_err)
+								{
+									best_flip = 1;
+									best_err = error;
+									best_enc_color1[0] = enc_color1[0];
+									best_enc_color1[1] = enc_color1[1];
+									best_enc_color1[2] = enc_color1[2];
+									best_enc_color2[0] = enc_color2[0];
+									best_enc_color2[1] = enc_color2[1];
+									best_enc_color2[2] = enc_color2[2];
+								}
+								error = error_standing+err_right_adr[enc_color2[2]]; // flip=0 candidate: err_left[color1] + err_right[color2]
+								if(error<best_err)
+								{
+									best_flip = 0;
+									best_err = error;
+									best_enc_color1[0] = enc_color1[0];
+									best_enc_color1[1] = enc_color1[1];
+									best_enc_color1[2] = enc_color1[2];
+									best_enc_color2[0] = enc_color2[0];
+									best_enc_color2[1] = enc_color2[1];
+									best_enc_color2[2] = enc_color2[2];
+								}
+							}
+						}
+					}
+					if(best_err < total_best_err) // tighten the pruning bound
+						total_best_err = best_err;
+				}
+			}
+			for(enc_color1[2]=4; enc_color1[2]<28; enc_color1[2]++) // middle blue range: no clamping needed for enc_color2[2]
+			{
+				error_lying = err_upper[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]];
+				error_standing = err_left[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]];
+				if(error_lying < total_best_err || error_standing < total_best_err)
+				{
+					for(enc_color2[0]=JAS_MAX(0,enc_color1[0]-4); enc_color2[0]<JAS_MIN(enc_color1[0]+4,32); enc_color2[0]++)
+					{
+						for(enc_color2[1]=JAS_MAX(0,enc_color1[1]-4); enc_color2[1]<JAS_MIN(enc_color1[1]+4,32); enc_color2[1]++)
+						{
+							err_lower_adr = &err_lower[32*32*enc_color2[0]+32*enc_color2[1]];
+							err_right_adr = &err_right[32*32*enc_color2[0]+32*enc_color2[1]];
+							// since enc_color1[2] is between 4 and 27 here, enc_color2[2] stays within 0..30 and we do not need to clamp the loop on the next line
+							for(enc_color2[2]=enc_color1[2]-4; enc_color2[2]<enc_color1[2]+4; enc_color2[2]++)
+							{
+								error = error_lying+err_lower_adr[enc_color2[2]];
+								if(error<best_err)
+								{
+									best_flip = 1;
+									best_err = error;
+									best_enc_color1[0] = enc_color1[0];
+									best_enc_color1[1] = enc_color1[1];
+									best_enc_color1[2] = enc_color1[2];
+									best_enc_color2[0] = enc_color2[0];
+									best_enc_color2[1] = enc_color2[1];
+									best_enc_color2[2] = enc_color2[2];
+								}
+								error = error_standing+err_right_adr[enc_color2[2]];
+								if(error<best_err)
+								{
+									best_flip = 0;
+									best_err = error;
+									best_enc_color1[0] = enc_color1[0];
+									best_enc_color1[1] = enc_color1[1];
+									best_enc_color1[2] = enc_color1[2];
+									best_enc_color2[0] = enc_color2[0];
+									best_enc_color2[1] = enc_color2[1];
+									best_enc_color2[2] = enc_color2[2];
+								}
+							}
+						}
+					}
+					if(best_err < total_best_err)
+						total_best_err = best_err;
+
+				}
+			}
+			for(enc_color1[2]=28; enc_color1[2]<32; enc_color1[2]++) // high blue range: the enc_color2[2] loop is clamped
+			{
+				error_lying = err_upper[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]];
+				error_standing = err_left[32*32*enc_color1[0]+32*enc_color1[1]+enc_color1[2]];
+				if(error_lying < total_best_err || error_standing < total_best_err)
+				{
+					for(enc_color2[0]=JAS_MAX(0,enc_color1[0]-4); enc_color2[0]<JAS_MIN(enc_color1[0]+4,32); enc_color2[0]++)
+					{
+						for(enc_color2[1]=JAS_MAX(0,enc_color1[1]-4); enc_color2[1]<JAS_MIN(enc_color1[1]+4,32); enc_color2[1]++)
+						{
+							err_lower_adr = &err_lower[32*32*enc_color2[0]+32*enc_color2[1]];
+							err_right_adr = &err_right[32*32*enc_color2[0]+32*enc_color2[1]];
+							for(enc_color2[2]=JAS_MAX(0,enc_color1[2]-4); enc_color2[2]<JAS_MIN(enc_color1[2]+4,32); enc_color2[2]++)
+							{
+								error = error_lying+err_lower_adr[enc_color2[2]];
+								if(error<best_err)
+								{
+									best_flip = 1;
+									best_err = error;
+									best_enc_color1[0] = enc_color1[0];
+									best_enc_color1[1] = enc_color1[1];
+									best_enc_color1[2] = enc_color1[2];
+									best_enc_color2[0] = enc_color2[0];
+									best_enc_color2[1] = enc_color2[1];
+									best_enc_color2[2] = enc_color2[2];
+								}
+								error = error_standing+err_right_adr[enc_color2[2]];
+								if(error<best_err)
+								{
+									best_flip = 0;
+									best_err = error;
+									best_enc_color1[0] = enc_color1[0];
+									best_enc_color1[1] = enc_color1[1];
+									best_enc_color1[2] = enc_color1[2];
+									best_enc_color2[0] = enc_color2[0];
+									best_enc_color2[1] = enc_color2[1];
+									best_enc_color2[2] = enc_color2[2];
+								}
+							}
+						}
+					}
+					if(best_err < total_best_err)
+						total_best_err = best_err;
+				}
+			}
+		}
+ 	}
+
+	free(err_upper);
+	free(err_lower);
+	free(err_left);
+	free(err_right);
+
+	// The error tables only hold errors, so the table search is run again on the winning colors; best_table* and best_pixel_indices* are passed to the helpers and are presumably filled in by them.
+	color_quant1[0] = best_enc_color1[0] << 3 | (best_enc_color1[0] >> 2);
+	color_quant1[1] = best_enc_color1[1] << 3 | (best_enc_color1[1] >> 2);
+	color_quant1[2] = best_enc_color1[2] << 3 | (best_enc_color1[2] >> 2);
+	if(best_flip == 0) // flip 0: 2x4 halves at startx and startx+2; flip 1: 4x2 halves at starty and starty+2
+		tryalltables_3bittable2x4(img,width,height,startx,starty,color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+	else
+		tryalltables_3bittable4x2(img,width,height,startx,starty,color_quant1,best_table1,best_pixel_indices1_MSB, best_pixel_indices1_LSB);
+
+	color_quant2[0] = best_enc_color2[0] << 3 | (best_enc_color2[0] >> 2);
+	color_quant2[1] = best_enc_color2[1] << 3 | (best_enc_color2[1] >> 2);
+	color_quant2[2] = best_enc_color2[2] << 3 | (best_enc_color2[2] >> 2);
+	if(best_flip == 0)
+		tryalltables_3bittable2x4(img,width,height,startx+2,starty,color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+	else
+		tryalltables_3bittable4x2(img,width,height,startx,starty+2,color_quant2,best_table2,best_pixel_indices2_MSB, best_pixel_indices2_LSB);
+
+	diff[0] = best_enc_color2[0]-best_enc_color1[0];	
+	diff[1] = best_enc_color2[1]-best_enc_color1[1];	
+	diff[2] = best_enc_color2[2]-best_enc_color1[2];
+
+	//     ETC1_RGB8_OES:
+	// 
+	//     a) bit layout in bits 63 through 32 if diffbit = 0
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1 | base col2 | base col1 | base col2 | base col1 | base col2 | table  | table  |diff|flip|
+	//     | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)| B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	//     
+	//     b) bit layout in bits 63 through 32 if diffbit = 1
+	// 
+	//      63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32 
+	//      ---------------------------------------------------------------------------------------------------
+	//     | base col1    | dcol 2 | base col1    | dcol 2 | base col 1   | dcol 2 | table  | table  |diff|flip|
+	//     | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
+	//      ---------------------------------------------------------------------------------------------------
+	// 
+	//     c) bit layout in bits 31 through 0 (in both cases)
+	// 
+	//      31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
+	//      --------------------------------------------------------------------------------------------------
+	//     |       most significant pixel index bits       |         least significant pixel index bits       |  
+	//     | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
+	//      --------------------------------------------------------------------------------------------------      
+
+	diffbit = 1; // this function always emits layout b) above
+	compressed1 = 0;
+	PUTBITSHIGH( compressed1, diffbit,		      1, 33);
+ 	PUTBITSHIGH( compressed1, best_enc_color1[0], 5, 63);
+ 	PUTBITSHIGH( compressed1, best_enc_color1[1], 5, 55);
+ 	PUTBITSHIGH( compressed1, best_enc_color1[2], 5, 47);
+ 	PUTBITSHIGH( compressed1, diff[0],            3, 58); // diff may be negative; NOTE(review): assumes PUTBITSHIGH keeps only the low 3 bits -- confirm against the macro
+ 	PUTBITSHIGH( compressed1, diff[1],            3, 50);
+ 	PUTBITSHIGH( compressed1, diff[2],            3, 42);
+ 	PUTBITSHIGH( compressed1, best_table1,	      3, 39);
+ 	PUTBITSHIGH( compressed1, best_table2,	      3, 36);
+ 	PUTBITSHIGH( compressed1, best_flip,		  1, 32);
+
+	if(best_flip == 0) // flip 0: each half contributes 8 bits to the MSB plane and 8 bits to the LSB plane
+	{
+		compressed2 = 0;
+		PUTBITS( compressed2, (best_pixel_indices1_MSB     ), 8, 23);
+		PUTBITS( compressed2, (best_pixel_indices2_MSB     ), 8, 31);
+		PUTBITS( compressed2, (best_pixel_indices1_LSB     ), 8, 7);
+		PUTBITS( compressed2, (best_pixel_indices2_LSB     ), 8, 15);
+	}
+	else
+	{
+		best_pixel_indices1_MSB |= (best_pixel_indices2_MSB << 2); // flip 1: merge the second half's index bits, shifted up by 2, into the first half's words
+		best_pixel_indices1_LSB |= (best_pixel_indices2_LSB << 2);		
+		compressed2 = ((best_pixel_indices1_MSB & 0xffff) << 16) | (best_pixel_indices1_LSB & 0xffff);
+	}
+	return best_err; // best differential-mode error; not clamped to previous_best_err
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// This function uses real exhaustive search for the planar mode. 
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockPlanar57ExhaustivePerceptual(uint8 *img, int width,int height,int startx,int starty, unsigned int &compressed57_1, unsigned int &compressed57_2, unsigned int best_error_sofar, unsigned int best_error_planar_red, unsigned int best_error_planar_green, unsigned int best_error_planar_blue)
+{
+	int colorO_enc[3], colorH_enc[3], colorV_enc[3];
+	int best_colorO_enc[3], best_colorH_enc[3], best_colorV_enc[3];
+
+	unsigned int error;
+	unsigned int best_error;
+	unsigned int lowest_possible_error;
+	unsigned int best_error_red_sofar;
+	unsigned int best_error_green_sofar;
+	unsigned int best_error_blue_sofar;
+	unsigned int BBBtable[128*128];
+	unsigned int CCCtable[128*128];
+
+	uint8 block[4*4*4];
+
+	// Use 4 bytes per pixel (RGB plus a zeroed pad byte) so that each pixel occupies one 32-bit word.
+	int count = 0;
+	int xx, yy;
+	for(yy=0; yy<4; yy++)
+	{
+		for(xx = 0; xx<4; xx++)
+		{
+			block[(count)*4] = img[((starty+yy)*width+(startx+xx))*3];
+			block[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block[(count)*4+3] = 0;
+			count++;
+		}
+	}
+
+	// The task is to calculate the sum of the error over the entire area of the block.
+	//
+	// The block can be partitioned into: O A A A
+	//                                    B D D C
+	//                                    B D C D
+	//                                    B C D D
+	// where the error in 
+	// O only depends on colorO
+	// A only depends on colorO and colorH
+	// B only depends on colorO and colorV
+	// C only depends on colorH and colorV
+	// D depends on all three (colorO, colorH and colorV)
+	//
+	// Note that B can be precalculated for all combinations of colorO and colorV
+	// and the precalculated values can be used instead of calculating it in the inner loop.
+	// The same applies to C.
+	//
+	// In the code below, the squared error over O A A A is calculated and stored in lowest_possible_error
+
+	// Precalc BBB errors
+	for(colorO_enc[0] = 0; colorO_enc[0]<64; colorO_enc[0]++)
+	{
+		for(colorV_enc[0] = 0; colorV_enc[0]<64; colorV_enc[0]++)
+		{
+			BBBtable[colorO_enc[0]*64+colorV_enc[0]] = PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*calcBBBred(block, colorO_enc[0], colorV_enc[0]);
+		}
+	}
+	// Precalc CCC errors
+	for(colorH_enc[0] = 0; colorH_enc[0]<64; colorH_enc[0]++)
+	{
+		for(colorV_enc[0] = 0; colorV_enc[0]<64; colorV_enc[0]++)
+		{
+			CCCtable[colorH_enc[0]*64+colorV_enc[0]] = PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*calcCCCred(block, colorH_enc[0], colorV_enc[0]);
+		}
+	}
+	best_error = MAXERR1000;
+
+	best_error_red_sofar = JAS_MIN(best_error_planar_red, best_error_sofar);
+	for(colorO_enc[0] = 0; colorO_enc[0]<64; colorO_enc[0]++)
+	{
+		for(colorH_enc[0] = 0; colorH_enc[0]<64; colorH_enc[0]++)
+		{
+			lowest_possible_error = calcLowestPossibleRedOHperceptual(block, colorO_enc[0], colorH_enc[0], best_error_red_sofar);
+			if(lowest_possible_error <= best_error_red_sofar)
+			{
+				for(colorV_enc[0] = 0; colorV_enc[0]<64; colorV_enc[0]++)
+				{
+					error = calcErrorPlanarOnlyRedPerceptual(block, colorO_enc[0], colorH_enc[0], colorV_enc[0], lowest_possible_error, BBBtable[colorO_enc[0]*64+colorV_enc[0]], CCCtable[colorH_enc[0]*64+colorV_enc[0]], best_error_red_sofar);
+					if(error < best_error)
+					{
+						best_error = error;
+						best_colorO_enc[0] = colorO_enc[0];
+						best_colorH_enc[0] = colorH_enc[0];
+						best_colorV_enc[0] = colorV_enc[0];
+					}
+				}
+			}
+		}
+	}
+
+    if(best_error < best_error_planar_red)
+        best_error_planar_red = best_error;
+
+    if(best_error_planar_red > best_error_sofar)
+	{
+       // The red component in itself is already bigger than the previously best value ---- we can give up.
+       // use the dummy color black for all colors and report that the errors for the different color components are infinite
+       best_error_planar_green = MAXERR1000;
+       best_error_planar_blue = MAXERR1000;
+       compressed57_1 = 0;
+       compressed57_2 = 0;
+       return;
+	}
+
+	// The task is to calculate the sum of the error over the entire area of the block.
+	//
+	// The block can be partitioned into: O A A A
+	//                                    B D D C
+	//                                    B D C D
+	//                                    B C D D
+	// where the error in 
+	// O only depends on colorO
+	// A only depends on colorO and colorH
+	// B only depends on colorO and colorV
+	// C only depends on colorH and colorV
+	// D depends on all three (colorO, colorH and colorV)
+	//
+	// Note that B can be precalculated for all combinations of colorO and colorV
+	// and the precalculated values can be used instead of calculating it in the inner loop.
+	// The same applies to C.
+	//
+	// In the code below, the squared error over O A A A is calculated and store in lowest_possible_error
+
+	// Precalc BBB errors
+	for(colorO_enc[1] = 0; colorO_enc[1]<128; colorO_enc[1]++)
+	{
+		for(colorV_enc[1] = 0; colorV_enc[1]<128; colorV_enc[1]++)
+		{
+			BBBtable[colorO_enc[1]*128+colorV_enc[1]] = PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*calcBBBgreen(block, colorO_enc[1], colorV_enc[1]);
+		}
+	}
+	// Precalc CCC errors
+	for(colorH_enc[1] = 0; colorH_enc[1]<128; colorH_enc[1]++)
+	{
+		for(colorV_enc[1] = 0; colorV_enc[1]<128; colorV_enc[1]++)
+		{
+			CCCtable[colorH_enc[1]*128+colorV_enc[1]] = PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*calcCCCgreen(block, colorH_enc[1], colorV_enc[1]);
+		}
+	}
+	best_error = MAXERR1000;
+	best_error_green_sofar = JAS_MIN(best_error_planar_green, best_error_sofar);
+	for(colorO_enc[1] = 0; colorO_enc[1]<128; colorO_enc[1]++)
+	{
+		for(colorH_enc[1] = 0; colorH_enc[1]<128; colorH_enc[1]++)
+		{
+			lowest_possible_error = calcLowestPossibleGreenOHperceptual(block, colorO_enc[1], colorH_enc[1], best_error_green_sofar);
+			if(lowest_possible_error <= best_error_green_sofar)
+			{
+				for(colorV_enc[1] = 0; colorV_enc[1]<128; colorV_enc[1]++)
+				{
+					error = calcErrorPlanarOnlyGreenPerceptual(block, colorO_enc[1], colorH_enc[1], colorV_enc[1], lowest_possible_error, BBBtable[colorO_enc[1]*128+colorV_enc[1]], CCCtable[colorH_enc[1]*128+colorV_enc[1]], best_error_green_sofar);
+					if(error < best_error)
+					{
+						best_error = error;
+						best_colorO_enc[1] = colorO_enc[1];
+						best_colorH_enc[1] = colorH_enc[1];
+						best_colorV_enc[1] = colorV_enc[1];
+					}
+				}
+			}
+		}
+	}
+
+    if(best_error < best_error_planar_green)
+        best_error_planar_green = best_error;
+
+    if(best_error_planar_red + best_error_planar_green > best_error_sofar)
+	{
+       // The red component in itself is already bigger than the previously best value ---- we can give up.
+       // use the dummy color black for all colors and report that the errors for the different color components are infinite
+       best_error_planar_blue = MAXERR1000;
+       compressed57_1 = 0;
+       compressed57_2 = 0;
+       return;
+	}
+
+	// The task is to calculate the sum of the error over the entire area of the block.
+	//
+	// The block can be partitioned into: O A A A
+	//                                    B D D C
+	//                                    B D C D
+	//                                    B C D D
+	// where the error in 
+	// O only depends on colorO
+	// A only depends on colorO and colorH
+	// B only depends on colorO and colorV
+	// C only depends on colorH and colorV
+	// D depends on all three (colorO, colorH and colorV)
+	//
+	// Note that B can be precalculated for all combinations of colorO and colorV
+	// and the precalculated values can be used instead of calculating it in the inner loop.
+	// The same applies to C.
+	//
+	// In the code below, the squared error over O A A A is calculated and store in lowest_possible_error
+
+	// Precalc BBB errors
+	for(colorO_enc[2] = 0; colorO_enc[2]<64; colorO_enc[2]++)
+	{
+		for(colorV_enc[2] = 0; colorV_enc[2]<64; colorV_enc[2]++)
+		{
+			BBBtable[colorO_enc[2]*64+colorV_enc[2]] = calcBBBbluePerceptual(block, colorO_enc[2], colorV_enc[2]);
+		}
+	}
+	// Precalc CCC errors
+	for(colorH_enc[2] = 0; colorH_enc[2]<64; colorH_enc[2]++)
+	{
+		for(colorV_enc[2] = 0; colorV_enc[2]<64; colorV_enc[2]++)
+		{
+			CCCtable[colorH_enc[2]*64+colorV_enc[2]] = calcCCCbluePerceptual(block, colorH_enc[2], colorV_enc[2]);
+		}
+	}
+	best_error = MAXERR1000;
+	best_error_blue_sofar = JAS_MIN(best_error_planar_blue, best_error_sofar);
+	for(colorO_enc[2] = 0; colorO_enc[2]<64; colorO_enc[2]++)
+	{
+		for(colorH_enc[2] = 0; colorH_enc[2]<64; colorH_enc[2]++)
+		{
+			lowest_possible_error = calcLowestPossibleBlueOHperceptual(block, colorO_enc[2], colorH_enc[2], best_error_blue_sofar);
+			if(lowest_possible_error <= best_error_blue_sofar)
+			{
+				for(colorV_enc[2] = 0; colorV_enc[2]<64; colorV_enc[2]++)
+				{
+					error = calcErrorPlanarOnlyBluePerceptual(block, colorO_enc[2], colorH_enc[2], colorV_enc[2], lowest_possible_error, BBBtable[colorO_enc[2]*64+colorV_enc[2]], CCCtable[colorH_enc[2]*64+colorV_enc[2]], best_error_blue_sofar);
+					if(error < best_error)
+					{
+						best_error = error;
+						best_colorO_enc[2] = colorO_enc[2];
+						best_colorH_enc[2] = colorH_enc[2];
+						best_colorV_enc[2] = colorV_enc[2];
+					}
+				}
+			}
+		}
+	}
+
+	if(best_error < best_error_planar_blue)
+		best_error_planar_blue = best_error;
+
+	compressed57_1 = 0;
+	compressed57_2 = 0;
+	PUTBITSHIGH( compressed57_1, best_colorO_enc[0], 6, 63);
+	PUTBITSHIGH( compressed57_1, best_colorO_enc[1], 7, 57);
+	PUTBITSHIGH( compressed57_1, best_colorO_enc[2], 6, 50);
+	PUTBITSHIGH( compressed57_1, best_colorH_enc[0], 6, 44);
+	PUTBITSHIGH( compressed57_1, best_colorH_enc[1], 7, 38);
+	PUTBITS(     compressed57_2, best_colorH_enc[2], 6, 31);
+	PUTBITS(     compressed57_2, best_colorV_enc[0], 6, 25);
+	PUTBITS(     compressed57_2, best_colorV_enc[1], 7, 19);
+	PUTBITS(     compressed57_2, best_colorV_enc[2], 6, 12);
+	
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// This function uses real exhaustive search for the planar mode. 
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockPlanar57Exhaustive(uint8 *img, int width,int height,int startx,int starty, unsigned int &compressed57_1, unsigned int &compressed57_2, unsigned int best_error_sofar, unsigned int best_error_red, unsigned int best_error_green, unsigned int best_error_blue)
+{
+	int colorO_enc[3], colorH_enc[3], colorV_enc[3];
+	int best_colorO_enc[3], best_colorH_enc[3], best_colorV_enc[3];
+
+	unsigned int error;
+	unsigned int best_error;
+	unsigned int lowest_possible_error;
+	unsigned int best_error_red_sofar;
+	unsigned int best_error_green_sofar;
+	unsigned int best_error_blue_sofar;
+	unsigned int BBBtable[128*128];
+	unsigned int CCCtable[128*128];
+
+	uint8 block[4*4*4];
+
+	// Use 4 bytes per pixel to make it 32-word aligned.
+	int count = 0;
+	int xx, yy;
+	for(yy=0; yy<4; yy++)
+	{
+		for(xx = 0; xx<4; xx++)
+		{
+			block[(count)*4] = img[((starty+yy)*width+(startx+xx))*3];
+			block[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block[(count)*4+3] = 0;
+			count++;
+		}
+	}
+
+	// The task is to calculate the sum of the error over the entire area of the block.
+	//
+	// The block can be partitioned into: O A A A
+	//                                    B D D C
+	//                                    B D C D
+	//                                    B C D D
+	// where the error in 
+	// O only depends on colorO
+	// A only depends on colorO and colorH
+	// B only depends on colorO and colorV
+	// C only depends on colorH and colorV
+	// D depends on all three (colorO, colorH and colorV)
+	//
+	// Note that B can be precalculated for all combinations of colorO and colorV
+	// and the precalculated values can be used instead of calculating it in the inner loop.
+	// The same applies to C.
+	//
+	// In the code below, the squared error over O A A A is calculated and store in lowest_possible_error
+
+	// Precalc BBB errors
+	for(colorO_enc[0] = 0; colorO_enc[0]<64; colorO_enc[0]++)
+	{
+		for(colorV_enc[0] = 0; colorV_enc[0]<64; colorV_enc[0]++)
+		{
+			BBBtable[colorO_enc[0]*64+colorV_enc[0]] = calcBBBred(block, colorO_enc[0], colorV_enc[0]);
+		}
+	}
+	// Precalc CCC errors
+	for(colorH_enc[0] = 0; colorH_enc[0]<64; colorH_enc[0]++)
+	{
+		for(colorV_enc[0] = 0; colorV_enc[0]<64; colorV_enc[0]++)
+		{
+			CCCtable[colorH_enc[0]*64+colorV_enc[0]] = calcCCCred(block, colorH_enc[0], colorV_enc[0]);
+		}
+	}
+	best_error = MAXERR1000;
+	best_error_red_sofar = JAS_MIN(best_error_red, best_error_sofar);
+	for(colorO_enc[0] = 0; colorO_enc[0]<64; colorO_enc[0]++)
+	{
+		for(colorH_enc[0] = 0; colorH_enc[0]<64; colorH_enc[0]++)
+		{
+			lowest_possible_error = calcLowestPossibleRedOH(block, colorO_enc[0], colorH_enc[0], best_error_red_sofar);
+			if(lowest_possible_error <= best_error_red_sofar)
+			{
+				for(colorV_enc[0] = 0; colorV_enc[0]<64; colorV_enc[0]++)
+				{
+					error = calcErrorPlanarOnlyRed(block, colorO_enc[0], colorH_enc[0], colorV_enc[0], lowest_possible_error, BBBtable[colorO_enc[0]*64+colorV_enc[0]], CCCtable[colorH_enc[0]*64+colorV_enc[0]], best_error_red_sofar);
+					if(error < best_error)
+					{
+						best_error = error;
+						best_colorO_enc[0] = colorO_enc[0];
+						best_colorH_enc[0] = colorH_enc[0];
+						best_colorV_enc[0] = colorV_enc[0];
+					}
+				}
+			}
+		}
+	}
+
+	// The task is to calculate the sum of the error over the entire area of the block.
+	//
+	// The block can be partitioned into: O A A A
+	//                                    B D D C
+	//                                    B D C D
+	//                                    B C D D
+	// where the error in 
+	// O only depends on colorO
+	// A only depends on colorO and colorH
+	// B only depends on colorO and colorV
+	// C only depends on colorH and colorV
+	// D depends on all three (colorO, colorH and colorV)
+	//
+	// Note that B can be precalculated for all combinations of colorO and colorV
+	// and the precalculated values can be used instead of calculating it in the inner loop.
+	// The same applies to C.
+	//
+	// In the code below, the squared error over O A A A is calculated and store in lowest_possible_error
+
+	// Precalc BBB errors
+	for(colorO_enc[1] = 0; colorO_enc[1]<128; colorO_enc[1]++)
+	{
+		for(colorV_enc[1] = 0; colorV_enc[1]<128; colorV_enc[1]++)
+		{
+			BBBtable[colorO_enc[1]*128+colorV_enc[1]] = calcBBBgreen(block, colorO_enc[1], colorV_enc[1]);
+		}
+	}
+	// Precalc CCC errors
+	for(colorH_enc[1] = 0; colorH_enc[1]<128; colorH_enc[1]++)
+	{
+		for(colorV_enc[1] = 0; colorV_enc[1]<128; colorV_enc[1]++)
+		{
+			CCCtable[colorH_enc[1]*128+colorV_enc[1]] = calcCCCgreen(block, colorH_enc[1], colorV_enc[1]);
+		}
+	}
+	best_error = MAXERR1000;
+	best_error_green_sofar = JAS_MIN(best_error_green, best_error_sofar);
+	for(colorO_enc[1] = 0; colorO_enc[1]<128; colorO_enc[1]++)
+	{
+		for(colorH_enc[1] = 0; colorH_enc[1]<128; colorH_enc[1]++)
+		{
+			lowest_possible_error = calcLowestPossibleGreenOH(block, colorO_enc[1], colorH_enc[1], best_error_green_sofar);
+			if(lowest_possible_error <= best_error_green_sofar)
+			{
+				for(colorV_enc[1] = 0; colorV_enc[1]<128; colorV_enc[1]++)
+				{
+					error = calcErrorPlanarOnlyGreen(block, colorO_enc[1], colorH_enc[1], colorV_enc[1], lowest_possible_error, BBBtable[colorO_enc[1]*128+colorV_enc[1]], CCCtable[colorH_enc[1]*128+colorV_enc[1]], best_error_green_sofar);
+					if(error < best_error)
+					{
+						best_error = error;
+						best_colorO_enc[1] = colorO_enc[1];
+						best_colorH_enc[1] = colorH_enc[1];
+						best_colorV_enc[1] = colorV_enc[1];
+					}
+				}
+			}
+		}
+	}
+
+	// The task is to calculate the sum of the error over the entire area of the block.
+	//
+	// The block can be partitioned into: O A A A
+	//                                    B D D C
+	//                                    B D C D
+	//                                    B C D D
+	// where the error in 
+	// O only depends on colorO
+	// A only depends on colorO and colorH
+	// B only depends on colorO and colorV
+	// C only depends on colorH and colorV
+	// D depends on all three (colorO, colorH and colorV)
+	//
+	// Note that B can be precalculated for all combinations of colorO and colorV
+	// and the precalculated values can be used instead of calculating it in the inner loop.
+	// The same applies to C.
+	//
+	// In the code below, the squared error over O A A A is calculated and store in lowest_possible_error
+
+	// Precalc BBB errors
+	for(colorO_enc[2] = 0; colorO_enc[2]<64; colorO_enc[2]++)
+	{
+		for(colorV_enc[2] = 0; colorV_enc[2]<64; colorV_enc[2]++)
+		{
+			BBBtable[colorO_enc[2]*64+colorV_enc[2]] = calcBBBblue(block, colorO_enc[2], colorV_enc[2]);
+		}
+	}
+	// Precalc CCC errors
+	for(colorH_enc[2] = 0; colorH_enc[2]<64; colorH_enc[2]++)
+	{
+		for(colorV_enc[2] = 0; colorV_enc[2]<64; colorV_enc[2]++)
+		{
+			CCCtable[colorH_enc[2]*64+colorV_enc[2]] = calcCCCblue(block, colorH_enc[2], colorV_enc[2]);
+		}
+	}
+	best_error = MAXERR1000;
+	best_error_blue_sofar = JAS_MIN(best_error_blue, best_error_sofar);
+	for(colorO_enc[2] = 0; colorO_enc[2]<64; colorO_enc[2]++)
+	{
+		for(colorH_enc[2] = 0; colorH_enc[2]<64; colorH_enc[2]++)
+		{
+			lowest_possible_error = calcLowestPossibleBlueOH(block, colorO_enc[2], colorH_enc[2], best_error_blue_sofar);
+			if(lowest_possible_error <= best_error_blue_sofar)
+			{
+				for(colorV_enc[2] = 0; colorV_enc[2]<64; colorV_enc[2]++)
+				{
+					error = calcErrorPlanarOnlyBlue(block, colorO_enc[2], colorH_enc[2], colorV_enc[2], lowest_possible_error, BBBtable[colorO_enc[2]*64+colorV_enc[2]], CCCtable[colorH_enc[2]*64+colorV_enc[2]], best_error_blue_sofar);
+					if(error < best_error)
+					{
+						best_error = error;
+						best_colorO_enc[2] = colorO_enc[2];
+						best_colorH_enc[2] = colorH_enc[2];
+						best_colorV_enc[2] = colorV_enc[2];
+					}
+				}
+			}
+		}
+	}
+
+	compressed57_1 = 0;
+	compressed57_2 = 0;
+	PUTBITSHIGH( compressed57_1, best_colorO_enc[0], 6, 63);
+	PUTBITSHIGH( compressed57_1, best_colorO_enc[1], 7, 57);
+	PUTBITSHIGH( compressed57_1, best_colorO_enc[2], 6, 50);
+	PUTBITSHIGH( compressed57_1, best_colorH_enc[0], 6, 44);
+	PUTBITSHIGH( compressed57_1, best_colorH_enc[1], 7, 38);
+	PUTBITS(     compressed57_2, best_colorH_enc[2], 6, 31);
+	PUTBITS(     compressed57_2, best_colorV_enc[0], 6, 25);
+	PUTBITS(     compressed57_2, best_colorV_enc[1], 7, 19);
+	PUTBITS(     compressed57_2, best_colorV_enc[2], 6, 12);
+	
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates a table used in exhaustive compression of the T-mode.
+//
+// Red-only variant using the PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000 weight.
+// The red nibble of colorRGB444_packed (bits 8..11) is expanded to 8 bits (nibble*17). For each
+// of the 8 distances table59T[d], three candidate red values are formed: color - distance and
+// color + distance (both passed through CLAMP(0, ., 255)) and color itself. For every pixel x
+// of block (4 bytes per pixel, 16 pixels) the smallest weighted squared red difference over
+// the three candidates is written to
+//   precalc_err_col0_R[((colorRGB444_packed>>8)*8 + d)*16 + x].
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcError59T_col0_Rpercep1000(uint8* block, int colorRGB444_packed, unsigned int *precalc_err_col0_R)
+{
+	unsigned int pixel_error;
+	unsigned int best_pixel_error;
+	int diff;
+	uint8 color;
+	uint8 possible_colors[3];
+
+	color = ((colorRGB444_packed >> 8) & 0xf)*17;
+
+	// Test all distances
+	for (uint8 d = 0; d < 8; d++)
+	{
+		// The candidates are stored as uint8, so they are within 0..255 by construction and
+		// are used below without being clamped a second time.
+		possible_colors[0] = CLAMP(0,color - table59T[d],255);
+		possible_colors[1] = color;
+		possible_colors[2] = CLAMP(0,color + table59T[d],255);
+
+		// Loop block
+		for (int x = 0; x < 16; x++)
+		{
+			best_pixel_error = MAXERR1000;
+
+			// Loop possible block colors
+			for (uint8 c = 0; c < 3; c++)
+			{
+				diff = block[4*x + R] - possible_colors[c];
+
+				pixel_error = PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(diff);
+
+				// Choose best error
+				if (pixel_error < best_pixel_error)
+					best_pixel_error = pixel_error;
+			}
+
+			precalc_err_col0_R[((colorRGB444_packed>>8)*8 + d)*16 + x] = best_pixel_error;
+		}
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates a table used in exhaustive compression of the T-mode.
+//
+// Red-only variant using the plain (unweighted) squared difference.
+// The red nibble of colorRGB444_packed (bits 8..11) is expanded to 8 bits (nibble*17). For each
+// of the 8 distances table59T[d], three candidate red values are formed: color - distance and
+// color + distance (both passed through CLAMP(0, ., 255)) and color itself. For every pixel x
+// of block (4 bytes per pixel, 16 pixels) the smallest squared red difference over the three
+// candidates is written to
+//   precalc_err_col0_R[((colorRGB444_packed>>8)*8 + d)*16 + x].
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcError59T_col0_R(uint8* block, int colorRGB444_packed, unsigned int *precalc_err_col0_R)
+{
+	unsigned int pixel_error;
+	unsigned int best_pixel_error;
+	int diff;
+	uint8 color;
+	uint8 possible_colors[3];
+
+	color = ((colorRGB444_packed >> 8) & 0xf)*17;
+
+	// Test all distances
+	for (uint8 d = 0; d < 8; d++)
+	{
+		// The candidates are stored as uint8, so they are within 0..255 by construction and
+		// are used below without being clamped a second time.
+		possible_colors[0] = CLAMP(0,color - table59T[d],255);
+		possible_colors[1] = color;
+		possible_colors[2] = CLAMP(0,color + table59T[d],255);
+
+		// Loop block
+		for (int x = 0; x < 16; x++)
+		{
+			best_pixel_error = MAXIMUM_ERROR;
+
+			// Loop possible block colors
+			for (uint8 c = 0; c < 3; c++)
+			{
+				diff = block[4*x + R] - possible_colors[c];
+
+				pixel_error = SQUARE(diff);
+
+				// Choose best error
+				if (pixel_error < best_pixel_error)
+					best_pixel_error = pixel_error;
+			}
+			precalc_err_col0_R[((colorRGB444_packed>>8)*8 + d)*16 + x] = best_pixel_error;
+		}
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates a table used in exhaustive compression of the T-mode.
+//
+// Red+green variant using the PERCEPTUAL_WEIGHT_{R,G}_SQUARED_TIMES1000 weights.
+// The red (bits 8..11) and green (bits 4..7) nibbles of colorRGB444_packed are expanded to
+// 8 bits (nibble*17). For each of the 8 distances table59T[d], three candidate colours are
+// formed: color - distance and color + distance (each channel passed through CLAMP(0, ., 255))
+// and color itself. For every pixel x of block (4 bytes per pixel, 16 pixels) the smallest
+// weighted squared red+green difference over the three candidates is written to
+//   precalc_err_col0_RG[((colorRGB444_packed>>4)*8 + d)*16 + x].
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcError59T_col0_RGpercep1000(uint8* block, int colorRGB444_packed, unsigned int *precalc_err_col0_RG)
+{
+	unsigned int pixel_error;
+	unsigned int best_pixel_error;
+	int diff[3];
+	uint8 color[3];
+	uint8 possible_colors[3][2];
+
+	color[R] = ((colorRGB444_packed >> 8) & 0xf)*17;
+	color[G] = ((colorRGB444_packed >> 4) & 0xf)*17;
+
+	// Test all distances
+	for (uint8 d = 0; d < 8; d++)
+	{
+		// The candidates are stored as uint8, so they are within 0..255 by construction and
+		// are used below without being clamped a second time.
+		possible_colors[0][R] = CLAMP(0,color[R] - table59T[d],255);
+		possible_colors[0][G] = CLAMP(0,color[G] - table59T[d],255);
+
+		possible_colors[1][R] = color[R];
+		possible_colors[1][G] = color[G];
+
+		possible_colors[2][R] = CLAMP(0,color[R] + table59T[d],255);
+		possible_colors[2][G] = CLAMP(0,color[G] + table59T[d],255);
+
+		// Loop block
+		for (int x = 0; x < 16; x++)
+		{
+			best_pixel_error = MAXERR1000;
+
+			// Loop possible block colors
+			for (uint8 c = 0; c < 3; c++)
+			{
+				diff[R] = block[4*x + R] - possible_colors[c][R];
+				diff[G] = block[4*x + G] - possible_colors[c][G];
+
+				pixel_error = PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(diff[R]) + PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*SQUARE(diff[G]);
+
+				// Choose best error
+				if (pixel_error < best_pixel_error)
+					best_pixel_error = pixel_error;
+			}
+			precalc_err_col0_RG[((colorRGB444_packed>>4)*8 + d)*16 + x] = best_pixel_error;
+		}
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates a table used in exhaustive compression of the T-mode.
+//
+// Red+green variant using the plain (unweighted) squared difference.
+// The red (bits 8..11) and green (bits 4..7) nibbles of colorRGB444_packed are expanded to
+// 8 bits (nibble*17). For each of the 8 distances table59T[d], three candidate colours are
+// formed: color - distance and color + distance (each channel passed through CLAMP(0, ., 255))
+// and color itself. For every pixel x of block (4 bytes per pixel, 16 pixels) the smallest
+// squared red+green difference over the three candidates is written to
+//   precalc_err_col0_RG[((colorRGB444_packed>>4)*8 + d)*16 + x].
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcError59T_col0_RG(uint8* block, int colorRGB444_packed, unsigned int *precalc_err_col0_RG)
+{
+	unsigned int pixel_error;
+	unsigned int best_pixel_error;
+	int diff[3];
+	uint8 color[3];
+	uint8 possible_colors[3][2];
+
+	color[R] = ((colorRGB444_packed >> 8) & 0xf)*17;
+	color[G] = ((colorRGB444_packed >> 4) & 0xf)*17;
+
+	// Test all distances
+	for (uint8 d = 0; d < 8; d++)
+	{
+		// The candidates are stored as uint8, so they are within 0..255 by construction and
+		// are used below without being clamped a second time.
+		possible_colors[0][R] = CLAMP(0,color[R] - table59T[d],255);
+		possible_colors[0][G] = CLAMP(0,color[G] - table59T[d],255);
+
+		possible_colors[1][R] = color[R];
+		possible_colors[1][G] = color[G];
+
+		possible_colors[2][R] = CLAMP(0,color[R] + table59T[d],255);
+		possible_colors[2][G] = CLAMP(0,color[G] + table59T[d],255);
+
+		// Loop block
+		for (int x = 0; x < 16; x++)
+		{
+			best_pixel_error = MAXIMUM_ERROR;
+
+			// Loop possible block colors
+			for (uint8 c = 0; c < 3; c++)
+			{
+				diff[R] = block[4*x + R] - possible_colors[c][R];
+				diff[G] = block[4*x + G] - possible_colors[c][G];
+
+				pixel_error = SQUARE(diff[R]) + SQUARE(diff[G]);
+
+				// Choose best error
+				if (pixel_error < best_pixel_error)
+					best_pixel_error = pixel_error;
+			}
+			precalc_err_col0_RG[((colorRGB444_packed>>4)*8 + d)*16 + x] = best_pixel_error;
+		}
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates a table used in exhaustive compression of the T-mode.
+//
+// Red-only variant using the PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000 weight: for each of the
+// 16 pixels of block (4 bytes per pixel), stores the weighted squared difference between the
+// pixel's red value and the red nibble of colorRGB444_packed expanded to 8 bits (nibble*17)
+// at precalc_err_col1_R[(colorRGB444_packed>>8)*16 + pixel].
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcError59T_col1_Rpercep1000(uint8* block, int colorRGB444_packed, unsigned int *precalc_err_col1_R)
+{
+	const int red_index = colorRGB444_packed >> 8;
+	const uint8 red = (red_index & 0xf)*17;
+
+	// All 16 results for this colour land in one contiguous row of the table.
+	unsigned int *row = &precalc_err_col1_R[red_index*16];
+
+	for (int pixel = 0; pixel < 16; ++pixel)
+	{
+		const int delta = block[4*pixel + R] - red;
+		const unsigned int weighted = PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(delta);
+		row[pixel] = weighted;
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+/**
+ * Precalculates a table used in exhaustive compression of the T-mode.
+ *
+ * For each of the 16 pixels in the block, stores the squared difference between
+ * the pixel's red value and the red component of colorRGB444_packed.
+ */
+void precalcError59T_col1_R(uint8* block, int colorRGB444_packed, unsigned int *precalc_err_col1_R)
+{
+	// Red nibble (bits 8..11) expanded to 8 bits: nibble*17.
+	const int red_index = colorRGB444_packed >> 8;
+	const uint8 red = (red_index & 0xf)*17;
+
+	// All 16 results for this colour land in one contiguous row of the table.
+	unsigned int *row = &precalc_err_col1_R[red_index*16];
+
+	for (int pixel = 0; pixel < 16; ++pixel)
+	{
+		const int delta = block[4*pixel + R] - red;
+		const unsigned int squared = SQUARE(delta);
+		row[pixel] = squared;
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates a table used in exhaustive compression of the T-mode.
+//
+// Red+green variant using the PERCEPTUAL_WEIGHT_{R,G}_SQUARED_TIMES1000 weights: for each of
+// the 16 pixels of block (4 bytes per pixel), stores the weighted sum of squared red and green
+// differences against the red/green nibbles of colorRGB444_packed expanded to 8 bits
+// (nibble*17) at precalc_err_col1_RG[(colorRGB444_packed>>4)*16 + pixel].
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcError59T_col1_RGpercep1000(uint8* block, int colorRGB444_packed, unsigned int *precalc_err_col1_RG)
+{
+	const int rg_index = colorRGB444_packed >> 4;
+	const uint8 red = ((colorRGB444_packed >> 8) & 0xf)*17;
+	const uint8 green = (rg_index & 0xf)*17;
+
+	// All 16 results for this colour land in one contiguous row of the table.
+	unsigned int *row = &precalc_err_col1_RG[rg_index*16];
+
+	for (int pixel = 0; pixel < 16; ++pixel)
+	{
+		const int delta_r = block[4*pixel + R] - red;
+		const int delta_g = block[4*pixel + G] - green;
+		const unsigned int weighted = PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(delta_r) + PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*SQUARE(delta_g);
+		row[pixel] = weighted;
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates a table used in exhaustive compression of the T-mode.
+//
+// Red+green variant using the plain (unweighted) squared difference: for each of the 16 pixels
+// of block (4 bytes per pixel), stores the sum of squared red and green differences against
+// the red/green nibbles of colorRGB444_packed expanded to 8 bits (nibble*17) at
+// precalc_err_col1_RG[(colorRGB444_packed>>4)*16 + pixel].
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcError59T_col1_RG(uint8* block, int colorRGB444_packed, unsigned int *precalc_err_col1_RG)
+{
+	const int rg_index = colorRGB444_packed >> 4;
+	const uint8 red = ((colorRGB444_packed >> 8) & 0xf)*17;
+	const uint8 green = (rg_index & 0xf)*17;
+
+	// All 16 results for this colour land in one contiguous row of the table.
+	unsigned int *row = &precalc_err_col1_RG[rg_index*16];
+
+	for (int pixel = 0; pixel < 16; ++pixel)
+	{
+		const int delta_r = block[4*pixel + R] - red;
+		const int delta_g = block[4*pixel + G] - green;
+		const unsigned int squared = SQUARE(delta_r) + SQUARE(delta_g);
+		row[pixel] = squared;
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates a table used in exhaustive compression of the T-mode.
+//
+// Full RGB variant using the PERCEPTUAL_WEIGHT_{R,G,B}_SQUARED_TIMES1000 weights.
+// For each of the 8 distance indices d and each of the 16 pixels x of block (4 bytes per
+// pixel), writes to precalc_err_col0_RGB[(colorRGB444_packed*8 + d)*16 + x] the smallest
+// weighted error among three candidate colours: base - table59T[d], base, and
+// base + table59T[d], where base is colorRGB444_packed expanded to 8 bits per channel.
+// The loops over d, x and the three candidates are unrolled by hand through the two macros
+// defined inside the function body.
+//
+// NOTE(review): candidates are stored with 255 subtracted, so the square_table index
+// (pixel - candidate) equals (pixel - colour) + 255. Presumably square_table[i] holds
+// (i-255)^2 and clamp_table[v+255] clamps v to 0..255 -- confirm against their definitions.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcError59T_col0_RGBpercep1000(uint8* block, int colorRGB444_packed, unsigned int *precalc_err_col0_RGB)
+{
+	// NOTE: block_error and best_block_error are never read in this function.
+	unsigned int block_error = 0, 
+	  	     	 best_block_error = MAXERR1000,
+			     pixel_error, 
+		         best_pixel_error;
+	uint8 color[3];
+	int possible_colors[3][3];
+	unsigned int *precalc_err_col0_RGB_adr;
+
+	// ONEPOINT59RGB_PERCEP(xval): evaluates the three candidate colours for pixel xval and
+	// stores the smallest weighted error at precalc_err_col0_RGB_adr[xval].
+	// NOTE: neither macro is #undef'd within this function.
+#define ONEPOINT59RGB_PERCEP(xval) \
+			/* Loop possible block colors */\
+			/* unroll loop for (uint8 c = 0; c < 3; c++) */\
+			{\
+				best_pixel_error = PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*square_table[block[4*xval + R] - possible_colors[0][R]]\
+                            + PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*square_table[block[4*xval + G] - possible_colors[0][G]] \
+							+ PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[block[4*xval + B] - possible_colors[0][B]];\
+				pixel_error = PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*square_table[block[4*xval + R] - possible_colors[1][R]]\
+                            + PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*square_table[block[4*xval + G] - possible_colors[1][G]]\
+							+ PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[block[4*xval + B] - possible_colors[1][B]];\
+				if (pixel_error < best_pixel_error)\
+					best_pixel_error = pixel_error;\
+				pixel_error = PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*square_table[block[4*xval + R] - possible_colors[2][R]]\
+                            + PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*square_table[block[4*xval + G] - possible_colors[2][G]]\
+							+ PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[block[4*xval + B] - possible_colors[2][B]];\
+				if (pixel_error < best_pixel_error)\
+					best_pixel_error = pixel_error;\
+			}\
+			precalc_err_col0_RGB_adr[xval] = (unsigned int) best_pixel_error;\
+
+	// ONETABLE59RGB_PERCEP(dval): builds the three candidate colours for distance index dval,
+	// points precalc_err_col0_RGB_adr at the 16-entry output row for (colorRGB444_packed, dval)
+	// and processes all 16 pixels.
+#define ONETABLE59RGB_PERCEP(dval) \
+		possible_colors[0][R] = clamp_table[color[R] - table59T[dval]+255]-255;\
+		possible_colors[0][G] = clamp_table[color[G] - table59T[dval]+255]-255;\
+		possible_colors[0][B] = clamp_table[color[B] - table59T[dval]+255]-255;\
+		possible_colors[1][R] = color[R]-255;\
+		possible_colors[1][G] = color[G]-255;\
+		possible_colors[1][B] = color[B]-255;\
+		possible_colors[2][R] = clamp_table[color[R] + table59T[dval]+255]-255;\
+		possible_colors[2][G] = clamp_table[color[G] + table59T[dval]+255]-255;\
+		possible_colors[2][B] = clamp_table[color[B] + table59T[dval]+255]-255;\
+		precalc_err_col0_RGB_adr = &precalc_err_col0_RGB[(colorRGB444_packed*8 + dval)*16];\
+		/* Loop block */\
+		/* unroll loop for (int x = 0; x < 16; x++) */\
+		{\
+			ONEPOINT59RGB_PERCEP(0)\
+			ONEPOINT59RGB_PERCEP(1)\
+			ONEPOINT59RGB_PERCEP(2)\
+			ONEPOINT59RGB_PERCEP(3)\
+			ONEPOINT59RGB_PERCEP(4)\
+			ONEPOINT59RGB_PERCEP(5)\
+			ONEPOINT59RGB_PERCEP(6)\
+			ONEPOINT59RGB_PERCEP(7)\
+			ONEPOINT59RGB_PERCEP(8)\
+			ONEPOINT59RGB_PERCEP(9)\
+			ONEPOINT59RGB_PERCEP(10)\
+			ONEPOINT59RGB_PERCEP(11)\
+			ONEPOINT59RGB_PERCEP(12)\
+			ONEPOINT59RGB_PERCEP(13)\
+			ONEPOINT59RGB_PERCEP(14)\
+			ONEPOINT59RGB_PERCEP(15)\
+		}\
+
+	// Expand each 4-bit channel to 8 bits by replicating the nibble into both halves of the byte.
+	// NOTE(review): the red term is not masked with 0xf (green and blue are); this presumably
+	// relies on colorRGB444_packed being below 4096 -- confirm against the callers.
+	color[R] = (((colorRGB444_packed >> 8) ) << 4) | ((colorRGB444_packed >> 8) ) ;
+	color[G] = (((colorRGB444_packed >> 4) & 0xf) << 4) | ((colorRGB444_packed >> 4) & 0xf) ;
+	color[B] = (((colorRGB444_packed) & 0xf) << 4) | ((colorRGB444_packed) & 0xf) ;
+	
+	/* Test all distances */
+	/* unroll loop for (uint8 d = 0; d < 8; ++d) */
+	{
+		ONETABLE59RGB_PERCEP(0)
+		ONETABLE59RGB_PERCEP(1)
+		ONETABLE59RGB_PERCEP(2)
+		ONETABLE59RGB_PERCEP(3)
+		ONETABLE59RGB_PERCEP(4)
+		ONETABLE59RGB_PERCEP(5)
+		ONETABLE59RGB_PERCEP(6)
+		ONETABLE59RGB_PERCEP(7)
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates a table used in exhaustive compression of the T-mode.
+//
+// Full RGB variant using plain (unweighted) square_table lookups.
+// For each of the 8 distance indices d and each of the 16 pixels x of block (4 bytes per
+// pixel), writes to precalc_err_col0_RGB[(colorRGB444_packed*8 + d)*16 + x] the smallest
+// error among three candidate colours: base - table59T[d], base, and base + table59T[d],
+// where base is colorRGB444_packed expanded to 8 bits per channel.
+// The loops over d, x and the three candidates are unrolled by hand through the two macros
+// defined inside the function body.
+//
+// NOTE(review): candidates are stored with 255 subtracted, so the square_table index
+// (pixel - candidate) equals (pixel - colour) + 255. Presumably square_table[i] holds
+// (i-255)^2 and clamp_table[v+255] clamps v to 0..255 -- confirm against their definitions.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcError59T_col0_RGB(uint8* block, int colorRGB444_packed, unsigned int *precalc_err_col0_RGB)
+{
+	// NOTE: block_error and best_block_error are never read in this function.
+	unsigned int block_error = 0, 
+	  	     	 best_block_error = MAXIMUM_ERROR, 
+			     pixel_error, 
+		         best_pixel_error;
+	uint8 color[3];
+	int possible_colors[3][3];
+	unsigned int *precalc_err_col0_RGB_adr;
+
+	// ONEPOINT59RGB(xval): evaluates the three candidate colours for pixel xval and stores the
+	// smallest error at precalc_err_col0_RGB_adr[xval].
+	// NOTE: neither macro is #undef'd within this function.
+#define ONEPOINT59RGB(xval) \
+			/* Loop possible block colors */\
+			/* unroll loop for (uint8 c = 0; c < 3; c++) */\
+			{\
+				best_pixel_error = square_table[block[4*xval + R] - possible_colors[0][R]]\
+                            + square_table[block[4*xval + G] - possible_colors[0][G]] \
+							+ square_table[block[4*xval + B] - possible_colors[0][B]];\
+				pixel_error = square_table[block[4*xval + R] - possible_colors[1][R]]\
+                            + square_table[block[4*xval + G] - possible_colors[1][G]]\
+							+ square_table[block[4*xval + B] - possible_colors[1][B]];\
+				if (pixel_error < best_pixel_error)\
+					best_pixel_error = pixel_error;\
+				pixel_error = square_table[block[4*xval + R] - possible_colors[2][R]]\
+                            + square_table[block[4*xval + G] - possible_colors[2][G]]\
+							+ square_table[block[4*xval + B] - possible_colors[2][B]];\
+				if (pixel_error < best_pixel_error)\
+					best_pixel_error = pixel_error;\
+			}\
+			precalc_err_col0_RGB_adr[xval] = (unsigned int) best_pixel_error;\
+
+	// ONETABLE59RGB(dval): builds the three candidate colours for distance index dval, points
+	// precalc_err_col0_RGB_adr at the 16-entry output row for (colorRGB444_packed, dval) and
+	// processes all 16 pixels.
+#define ONETABLE59RGB(dval) \
+		possible_colors[0][R] = clamp_table[color[R] - table59T[dval]+255]-255;\
+		possible_colors[0][G] = clamp_table[color[G] - table59T[dval]+255]-255;\
+		possible_colors[0][B] = clamp_table[color[B] - table59T[dval]+255]-255;\
+		possible_colors[1][R] = color[R]-255;\
+		possible_colors[1][G] = color[G]-255;\
+		possible_colors[1][B] = color[B]-255;\
+		possible_colors[2][R] = clamp_table[color[R] + table59T[dval]+255]-255;\
+		possible_colors[2][G] = clamp_table[color[G] + table59T[dval]+255]-255;\
+		possible_colors[2][B] = clamp_table[color[B] + table59T[dval]+255]-255;\
+		precalc_err_col0_RGB_adr = &precalc_err_col0_RGB[(colorRGB444_packed*8 + dval)*16];\
+		/* Loop block */\
+		/* unroll loop for (int x = 0; x < 16; x++) */\
+		{\
+			ONEPOINT59RGB(0)\
+			ONEPOINT59RGB(1)\
+			ONEPOINT59RGB(2)\
+			ONEPOINT59RGB(3)\
+			ONEPOINT59RGB(4)\
+			ONEPOINT59RGB(5)\
+			ONEPOINT59RGB(6)\
+			ONEPOINT59RGB(7)\
+			ONEPOINT59RGB(8)\
+			ONEPOINT59RGB(9)\
+			ONEPOINT59RGB(10)\
+			ONEPOINT59RGB(11)\
+			ONEPOINT59RGB(12)\
+			ONEPOINT59RGB(13)\
+			ONEPOINT59RGB(14)\
+			ONEPOINT59RGB(15)\
+		}\
+
+	// Expand each 4-bit channel to 8 bits by replicating the nibble into both halves of the byte.
+	// NOTE(review): the red term is not masked with 0xf (green and blue are); this presumably
+	// relies on colorRGB444_packed being below 4096 -- confirm against the callers.
+	color[R] = (((colorRGB444_packed >> 8) ) << 4) | ((colorRGB444_packed >> 8) ) ;
+	color[G] = (((colorRGB444_packed >> 4) & 0xf) << 4) | ((colorRGB444_packed >> 4) & 0xf) ;
+	color[B] = (((colorRGB444_packed) & 0xf) << 4) | ((colorRGB444_packed) & 0xf) ;
+
+	/* Test all distances */
+	/* unroll loop for (uint8 d = 0; d < 8; ++d) */
+	{
+		ONETABLE59RGB(0)
+		ONETABLE59RGB(1)
+		ONETABLE59RGB(2)
+		ONETABLE59RGB(3)
+		ONETABLE59RGB(4)
+		ONETABLE59RGB(5)
+		ONETABLE59RGB(6)
+		ONETABLE59RGB(7)
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Fills the 16 entries of one RGB444 color in the T-mode "color 1" table: per block pixel, the squared RGB difference weighted by the PERCEPTUAL_WEIGHT_*_SQUARED_TIMES1000 factors.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcError59T_col1_RGBpercep1000(uint8* block, int colorRGB444_packed, unsigned int *precalc_err_col1_RGB)
+{
+	uint8 colorRGB[3];
+
+	// Expand every 4-bit channel to 8 bits; multiplying by 17 maps 0..15 onto 0..255.
+	for (int c = 0; c < 3; c++)
+		colorRGB[c] = ((colorRGB444_packed >> (8 - 4*c)) & 0xf)*17;
+
+	// Entries for this color start at colorRGB444_packed*16; block stores 4 bytes per pixel.
+	unsigned int *row = precalc_err_col1_RGB + (colorRGB444_packed)*16;
+	const uint8 *px = block;
+	for (int i = 0; i < 16; i++, px += 4)
+	{
+		const int deltaR = px[R] - colorRGB[R];
+		const int deltaG = px[G] - colorRGB[G];
+		const int deltaB = px[B] - colorRGB[B];
+
+		const unsigned int weighted = PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(deltaR) + PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*SQUARE(deltaG) + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*SQUARE(deltaB);
+
+		row[i] = weighted;
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Fills the 16 entries of one RGB444 color in the T-mode "color 1" table: per block pixel, the plain sum of squared R, G and B differences.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcError59T_col1_RGB(uint8* block, int colorRGB444_packed, unsigned int *precalc_err_col1_RGB)
+{
+	uint8 colorRGB[3];
+
+	// Expand every 4-bit channel to 8 bits; multiplying by 17 maps 0..15 onto 0..255.
+	for (int c = 0; c < 3; c++)
+		colorRGB[c] = ((colorRGB444_packed >> (8 - 4*c)) & 0xf)*17;
+
+	// Entries for this color start at colorRGB444_packed*16; block stores 4 bytes per pixel.
+	unsigned int *row = precalc_err_col1_RGB + (colorRGB444_packed)*16;
+	const uint8 *px = block;
+	for (int i = 0; i < 16; i++, px += 4)
+	{
+		const int deltaR = px[R] - colorRGB[R];
+		const int deltaG = px[G] - colorRGB[G];
+		const int deltaB = px[B] - colorRGB[B];
+
+		const unsigned int squared = SQUARE(deltaR) + SQUARE(deltaG) + SQUARE(deltaB);
+		row[i] = squared;
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Red-nibble pass of the exhaustive perceptual T-mode search: returns the smallest error sum over the 8 distances for the given pair of colors, looked up in tables indexed by the red nibble only (block is not referenced).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateError59TusingPrecalcRperceptual1000(uint8* block, int *colorsRGB444_packed, unsigned int *precalc_err_col0_R, unsigned int *precalc_err_col1_R, unsigned int best_error_so_far) 
+{
+	unsigned int	block_error = 0, 
+					best_block_error = MAXERR1000; // smallest per-distance sum seen so far
+
+	unsigned int *pixel_error_col0_base_adr; // color 0: 8 distances x 16 pixels
+	unsigned int *pixel_error_col0_adr, *pixel_error_col1_adr; // 16 per-pixel entries: color 0 at the current distance, and color 1
+	// FIRSTCHOICE59R_PERCEP starts block_error with the smaller of the two entries for pixel 0; CHOICE59R_PERCEP(x) adds the smaller of the two entries for pixel x.
+#define FIRSTCHOICE59R_PERCEP\
+			if(*pixel_error_col0_adr < *pixel_error_col1_adr)\
+				block_error = *pixel_error_col0_adr;\
+			else\
+				block_error = *pixel_error_col1_adr;\
+
+#define CHOICE59R_PERCEP(xval)\
+			if(pixel_error_col0_adr[xval] < pixel_error_col1_adr[xval])\
+				block_error += pixel_error_col0_adr[xval];\
+			else\
+				block_error += pixel_error_col1_adr[xval];\
+
+#define ONETABLE59R_PERCEP(dval) \
+		pixel_error_col0_adr = &pixel_error_col0_base_adr[dval*16];\
+		/* hand-unrolled for(int x = 0; block_error < best_error_so_far && x<16; x++): the bound is tested after pixel 0, after pixel 1 and then after every second pixel */\
+		{\
+			FIRSTCHOICE59R_PERCEP\
+			if( block_error < best_error_so_far)\
+			{\
+				CHOICE59R_PERCEP(1)\
+				if( block_error < best_error_so_far)\
+				{\
+					CHOICE59R_PERCEP(2)\
+					CHOICE59R_PERCEP(3)\
+					if( block_error < best_error_so_far)\
+					{\
+						CHOICE59R_PERCEP(4)\
+						CHOICE59R_PERCEP(5)\
+						if( block_error < best_error_so_far)\
+						{\
+							CHOICE59R_PERCEP(6)\
+							CHOICE59R_PERCEP(7)\
+							if( block_error < best_error_so_far)\
+							{\
+								CHOICE59R_PERCEP(8)\
+								CHOICE59R_PERCEP(9)\
+								if( block_error < best_error_so_far)\
+								{\
+									CHOICE59R_PERCEP(10)\
+									CHOICE59R_PERCEP(11)\
+									if( block_error < best_error_so_far)\
+									{\
+										CHOICE59R_PERCEP(12)\
+										CHOICE59R_PERCEP(13)\
+										if( block_error < best_error_so_far)\
+										{\
+											CHOICE59R_PERCEP(14)\
+											CHOICE59R_PERCEP(15)\
+										}\
+									}\
+								}\
+							}\
+						}\
+					}\
+				}\
+			}\
+		}\
+		if (block_error < best_block_error)\
+			best_block_error = block_error;\
+	
+	pixel_error_col0_base_adr = &precalc_err_col0_R[((colorsRGB444_packed[0]>>8)*8)*16]; // red nibble of color 0 selects its 8x16 entries
+	pixel_error_col1_adr = &precalc_err_col1_R[((colorsRGB444_packed[1]>>8))*16]; // red nibble of color 1 selects its 16 entries
+
+	// Test all distances
+	/* hand-unrolled for (uint8 d = 0; d < 8; d++) */
+	{
+		ONETABLE59R_PERCEP(0)
+		ONETABLE59R_PERCEP(1)
+		ONETABLE59R_PERCEP(2)
+		ONETABLE59R_PERCEP(3)
+		ONETABLE59R_PERCEP(4)
+		ONETABLE59R_PERCEP(5)
+		ONETABLE59R_PERCEP(6)
+		ONETABLE59R_PERCEP(7)
+	}
+	return best_block_error; // a value >= best_error_so_far may come from a sum that was cut short, so it only means "not better"
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Red-nibble pass of the exhaustive T-mode search: returns the smallest error sum over the 8 distances for the given pair of colors, looked up in tables indexed by the red nibble only (block is not referenced).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateError59TusingPrecalcR(uint8* block, int *colorsRGB444_packed, unsigned int *precalc_err_col0_R, unsigned int *precalc_err_col1_R, unsigned int best_error_so_far) 
+{
+	unsigned int	block_error = 0, 
+					best_block_error = MAXIMUM_ERROR; // smallest per-distance sum seen so far
+
+	unsigned int *pixel_error_col0_base_adr; // color 0: 8 distances x 16 pixels
+	unsigned int *pixel_error_col0_adr, *pixel_error_col1_adr; // 16 per-pixel entries: color 0 at the current distance, and color 1
+	// FIRSTCHOICE59R starts block_error with the smaller of the two entries for pixel 0; CHOICE59R(x) adds the smaller of the two entries for pixel x.
+#define FIRSTCHOICE59R\
+			if(*pixel_error_col0_adr < *pixel_error_col1_adr)\
+				block_error = *pixel_error_col0_adr;\
+			else\
+				block_error = *pixel_error_col1_adr;\
+
+#define CHOICE59R(xval)\
+			if(pixel_error_col0_adr[xval] < pixel_error_col1_adr[xval])\
+				block_error += pixel_error_col0_adr[xval];\
+			else\
+				block_error += pixel_error_col1_adr[xval];\
+
+#define ONETABLE59R(dval) \
+		pixel_error_col0_adr = &pixel_error_col0_base_adr[dval*16];\
+		/* hand-unrolled for(int x = 0; block_error < best_error_so_far && x<16; x++): the bound is tested after pixel 0, after pixel 1 and then after every second pixel */\
+		{\
+			FIRSTCHOICE59R\
+			if( block_error < best_error_so_far)\
+			{\
+				CHOICE59R(1)\
+				if( block_error < best_error_so_far)\
+				{\
+					CHOICE59R(2)\
+					CHOICE59R(3)\
+					if( block_error < best_error_so_far)\
+					{\
+						CHOICE59R(4)\
+						CHOICE59R(5)\
+						if( block_error < best_error_so_far)\
+						{\
+							CHOICE59R(6)\
+							CHOICE59R(7)\
+							if( block_error < best_error_so_far)\
+							{\
+								CHOICE59R(8)\
+								CHOICE59R(9)\
+								if( block_error < best_error_so_far)\
+								{\
+									CHOICE59R(10)\
+									CHOICE59R(11)\
+									if( block_error < best_error_so_far)\
+									{\
+										CHOICE59R(12)\
+										CHOICE59R(13)\
+										if( block_error < best_error_so_far)\
+										{\
+											CHOICE59R(14)\
+											CHOICE59R(15)\
+										}\
+									}\
+								}\
+							}\
+						}\
+					}\
+				}\
+			}\
+		}\
+		if (block_error < best_block_error)\
+			best_block_error = block_error;\
+	
+	pixel_error_col0_base_adr = &precalc_err_col0_R[((colorsRGB444_packed[0]>>8)*8)*16]; // red nibble of color 0 selects its 8x16 entries
+	pixel_error_col1_adr = &precalc_err_col1_R[((colorsRGB444_packed[1]>>8))*16]; // red nibble of color 1 selects its 16 entries
+
+
+	// Test all distances
+	/* hand-unrolled for (uint8 d = 0; d < 8; d++) */
+	{
+		ONETABLE59R(0)
+		ONETABLE59R(1)
+		ONETABLE59R(2)
+		ONETABLE59R(3)
+		ONETABLE59R(4)
+		ONETABLE59R(5)
+		ONETABLE59R(6)
+		ONETABLE59R(7)
+	}
+	
+	return best_block_error; // a value >= best_error_so_far may come from a sum that was cut short, so it only means "not better"
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Red+green pass of the exhaustive perceptual T-mode search: returns the smallest error sum over the 8 distances for the given pair of colors, looked up in tables indexed by the red and green nibbles (block is not referenced).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateError59TusingPrecalcRGperceptual1000(uint8* block, int *colorsRGB444_packed, unsigned int *precalc_err_col0_RG, unsigned int *precalc_err_col1_RG, unsigned int best_error_so_far) 
+{
+	unsigned int	block_error = 0, 
+					best_block_error = MAXERR1000; // smallest per-distance sum seen so far
+
+	unsigned int *pixel_error_col0_adr, *pixel_error_col1_adr; // 16 per-pixel entries: color 0 at the current distance, and color 1
+	unsigned int *pixel_error_col0_base_adr; // color 0: 8 distances x 16 pixels
+	// FIRSTCHOICE59RG_PERCEP starts block_error with the smaller of the two entries for pixel 0; CHOICE59RG_PERCEP(x) adds the smaller of the two entries for pixel x.
+#define FIRSTCHOICE59RG_PERCEP \
+		if(*pixel_error_col0_adr < *pixel_error_col1_adr)\
+			block_error = *pixel_error_col0_adr;\
+		else\
+			block_error = *pixel_error_col1_adr;\
+
+
+#define CHOICE59RG_PERCEP(xval) \
+		if(pixel_error_col0_adr[xval] < pixel_error_col1_adr[xval])\
+			block_error += pixel_error_col0_adr[xval];\
+		else\
+			block_error += pixel_error_col1_adr[xval];\
+
+#define ONETABLE59RG_PERCEP(dval)\
+		pixel_error_col0_adr = &pixel_error_col0_base_adr[dval*16];\
+		/* hand-unrolled for(int x = 0; block_error < best_error_so_far && x<16; x++): the bound is tested after pixel 0, after pixel 1 and then after every second pixel */\
+		{\
+			FIRSTCHOICE59RG_PERCEP\
+			if( block_error < best_error_so_far)\
+			{\
+				CHOICE59RG_PERCEP(1)\
+				if( block_error < best_error_so_far)\
+				{\
+					CHOICE59RG_PERCEP(2)\
+					CHOICE59RG_PERCEP(3)\
+					if( block_error < best_error_so_far)\
+					{\
+						CHOICE59RG_PERCEP(4)\
+						CHOICE59RG_PERCEP(5)\
+						if( block_error < best_error_so_far)\
+						{\
+							CHOICE59RG_PERCEP(6)\
+							CHOICE59RG_PERCEP(7)\
+							if( block_error < best_error_so_far)\
+							{\
+								CHOICE59RG_PERCEP(8)\
+								CHOICE59RG_PERCEP(9)\
+								if( block_error < best_error_so_far)\
+								{\
+									CHOICE59RG_PERCEP(10)\
+									CHOICE59RG_PERCEP(11)\
+									if( block_error < best_error_so_far)\
+									{\
+										CHOICE59RG_PERCEP(12)\
+										CHOICE59RG_PERCEP(13)\
+										if( block_error < best_error_so_far)\
+										{\
+											CHOICE59RG_PERCEP(14)\
+											CHOICE59RG_PERCEP(15)\
+										}\
+									}\
+								}\
+							}\
+						}\
+					}\
+				}\
+			}\
+		}\
+		if (block_error < best_block_error)\
+			best_block_error = block_error;\
+
+
+	pixel_error_col0_base_adr = &precalc_err_col0_RG[((colorsRGB444_packed[0]>>4)*8)*16]; // red+green nibbles of color 0 select its 8x16 entries
+	pixel_error_col1_adr = &precalc_err_col1_RG[((colorsRGB444_packed[1]>>4))*16]; // red+green nibbles of color 1 select its 16 entries
+
+	// Test all distances
+	/* hand-unrolled for (uint8 d = 0; d < 8; d++) */
+	{
+
+		ONETABLE59RG_PERCEP(0)
+		ONETABLE59RG_PERCEP(1)
+		ONETABLE59RG_PERCEP(2)
+		ONETABLE59RG_PERCEP(3)
+		ONETABLE59RG_PERCEP(4)
+		ONETABLE59RG_PERCEP(5)
+		ONETABLE59RG_PERCEP(6)
+		ONETABLE59RG_PERCEP(7)
+	}
+	return best_block_error; // a value >= best_error_so_far may come from a sum that was cut short, so it only means "not better"
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Red+green pass of the exhaustive T-mode search: returns the smallest error sum over the 8 distances for the given pair of colors, looked up in tables indexed by the red and green nibbles (block is not referenced).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateError59TusingPrecalcRG(uint8* block, int *colorsRGB444_packed, unsigned int *precalc_err_col0_RG, unsigned int *precalc_err_col1_RG, unsigned int best_error_so_far) 
+{
+	unsigned int	block_error = 0, 
+					best_block_error = MAXIMUM_ERROR; // smallest per-distance sum seen so far
+
+	unsigned int *pixel_error_col0_adr, *pixel_error_col1_adr; // 16 per-pixel entries: color 0 at the current distance, and color 1
+	unsigned int *pixel_error_col0_base_adr; // color 0: 8 distances x 16 pixels
+	// FIRSTCHOICE59RG starts block_error with the smaller of the two entries for pixel 0; CHOICE59RG(x) adds the smaller of the two entries for pixel x.
+#define FIRSTCHOICE59RG \
+		if(*pixel_error_col0_adr < *pixel_error_col1_adr)\
+			block_error = *pixel_error_col0_adr;\
+		else\
+			block_error = *pixel_error_col1_adr;\
+
+#define CHOICE59RG(xval) \
+		if(pixel_error_col0_adr[xval] < pixel_error_col1_adr[xval])\
+			block_error += pixel_error_col0_adr[xval];\
+		else\
+			block_error += pixel_error_col1_adr[xval];\
+
+#define ONETABLE59RG(dval)\
+		pixel_error_col0_adr = &pixel_error_col0_base_adr[dval*16];\
+		/* hand-unrolled for(int x = 0; block_error < best_error_so_far && x<16; x++): the bound is tested after pixel 0, after pixel 1 and then after every second pixel */\
+		{\
+			FIRSTCHOICE59RG\
+			if( block_error < best_error_so_far)\
+			{\
+				CHOICE59RG(1)\
+				if( block_error < best_error_so_far)\
+				{\
+					CHOICE59RG(2)\
+					CHOICE59RG(3)\
+					if( block_error < best_error_so_far)\
+					{\
+						CHOICE59RG(4)\
+						CHOICE59RG(5)\
+						if( block_error < best_error_so_far)\
+						{\
+							CHOICE59RG(6)\
+							CHOICE59RG(7)\
+							if( block_error < best_error_so_far)\
+							{\
+								CHOICE59RG(8)\
+								CHOICE59RG(9)\
+								if( block_error < best_error_so_far)\
+								{\
+									CHOICE59RG(10)\
+									CHOICE59RG(11)\
+									if( block_error < best_error_so_far)\
+									{\
+										CHOICE59RG(12)\
+										CHOICE59RG(13)\
+										if( block_error < best_error_so_far)\
+										{\
+											CHOICE59RG(14)\
+											CHOICE59RG(15)\
+										}\
+									}\
+								}\
+							}\
+						}\
+					}\
+				}\
+			}\
+		}\
+		if (block_error < best_block_error)\
+			best_block_error = block_error;\
+
+	pixel_error_col0_base_adr = &precalc_err_col0_RG[((colorsRGB444_packed[0]>>4)*8)*16]; // red+green nibbles of color 0 select its 8x16 entries
+	pixel_error_col1_adr = &precalc_err_col1_RG[((colorsRGB444_packed[1]>>4))*16]; // red+green nibbles of color 1 select its 16 entries
+
+	// Test all distances
+	/* hand-unrolled for (uint8 d = 0; d < 8; d++) */
+	{
+		ONETABLE59RG(0)
+		ONETABLE59RG(1)
+		ONETABLE59RG(2)
+		ONETABLE59RG(3)
+		ONETABLE59RG(4)
+		ONETABLE59RG(5)
+		ONETABLE59RG(6)
+		ONETABLE59RG(7)
+	}
+	return best_block_error; // a value >= best_error_so_far may come from a sum that was cut short, so it only means "not better"
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Full-RGB pass of the exhaustive perceptual T-mode search: returns the smallest error sum over the 8 distances for the given pair of packed RGB444 colors, looked up in the precalculated tables (block is not referenced).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateError59TusingPrecalcRGBperceptual1000(uint8* block, int *colorsRGB444_packed, unsigned int *precalc_err_col0_RGB, unsigned int *precalc_err_col1_RGB, unsigned int best_error_so_far) 
+{
+	unsigned int	block_error = 0, 
+					      best_block_error = MAXERR1000; // smallest per-distance sum seen so far
+	unsigned int *pixel_error_col0_adr, *pixel_error_col1_adr; // 16 per-pixel entries: color 0 at the current distance, and color 1
+	unsigned int *pixel_error_col0_base_adr; // color 0: 8 distances x 16 pixels
+	// FIRSTCHOICE59_PERCEP starts block_error with the smaller of the two entries for pixel 0; CHOICE59_PERCEP(x) adds the smaller of the two entries for pixel x.
+#define FIRSTCHOICE59_PERCEP \
+	if(*pixel_error_col0_adr < *pixel_error_col1_adr)\
+		block_error = *pixel_error_col0_adr;\
+	else\
+		block_error = *pixel_error_col1_adr;\
+
+#define CHOICE59_PERCEP(xval) \
+	if(pixel_error_col0_adr[xval] < pixel_error_col1_adr[xval])\
+		block_error += pixel_error_col0_adr[xval];\
+	else\
+		block_error += pixel_error_col1_adr[xval];\
+
+#define ONETABLE59T_PERCEP(dval)\
+		pixel_error_col0_adr = &pixel_error_col0_base_adr[dval*16];\
+		/* hand-unrolled for(int x = 0; block_error < best_error_so_far && x<16; x++): the bound is tested after pixel 0, after pixel 1 and then after every second pixel */\
+		{\
+			FIRSTCHOICE59_PERCEP\
+			if( block_error < best_error_so_far)\
+			{\
+				CHOICE59_PERCEP(1)\
+				if( block_error < best_error_so_far)\
+				{\
+					CHOICE59_PERCEP(2)\
+					CHOICE59_PERCEP(3)\
+					if( block_error < best_error_so_far)\
+					{\
+						CHOICE59_PERCEP(4)\
+						CHOICE59_PERCEP(5)\
+						if( block_error < best_error_so_far)\
+						{\
+							CHOICE59_PERCEP(6)\
+							CHOICE59_PERCEP(7)\
+							if( block_error < best_error_so_far)\
+							{\
+								CHOICE59_PERCEP(8)\
+								CHOICE59_PERCEP(9)\
+								if( block_error < best_error_so_far)\
+								{\
+									CHOICE59_PERCEP(10)\
+									CHOICE59_PERCEP(11)\
+									if( block_error < best_error_so_far)\
+									{\
+										CHOICE59_PERCEP(12)\
+										CHOICE59_PERCEP(13)\
+										if( block_error < best_error_so_far)\
+										{\
+											CHOICE59_PERCEP(14)\
+											CHOICE59_PERCEP(15)\
+										}\
+									}\
+								}\
+							}\
+						}\
+					}\
+				}\
+			}\
+		}\
+		if (block_error < best_block_error)\
+			best_block_error = block_error;\
+
+	pixel_error_col1_adr = &precalc_err_col1_RGB[(colorsRGB444_packed[1])*16]; // packed color 1 selects its 16 entries
+	pixel_error_col0_base_adr = &precalc_err_col0_RGB[(colorsRGB444_packed[0]*8)*16]; // packed color 0 selects its 8x16 entries
+
+	// Test all distances
+	/* hand-unrolled for (uint8 d = 0; d < 8; d++)*/
+	{
+		ONETABLE59T_PERCEP(0)
+		ONETABLE59T_PERCEP(1)
+		ONETABLE59T_PERCEP(2)
+		ONETABLE59T_PERCEP(3)
+		ONETABLE59T_PERCEP(4)
+		ONETABLE59T_PERCEP(5)
+		ONETABLE59T_PERCEP(6)
+		ONETABLE59T_PERCEP(7)
+	}
+	return best_block_error; // a value >= best_error_so_far may come from a sum that was cut short, so it only means "not better"
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Full-RGB pass of the exhaustive T-mode search: returns the smallest error sum over the 8 distances for the given pair of packed RGB444 colors, looked up in the precalculated tables (block is not referenced).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateError59TusingPrecalcRGB(uint8* block, int *colorsRGB444_packed, unsigned int *precalc_err_col0_RGB, unsigned int *precalc_err_col1_RGB, unsigned int best_error_so_far) 
+{
+	unsigned int	block_error = 0, 
+					      best_block_error = MAXIMUM_ERROR; // smallest per-distance sum seen so far
+	unsigned int *pixel_error_col0_adr, *pixel_error_col1_adr; // 16 per-pixel entries: color 0 at the current distance, and color 1
+	unsigned int *pixel_error_col0_base_adr; // color 0: 8 distances x 16 pixels
+	// FIRSTCHOICE59 starts block_error with the smaller of the two entries for pixel 0; CHOICE59(x) adds the smaller of the two entries for pixel x.
+#define FIRSTCHOICE59 \
+	if(*pixel_error_col0_adr < *pixel_error_col1_adr)\
+		block_error = *pixel_error_col0_adr;\
+	else\
+		block_error = *pixel_error_col1_adr;\
+
+#define CHOICE59(xval) \
+	if(pixel_error_col0_adr[xval] < pixel_error_col1_adr[xval])\
+		block_error += pixel_error_col0_adr[xval];\
+	else\
+		block_error += pixel_error_col1_adr[xval];\
+
+#define ONETABLE59T(dval)\
+		pixel_error_col0_adr = &pixel_error_col0_base_adr[dval*16];\
+		/* hand-unrolled for(int x = 0; block_error < best_error_so_far && x<16; x++): the bound is tested after pixel 0, after pixel 1 and then after every second pixel */\
+		{\
+			FIRSTCHOICE59\
+			if( block_error < best_error_so_far)\
+			{\
+				CHOICE59(1)\
+				if( block_error < best_error_so_far)\
+				{\
+					CHOICE59(2)\
+					CHOICE59(3)\
+					if( block_error < best_error_so_far)\
+					{\
+						CHOICE59(4)\
+						CHOICE59(5)\
+						if( block_error < best_error_so_far)\
+						{\
+							CHOICE59(6)\
+							CHOICE59(7)\
+							if( block_error < best_error_so_far)\
+							{\
+								CHOICE59(8)\
+								CHOICE59(9)\
+								if( block_error < best_error_so_far)\
+								{\
+									CHOICE59(10)\
+									CHOICE59(11)\
+									if( block_error < best_error_so_far)\
+									{\
+										CHOICE59(12)\
+										CHOICE59(13)\
+										if( block_error < best_error_so_far)\
+										{\
+											CHOICE59(14)\
+											CHOICE59(15)\
+										}\
+									}\
+								}\
+							}\
+						}\
+					}\
+				}\
+			}\
+		}\
+		if (block_error < best_block_error)\
+			best_block_error = block_error;\
+
+	pixel_error_col1_adr = &precalc_err_col1_RGB[(colorsRGB444_packed[1])*16]; // packed color 1 selects its 16 entries
+	pixel_error_col0_base_adr = &precalc_err_col0_RGB[(colorsRGB444_packed[0]*8)*16]; // packed color 0 selects its 8x16 entries
+
+	// Test all distances
+	/* hand-unrolled for (uint8 d = 0; d < 8; d++)*/
+	{
+		ONETABLE59T(0)
+		ONETABLE59T(1)
+		ONETABLE59T(2)
+		ONETABLE59T(3)
+		ONETABLE59T(4)
+		ONETABLE59T(5)
+		ONETABLE59T(6)
+		ONETABLE59T(7)
+	}
+	return best_block_error; // a value >= best_error_so_far may come from a sum that was cut short, so it only means "not better"
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Compresses one 4x4 block to the 59-bit T-mode layout below with a pruned exhaustive search over both RGB444 colors, using perceptually weighted errors.
+// This is supposed to match the first of the three modes in TWOTIMER.
+//
+//|63 62 61 60 59|58 57 56 55|54 53 52 51|50 49 48 47|46 45 44 43|42 41 40 39|38 37 36 35|34 33 32|
+//|----empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|--dist--|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+//
+// Note that this method might not return the best possible compression for the T-mode. It will only do so if the error of the best possible T-representation
+// is less than best_error_so_far. To guarantee that the best possible T-representation is found, the function should be called using
+// best_error_so_far = 255*255*3*16, which is the maximum error for a block. NOTE(review): this variant works with weighted errors (see the *1000 weights), so that bound is presumably not in the same units (MAXERR1000?) - confirm.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockTHUMB59TExhaustivePerceptual(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2, unsigned int best_error_so_far) 
+{
+	uint8 colorsRGB444[2][3]; // [color][channel], one 4-bit value per entry; doubles as the loop counters of the search
+	unsigned int pixel_indices;
+	uint8 distance;
+
+	uint8 block[4*4*4]; // 16 pixels, 4 bytes each
+
+	unsigned int *precalc_err_col0_RGB; // heap tables, allocated and freed inside this call
+	unsigned int *precalc_err_col1_RGB;
+	unsigned int *precalc_err_col0_RG;
+	unsigned int *precalc_err_col1_RG;
+	unsigned int *precalc_err_col0_R;
+	unsigned int *precalc_err_col1_R;
+
+	int colorRGB444_packed;
+
+	int colorsRGB444_packed[2]; // pair currently being tested, packed as (r<<8) + (g<<4) + b
+	int best_colorsRGB444_packed[2];
+
+	unsigned int best_error_using_Tmode;
+
+	// First compress the block quickly to a reasonable quality so that we can
+	// rule out every color pair whose error is not below that result.
+	best_error_using_Tmode = (unsigned int) compressBlockTHUMB59TFastestOnlyColorPerceptual1000(img, width, height, startx, starty, best_colorsRGB444_packed);
+	if(best_error_using_Tmode < best_error_so_far)
+		best_error_so_far = best_error_using_Tmode;
+
+	// Color numbering is reversed between the above function and the precalc functions below; swap colors.
+	int temp = best_colorsRGB444_packed[0];
+	best_colorsRGB444_packed[0] = best_colorsRGB444_packed[1];
+	best_colorsRGB444_packed[1] = temp;
+
+	int xx,yy,count = 0;
+
+	// Copy the block column by column (xx outer, yy inner) from the 3-bytes-per-pixel image into 4 bytes per pixel: R, G, B and a zero pad byte.
+	for(xx = 0; xx<4; xx++)
+	{
+		for(yy=0; yy<4; yy++)
+		{
+			block[(count)*4] = img[((starty+yy)*width+(startx+xx))*3];
+			block[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block[(count)*4+3] = 0;
+			count++;
+		}
+	}
+
+	// Precalculate error for color 0 (which produces the upper half of the T): 4096 colors x 8 distances x 16 pixels
+	precalc_err_col0_RGB = (unsigned int*) malloc(4096*8*16*sizeof(unsigned int));
+	if(!precalc_err_col0_RGB){printf("Out of memory allocating \n");exit(1);}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed++)
+	{
+		precalcError59T_col0_RGBpercep1000(block, colorRGB444_packed, precalc_err_col0_RGB);
+	}
+
+	// Precalculate error for color 1 (which produces the lower half of the T -- the lone color): 4096 colors x 16 pixels
+	precalc_err_col1_RGB = (unsigned int*) malloc(4096*16*sizeof(unsigned int));
+	if(!precalc_err_col1_RGB){printf("Out of memory allocating \n");exit(1);}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed++)
+	{
+		precalcError59T_col1_RGBpercep1000(block, colorRGB444_packed, precalc_err_col1_RGB);
+	}
+
+	precalc_err_col0_RG = (unsigned int*) malloc(16*16*8*16*sizeof(unsigned int)); // 256 red/green combinations x 8 distances x 16 pixels
+	if(!precalc_err_col0_RG){printf("Out of memory allocating \n");exit(1);}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed+=16) // step 16: blue nibble stays zero
+	{
+		precalcError59T_col0_RGpercep1000(block, colorRGB444_packed, precalc_err_col0_RG);
+	}
+
+	precalc_err_col1_RG = (unsigned int*) malloc(16*16*16*sizeof(unsigned int)); // 256 red/green combinations x 16 pixels
+	if(!precalc_err_col1_RG){printf("Out of memory allocating \n");exit(1);}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed+=16)
+	{
+		precalcError59T_col1_RGpercep1000(block, colorRGB444_packed, precalc_err_col1_RG);
+	}
+
+	precalc_err_col0_R = (unsigned int*) malloc(16*8*16*sizeof(unsigned int)); // 16 red values x 8 distances x 16 pixels
+	if(!precalc_err_col0_R){printf("Out of memory allocating \n");exit(1);}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed+=16*16) // step 256: green and blue nibbles stay zero
+	{
+		precalcError59T_col0_Rpercep1000(block, colorRGB444_packed, precalc_err_col0_R);
+	}
+
+	precalc_err_col1_R = (unsigned int*) malloc(16*16*sizeof(unsigned int)); // 16 red values x 16 pixels
+	if(!precalc_err_col1_R){printf("Out of memory allocating \n");exit(1);}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed+=16*16)
+	{
+		precalcError59T_col1_Rpercep1000(block, colorRGB444_packed, precalc_err_col1_R);
+	}
+
+	unsigned int error;
+	unsigned int avoided = 0; // never used
+	unsigned int notavoided = 0; // incremented below but never read
+	// Three-level search: red nibbles first, then red+green, then full RGB; a finer level is entered only when the coarser result is below best_error_so_far.
+	for(colorsRGB444[0][0] = 0; colorsRGB444[0][0] < 16; colorsRGB444[0][0]++)
+	{
+		for(colorsRGB444[1][0] = 0; colorsRGB444[1][0] < 16; colorsRGB444[1][0]++)
+		{
+			colorsRGB444_packed[0] = (colorsRGB444[0][0] << 8);
+			colorsRGB444_packed[1] = (colorsRGB444[1][0] << 8);
+			error = calculateError59TusingPrecalcRperceptual1000(block, colorsRGB444_packed, precalc_err_col0_R, precalc_err_col1_R, best_error_so_far);
+			if(error < best_error_so_far) // NOTE(review): presumably the red-only result is a lower bound for every pair sharing these red nibbles - confirm in the *_Rpercep1000 precalc functions
+			{
+				notavoided = notavoided + 1;
+				for(colorsRGB444[0][1] = 0; colorsRGB444[0][1] < 16; colorsRGB444[0][1]++)
+				{
+					colorsRGB444_packed[0] = (colorsRGB444[0][0] << 8) + (colorsRGB444[0][1] <<4);
+					for(colorsRGB444[1][1] = 0; colorsRGB444[1][1] < 16; colorsRGB444[1][1]++)
+					{
+						colorsRGB444_packed[1] = (colorsRGB444[1][0] << 8) + (colorsRGB444[1][1] <<4);
+						error = calculateError59TusingPrecalcRGperceptual1000(block, colorsRGB444_packed, precalc_err_col0_RG, precalc_err_col1_RG, best_error_so_far);
+						if(error < best_error_so_far)
+						{
+							for(colorsRGB444[0][2] = 0; colorsRGB444[0][2] < 16; colorsRGB444[0][2]++)
+							{
+								colorsRGB444_packed[0] = (colorsRGB444[0][0] << 8) + (colorsRGB444[0][1] <<4) + colorsRGB444[0][2];
+								for(colorsRGB444[1][2] = 0; colorsRGB444[1][2] < 16; colorsRGB444[1][2]++)
+								{
+									colorsRGB444_packed[1] = (colorsRGB444[1][0] << 8) + (colorsRGB444[1][1] <<4) + colorsRGB444[1][2];
+									error = calculateError59TusingPrecalcRGBperceptual1000(block, colorsRGB444_packed, precalc_err_col0_RGB, precalc_err_col1_RGB, best_error_so_far);
+
+									if(error < best_error_so_far)
+									{
+										best_error_so_far = error; // tightens the bound used by all later pruning tests
+										best_error_using_Tmode = error;
+										best_colorsRGB444_packed[0] = colorsRGB444_packed[0];
+										best_colorsRGB444_packed[1] = colorsRGB444_packed[1];
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	free(precalc_err_col0_RGB);
+	free(precalc_err_col1_RGB);
+	free(precalc_err_col0_RG);
+	free(precalc_err_col1_RG);
+	free(precalc_err_col0_R);
+	free(precalc_err_col1_R);
+
+	// We have got the two best colors. Now find the best distance and pixel indices. 
+
+	// Color numbering is reversed between the precalc tables and calculateError59TnoSwapPerceptual1000, so the two colors are swapped back here.
+	colorsRGB444[0][0] = (best_colorsRGB444_packed[1] >> 8) & 0xf;
+	colorsRGB444[0][1] = (best_colorsRGB444_packed[1] >> 4) & 0xf;
+	colorsRGB444[0][2] = (best_colorsRGB444_packed[1] >> 0) & 0xf;
+	
+	colorsRGB444[1][0] = (best_colorsRGB444_packed[0] >> 8) & 0xf;
+	colorsRGB444[1][1] = (best_colorsRGB444_packed[0] >> 4) & 0xf;
+	colorsRGB444[1][2] = (best_colorsRGB444_packed[0] >> 0) & 0xf;
+
+	calculateError59TnoSwapPerceptual1000(img, width, startx, starty, colorsRGB444, distance, pixel_indices);			
+
+	// Put the compress params into the compression block (always written, even when the result does not beat the caller's best_error_so_far)
+	packBlock59T(colorsRGB444, distance, pixel_indices, compressed1, compressed2);
+
+	return best_error_using_Tmode; // error of the fast first guess unless the exhaustive search found a smaller one
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// The below code should compress the block to 59 bits. 
+// This is supposed to match the first of the three modes in TWOTIMER.
+//
+//|63 62 61 60 59|58 57 56 55|54 53 52 51|50 49 48 47|46 45 44 43|42 41 40 39|38 37 36 35|34 33 32|
+//|----empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|--dist--|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+//
+// Note that this method might not return the best possible compression for the T-mode. It will only do so if the best possible T-representation
+// is less than best_error_so_far. To guarantee that the best possible T-representation is found, the function should be called using
+// best_error_so_far = 255*255*3*16, which is the maximum error for a block. 
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockTHUMB59TExhaustive(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2, unsigned int best_error_so_far) 
+{
+	uint8 colorsRGB444[2][3];
+	unsigned int pixel_indices;
+	uint8 distance;
+
+	uint8 block[4*4*4];
+
+	unsigned int *precalc_err_col0_RGB;
+	unsigned int *precalc_err_col1_RGB;
+	unsigned int *precalc_err_col0_RG;
+	unsigned int *precalc_err_col1_RG;
+	unsigned int *precalc_err_col0_R;
+	unsigned int *precalc_err_col1_R;
+
+	int colorRGB444_packed;
+
+	int colorsRGB444_packed[2];
+	int best_colorsRGB444_packed[2];
+
+	unsigned int best_error_using_Tmode;
+
+	// First compress block quickly to a resonable quality so that we can
+	// rule out all blocks that are of worse quality than that. 
+	best_error_using_Tmode = (unsigned int) compressBlockTHUMB59TFastestOnlyColor(img, width, height, startx, starty, best_colorsRGB444_packed);
+	if(best_error_using_Tmode < best_error_so_far)
+		best_error_so_far = best_error_using_Tmode;
+
+
+	// Colors numbering is reversed between the above function and the precalc below:
+	int temp = best_colorsRGB444_packed[0];
+	best_colorsRGB444_packed[0] = best_colorsRGB444_packed[1];
+	best_colorsRGB444_packed[1] = temp;
+
+	int xx,yy,count = 0;
+
+	// Use 4 bytes per pixel to make it 32-word aligned.
+	for(xx = 0; xx<4; xx++)
+	{
+		for(yy=0; yy<4; yy++)
+		{
+			block[(count)*4] = img[((starty+yy)*width+(startx+xx))*3];
+			block[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block[(count)*4+3] = 0;
+			count++;
+		}
+	}
+
+	// Precalculate error for color 0 (which produces the upper half of the T)
+	precalc_err_col0_RGB = (unsigned int*) malloc(4096*8*16*sizeof(unsigned int));
+	if(!precalc_err_col0_RGB){printf("Out of memory allocating \n");exit(1);}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed++)
+	{
+		precalcError59T_col0_RGB(block, colorRGB444_packed, precalc_err_col0_RGB);
+	}
+
+	// Precalculate error for color 1 (which produces the lower half of the T -- the lone color)
+	precalc_err_col1_RGB = (unsigned int*) malloc(4096*16*sizeof(unsigned int));
+	if(!precalc_err_col1_RGB){printf("Out of memory allocating \n");exit(1);}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed++)
+	{
+		precalcError59T_col1_RGB(block, colorRGB444_packed, precalc_err_col1_RGB);
+	}
+
+	precalc_err_col0_RG = (unsigned int*) malloc(16*16*8*16*sizeof(unsigned int));
+	if(!precalc_err_col0_RG){printf("Out of memory allocating \n");exit(1);}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed+=16)
+	{
+		precalcError59T_col0_RG(block, colorRGB444_packed, precalc_err_col0_RG);
+	}
+
+	precalc_err_col1_RG = (unsigned int*) malloc(16*16*16*sizeof(unsigned int));
+	if(!precalc_err_col1_RG){printf("Out of memory allocating \n");exit(1);}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed+=16)
+	{
+		precalcError59T_col1_RG(block, colorRGB444_packed, precalc_err_col1_RG);
+	}
+
+	precalc_err_col0_R = (unsigned int*) malloc(16*8*16*sizeof(unsigned int));
+	if(!precalc_err_col0_R){printf("Out of memory allocating \n");exit(1);}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed+=16*16)
+	{
+		precalcError59T_col0_R(block, colorRGB444_packed, precalc_err_col0_R);
+	}
+
+	precalc_err_col1_R = (unsigned int*) malloc(16*16*sizeof(unsigned int));
+	if(!precalc_err_col1_R){printf("Out of memory allocating \n");exit(1);}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed+=16*16)
+	{
+		precalcError59T_col1_R(block, colorRGB444_packed, precalc_err_col1_R);
+	}
+
+	unsigned int error;
+	unsigned int avoided = 0;
+	unsigned int notavoided = 0;
+
+	for(colorsRGB444[0][0] = 0; colorsRGB444[0][0] < 16; colorsRGB444[0][0]++)
+	{
+		for(colorsRGB444[1][0] = 0; colorsRGB444[1][0] < 16; colorsRGB444[1][0]++)
+		{
+			colorsRGB444_packed[0] = (colorsRGB444[0][0] << 8);
+			colorsRGB444_packed[1] = (colorsRGB444[1][0] << 8);
+			error = calculateError59TusingPrecalcR(block, colorsRGB444_packed, precalc_err_col0_R, precalc_err_col1_R, best_error_so_far);
+			if(error < best_error_so_far)
+			{
+				notavoided = notavoided + 1;
+				for(colorsRGB444[0][1] = 0; colorsRGB444[0][1] < 16; colorsRGB444[0][1]++)
+				{
+					colorsRGB444_packed[0] = (colorsRGB444[0][0] << 8) + (colorsRGB444[0][1] <<4);
+					for(colorsRGB444[1][1] = 0; colorsRGB444[1][1] < 16; colorsRGB444[1][1]++)
+					{
+						colorsRGB444_packed[1] = (colorsRGB444[1][0] << 8) + (colorsRGB444[1][1] <<4);
+						error = calculateError59TusingPrecalcRG(block, colorsRGB444_packed, precalc_err_col0_RG, precalc_err_col1_RG, best_error_so_far);
+						if(error < best_error_so_far)
+						{
+							for(colorsRGB444[0][2] = 0; colorsRGB444[0][2] < 16; colorsRGB444[0][2]++)
+							{
+								colorsRGB444_packed[0] = (colorsRGB444[0][0] << 8) + (colorsRGB444[0][1] <<4) + colorsRGB444[0][2];
+								for(colorsRGB444[1][2] = 0; colorsRGB444[1][2] < 16; colorsRGB444[1][2]++)
+								{
+									colorsRGB444_packed[1] = (colorsRGB444[1][0] << 8) + (colorsRGB444[1][1] <<4) + colorsRGB444[1][2];
+									error = calculateError59TusingPrecalcRGB(block, colorsRGB444_packed, precalc_err_col0_RGB, precalc_err_col1_RGB, best_error_so_far);
+
+									if(error < best_error_so_far)
+									{
+										best_error_so_far = error;
+										best_error_using_Tmode = error;
+										best_colorsRGB444_packed[0] = colorsRGB444_packed[0];
+										best_colorsRGB444_packed[1] = colorsRGB444_packed[1];
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	free(precalc_err_col0_RGB);
+	free(precalc_err_col1_RGB);
+	free(precalc_err_col0_RG);
+	free(precalc_err_col1_RG);
+	free(precalc_err_col0_R);
+	free(precalc_err_col1_R);
+
+	// We have got the two best colors. Now find the best distance and pixel indices. 
+
+	// Color numbering are reversed between precalc and noSwap
+	colorsRGB444[0][0] = (best_colorsRGB444_packed[1] >> 8) & 0xf;
+	colorsRGB444[0][1] = (best_colorsRGB444_packed[1] >> 4) & 0xf;
+	colorsRGB444[0][2] = (best_colorsRGB444_packed[1] >> 0) & 0xf;
+	
+	colorsRGB444[1][0] = (best_colorsRGB444_packed[0] >> 8) & 0xf;
+	colorsRGB444[1][1] = (best_colorsRGB444_packed[0] >> 4) & 0xf;
+	colorsRGB444[1][2] = (best_colorsRGB444_packed[0] >> 0) & 0xf;
+
+	calculateError59TnoSwap(img, width, startx, starty, colorsRGB444, distance, pixel_indices);			
+
+	// Put the compress params into the compression block 
+	packBlock59T(colorsRGB444, distance, pixel_indices, compressed1, compressed2);
+
+	return best_error_using_Tmode;
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates tables used in the exhaustive compression of the H-mode.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcErrorR_58Hperceptual1000(uint8* srcimg, int width, int startx, int starty, uint8 (colorsRGB444)[2][3],int colorRGB444_packed, unsigned int *precalc_errR) 
+{
+	unsigned int block_error = 0, 
+		   best_block_error = MAXERR1000, 
+		   pixel_error, 
+		   best_pixel_error;
+	int diff[3];
+	unsigned int pixel_colors;
+	uint8 possible_colors[2][3];
+	uint8 colors[2][3];
+	
+	decompressColor(R_BITS58H, G_BITS58H, B_BITS58H, colorsRGB444, colors);
+
+	// Test all distances
+	for (uint8 d = 0; d < BINPOW(TABLE_BITS_58H); ++d) 
+	{
+		possible_colors[0][R] = CLAMP(0,colors[0][R] - table58H[d],255);
+		possible_colors[1][R] = CLAMP(0,colors[0][R] + table58H[d],255);
+
+		block_error = 0;	
+		pixel_colors = 0;
+
+		// Loop block
+		for (size_t y = 0; y < BLOCKHEIGHT; ++y) 
+		{
+			for (size_t x = 0; x < BLOCKWIDTH; ++x) 
+			{
+				best_pixel_error = MAXERR1000;
+
+				// Loop possible block colors
+				for (uint8 c = 0; c < 2; ++c) 
+				{
+					diff[R] = srcimg[3*((starty+y)*width+startx+x)+R] - CLAMP(0,possible_colors[c][R],255);
+
+					pixel_error =	PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(diff[R]);
+
+					// Choose best error
+					if (pixel_error < best_pixel_error) 
+					{
+						best_pixel_error = pixel_error;
+					} 
+				}
+				precalc_errR[((colorRGB444_packed>>8)*8 + d)*16 + (y*4)+x] = (unsigned int) best_pixel_error;
+			}
+		}
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates tables used in the exhaustive compression of the H-mode.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcErrorR_58H(uint8* srcimg, int width, int startx, int starty, uint8 (colorsRGB444)[2][3],int colorRGB444_packed, unsigned int *precalc_errR) 
+{
+	double block_error = 0, 
+		   best_block_error = MAXIMUM_ERROR, 
+		   pixel_error, 
+		   best_pixel_error;
+	int diff[3];
+	unsigned int pixel_colors;
+	uint8 possible_colors[2][3];
+	uint8 colors[2][3];
+	
+	decompressColor(R_BITS58H, G_BITS58H, B_BITS58H, colorsRGB444, colors);
+
+	// Test all distances
+	for (uint8 d = 0; d < BINPOW(TABLE_BITS_58H); ++d) 
+	{
+		possible_colors[0][R] = CLAMP(0,colors[0][R] - table58H[d],255);
+		possible_colors[1][R] = CLAMP(0,colors[0][R] + table58H[d],255);
+
+		block_error = 0;	
+		pixel_colors = 0;
+
+		// Loop block
+		for (size_t y = 0; y < BLOCKHEIGHT; ++y) 
+		{
+			for (size_t x = 0; x < BLOCKWIDTH; ++x) 
+			{
+				best_pixel_error = MAXIMUM_ERROR;
+
+				// Loop possible block colors
+				for (uint8 c = 0; c < 2; ++c) 
+				{
+					diff[R] = srcimg[3*((starty+y)*width+startx+x)+R] - CLAMP(0,possible_colors[c][R],255);
+
+					pixel_error =	weight[R]*SQUARE(diff[R]);
+
+					// Choose best error
+					if (pixel_error < best_pixel_error) 
+					{
+						best_pixel_error = pixel_error;
+					} 
+				}
+				precalc_errR[((colorRGB444_packed>>8)*8 + d)*16 + (y*4)+x] = (unsigned int) best_pixel_error;
+			}
+		}
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates tables used in the exhaustive compression of the H-mode.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcErrorRG_58Hperceptual1000(uint8* srcimg, int width, int startx, int starty, uint8 (colorsRGB444)[2][3],int colorRGB444_packed, unsigned int *precalc_errRG) 
+{
+	unsigned int block_error = 0, 
+		   best_block_error = MAXERR1000,
+		   pixel_error, 
+		   best_pixel_error;
+	int diff[3];
+	unsigned int pixel_colors;
+	uint8 possible_colors[2][3];
+	uint8 colors[2][3];
+	
+	decompressColor(R_BITS58H, G_BITS58H, B_BITS58H, colorsRGB444, colors);
+
+	// Test all distances
+	for (uint8 d = 0; d < BINPOW(TABLE_BITS_58H); ++d) 
+	{
+		possible_colors[0][R] = CLAMP(0,colors[0][R] - table58H[d],255);
+		possible_colors[0][G] = CLAMP(0,colors[0][G] - table58H[d],255);
+		possible_colors[1][R] = CLAMP(0,colors[0][R] + table58H[d],255);
+		possible_colors[1][G] = CLAMP(0,colors[0][G] + table58H[d],255);
+
+		block_error = 0;	
+		pixel_colors = 0;
+
+		// Loop block
+		for (size_t y = 0; y < BLOCKHEIGHT; ++y) 
+		{
+			for (size_t x = 0; x < BLOCKWIDTH; ++x) 
+			{
+				best_pixel_error = MAXERR1000;
+
+				// Loop possible block colors
+				for (uint8 c = 0; c < 2; ++c) 
+				{
+					diff[R] = srcimg[3*((starty+y)*width+startx+x)+R] - CLAMP(0,possible_colors[c][R],255);
+					diff[G] = srcimg[3*((starty+y)*width+startx+x)+G] - CLAMP(0,possible_colors[c][G],255);
+
+					pixel_error =	PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*SQUARE(diff[R]) +
+									PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*SQUARE(diff[G]);
+
+					// Choose best error
+					if (pixel_error < best_pixel_error) 
+					{
+						best_pixel_error = pixel_error;
+					} 
+				}
+				precalc_errRG[((colorRGB444_packed>>4)*8 + d)*16 + (y*4)+x] = (unsigned int) best_pixel_error;
+			}
+		}
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates tables used in the exhaustive compression of the H-mode.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcErrorRG_58H(uint8* srcimg, int width, int startx, int starty, uint8 (colorsRGB444)[2][3],int colorRGB444_packed, unsigned int *precalc_errRG) 
+{
+	double block_error = 0, 
+		   best_block_error = MAXIMUM_ERROR, 
+		   pixel_error, 
+		   best_pixel_error;
+	int diff[3];
+	unsigned int pixel_colors;
+	uint8 possible_colors[2][3];
+	uint8 colors[2][3];
+	
+	decompressColor(R_BITS58H, G_BITS58H, B_BITS58H, colorsRGB444, colors);
+
+	// Test all distances
+	for (uint8 d = 0; d < BINPOW(TABLE_BITS_58H); ++d) 
+	{
+		possible_colors[0][R] = CLAMP(0,colors[0][R] - table58H[d],255);
+		possible_colors[0][G] = CLAMP(0,colors[0][G] - table58H[d],255);
+		possible_colors[1][R] = CLAMP(0,colors[0][R] + table58H[d],255);
+		possible_colors[1][G] = CLAMP(0,colors[0][G] + table58H[d],255);
+
+		block_error = 0;	
+		pixel_colors = 0;
+
+		// Loop block
+		for (size_t y = 0; y < BLOCKHEIGHT; ++y) 
+		{
+			for (size_t x = 0; x < BLOCKWIDTH; ++x) 
+			{
+				best_pixel_error = MAXIMUM_ERROR;
+
+				// Loop possible block colors
+				for (uint8 c = 0; c < 2; ++c) 
+				{
+					diff[R] = srcimg[3*((starty+y)*width+startx+x)+R] - CLAMP(0,possible_colors[c][R],255);
+					diff[G] = srcimg[3*((starty+y)*width+startx+x)+G] - CLAMP(0,possible_colors[c][G],255);
+
+					pixel_error =	weight[R]*SQUARE(diff[R]) +
+									weight[G]*SQUARE(diff[G]);
+
+					// Choose best error
+					if (pixel_error < best_pixel_error) 
+					{
+						best_pixel_error = pixel_error;
+					} 
+				}
+				precalc_errRG[((colorRGB444_packed>>4)*8 + d)*16 + (y*4)+x] = (unsigned int) best_pixel_error;
+			}
+		}
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates the full-RGB error table (perceptual weights, scaled by 1000) used in the exhaustive compression of the H-mode.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcError58Hperceptual1000(uint8* block, uint8 (colorsRGB444)[2][3],int colorRGB444_packed, unsigned int *precalc_err) 
+{
+	unsigned int pixel_error, // error of the second candidate color for the current pixel
+		   best_pixel_error; // smaller of the two candidates' errors for the current pixel
+	int possible_colors[2][3]; // the two candidate colors for one distance; every channel is stored with 255 added
+	uint8 colors[2][3]; // only colors[0] is written and read in this function
+	unsigned int *precalc_err_tab; // start of the 16-entry row of precalc_err currently being filled
+ 	int red_original; // channels of the source pixel being processed (block holds 4 bytes per pixel)
+ 	int green_original;
+    int	blue_original;
+	// PRECALC_ONE_58H_PERCEP(q): stores in precalc_err_tab[q] the smaller weighted error of the two candidates for pixel q.
+#define PRECALC_ONE_58H_PERCEP(qvalue)\
+  			red_original = block[qvalue*4];\
+  			green_original = block[qvalue*4+1];\
+  			blue_original = block[qvalue*4+2];\
+			/* unrolled loop over the two candidate colors: for (color = 0; color< 2; color++) */\
+ 	 			best_pixel_error = PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*square_table[(possible_colors[0][R] - red_original)] \
+				                 + PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*square_table[(possible_colors[0][G] - green_original)]\
+								 + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[(possible_colors[0][B] - blue_original)];\
+ 	 			pixel_error = PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000*square_table[(possible_colors[1][R] - red_original)]\
+	                        + PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000*square_table[(possible_colors[1][G] - green_original)]\
+						    + PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000*square_table[(possible_colors[1][B] - blue_original)];\
+				if (pixel_error < best_pixel_error)\
+					best_pixel_error = pixel_error;\
+			/* end unroll loop */\
+			precalc_err_tab[qvalue] = best_pixel_error;\
+
+#define PRECALC_ONE_TABLE_58H_PERCEP(dvalue)\
+		precalc_err_tab = &precalc_err[((colorRGB444_packed*8)+dvalue)*16];\
+		possible_colors[0][R] = CLAMP_LEFT_ZERO(colors[0][R] - table58H[dvalue])+255;\
+		possible_colors[0][G] = CLAMP_LEFT_ZERO(colors[0][G] - table58H[dvalue])+255;\
+		possible_colors[0][B] = CLAMP_LEFT_ZERO(colors[0][B] - table58H[dvalue])+255;\
+		possible_colors[1][R] = CLAMP_RIGHT_255(colors[0][R] + table58H[dvalue])+255;\
+		possible_colors[1][G] = CLAMP_RIGHT_255(colors[0][G] + table58H[dvalue])+255;\
+ 		possible_colors[1][B] = CLAMP_RIGHT_255(colors[0][B] + table58H[dvalue])+255;\
+		/* unrolled loop for(q = 0; q<16; q++): one table entry per pixel of the block */\
+			PRECALC_ONE_58H_PERCEP(0)\
+			PRECALC_ONE_58H_PERCEP(1)\
+			PRECALC_ONE_58H_PERCEP(2)\
+			PRECALC_ONE_58H_PERCEP(3)\
+			PRECALC_ONE_58H_PERCEP(4)\
+			PRECALC_ONE_58H_PERCEP(5)\
+			PRECALC_ONE_58H_PERCEP(6)\
+			PRECALC_ONE_58H_PERCEP(7)\
+			PRECALC_ONE_58H_PERCEP(8)\
+			PRECALC_ONE_58H_PERCEP(9)\
+			PRECALC_ONE_58H_PERCEP(10)\
+			PRECALC_ONE_58H_PERCEP(11)\
+			PRECALC_ONE_58H_PERCEP(12)\
+			PRECALC_ONE_58H_PERCEP(13)\
+			PRECALC_ONE_58H_PERCEP(14)\
+			PRECALC_ONE_58H_PERCEP(15)\
+		/* end unroll loop */\
+
+ 	colors[0][R] = (colorsRGB444[0][R] << 4) | colorsRGB444[0][R]; // copy the 4-bit value into both nibbles of the 8-bit channel
+ 	colors[0][G] = (colorsRGB444[0][G] << 4) | colorsRGB444[0][G];
+ 	colors[0][B] = (colorsRGB444[0][B] << 4) | colorsRGB444[0][B];
+
+	// Test all distances: each PRECALC_ONE_TABLE_58H_PERCEP(d) derives the two candidate colors for distance d and fills that distance's 16-entry row.
+	/* unroll loop for (uint8 d = 0; d < 8; ++d) */
+	// NOTE(review): the +255 offset presumably keeps (candidate - original) a non-negative index into square_table -- confirm against its definition.
+		PRECALC_ONE_TABLE_58H_PERCEP(0)
+		PRECALC_ONE_TABLE_58H_PERCEP(1)
+		PRECALC_ONE_TABLE_58H_PERCEP(2)
+		PRECALC_ONE_TABLE_58H_PERCEP(3)
+		PRECALC_ONE_TABLE_58H_PERCEP(4)
+		PRECALC_ONE_TABLE_58H_PERCEP(5)
+		PRECALC_ONE_TABLE_58H_PERCEP(6)
+		PRECALC_ONE_TABLE_58H_PERCEP(7)
+
+	/* end unroll loop */
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Precalculates the full-RGB error table (unweighted squared error) used in the exhaustive compression of the H-mode.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void precalcError58H(uint8* block, uint8 (colorsRGB444)[2][3],int colorRGB444_packed, unsigned int *precalc_err) 
+{
+	unsigned int pixel_error, // error of the second candidate color for the current pixel
+		   best_pixel_error; // smaller of the two candidates' errors for the current pixel
+	int possible_colors[2][3]; // the two candidate colors for one distance; every channel is stored with 255 added
+	uint8 colors[2][3]; // only colors[0] is written and read in this function
+	unsigned int *precalc_err_tab; // start of the 16-entry row of precalc_err currently being filled
+ 	int red_original; // channels of the source pixel being processed (block holds 4 bytes per pixel)
+ 	int green_original;
+    int	blue_original;
+	// PRECALC_ONE_58H(q): stores in precalc_err_tab[q] the smaller summed square_table error of the two candidates for pixel q.
+#define PRECALC_ONE_58H(qvalue)\
+  			red_original = block[qvalue*4];\
+  			green_original = block[qvalue*4+1];\
+  			blue_original = block[qvalue*4+2];\
+			/* unrolled loop over the two candidate colors: for (color = 0; color< 2; color++) */\
+ 	 			best_pixel_error = square_table[(possible_colors[0][R] - red_original)] + square_table[(possible_colors[0][G] - green_original)] + square_table[(possible_colors[0][B] - blue_original)];\
+ 	 			pixel_error = square_table[(possible_colors[1][R] - red_original)] + square_table[(possible_colors[1][G] - green_original)] + square_table[(possible_colors[1][B] - blue_original)];\
+				if (pixel_error < best_pixel_error)\
+					best_pixel_error = pixel_error;\
+			/* end unroll loop */\
+			precalc_err_tab[qvalue] = best_pixel_error;\
+
+#define PRECALC_ONE_TABLE_58H(dvalue)\
+		precalc_err_tab = &precalc_err[((colorRGB444_packed*8)+dvalue)*16];\
+		possible_colors[0][R] = CLAMP_LEFT_ZERO(colors[0][R] - table58H[dvalue])+255;\
+		possible_colors[0][G] = CLAMP_LEFT_ZERO(colors[0][G] - table58H[dvalue])+255;\
+		possible_colors[0][B] = CLAMP_LEFT_ZERO(colors[0][B] - table58H[dvalue])+255;\
+		possible_colors[1][R] = CLAMP_RIGHT_255(colors[0][R] + table58H[dvalue])+255;\
+		possible_colors[1][G] = CLAMP_RIGHT_255(colors[0][G] + table58H[dvalue])+255;\
+ 		possible_colors[1][B] = CLAMP_RIGHT_255(colors[0][B] + table58H[dvalue])+255;\
+		/* unrolled loop for(q = 0; q<16; q++): one table entry per pixel of the block */\
+			PRECALC_ONE_58H(0)\
+			PRECALC_ONE_58H(1)\
+			PRECALC_ONE_58H(2)\
+			PRECALC_ONE_58H(3)\
+			PRECALC_ONE_58H(4)\
+			PRECALC_ONE_58H(5)\
+			PRECALC_ONE_58H(6)\
+			PRECALC_ONE_58H(7)\
+			PRECALC_ONE_58H(8)\
+			PRECALC_ONE_58H(9)\
+			PRECALC_ONE_58H(10)\
+			PRECALC_ONE_58H(11)\
+			PRECALC_ONE_58H(12)\
+			PRECALC_ONE_58H(13)\
+			PRECALC_ONE_58H(14)\
+			PRECALC_ONE_58H(15)\
+		/* end unroll loop */\
+
+ 	colors[0][R] = (colorsRGB444[0][R] << 4) | colorsRGB444[0][R]; // copy the 4-bit value into both nibbles of the 8-bit channel
+ 	colors[0][G] = (colorsRGB444[0][G] << 4) | colorsRGB444[0][G];
+ 	colors[0][B] = (colorsRGB444[0][B] << 4) | colorsRGB444[0][B];
+
+	// Test all distances: each PRECALC_ONE_TABLE_58H(d) derives the two candidate colors for distance d and fills that distance's 16-entry row.
+	/* unroll loop for (uint8 d = 0; d < 8; ++d) */
+	// NOTE(review): the +255 offset presumably keeps (candidate - original) a non-negative index into square_table -- confirm against its definition.
+		PRECALC_ONE_TABLE_58H(0)
+		PRECALC_ONE_TABLE_58H(1)
+		PRECALC_ONE_TABLE_58H(2)
+		PRECALC_ONE_TABLE_58H(3)
+		PRECALC_ONE_TABLE_58H(4)
+		PRECALC_ONE_TABLE_58H(5)
+		PRECALC_ONE_TABLE_58H(6)
+		PRECALC_ONE_TABLE_58H(7)
+
+	/* end unroll loop */
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Calculate a minimum error for the H-mode when doing exhaustive compression, using only the red-channel table (perceptual variant).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateErrorFromPrecalcR58Hperceptual1000(int *colorsRGB444_packed, unsigned int *precalc_errR, unsigned int best_err_so_far) 
+{
+	unsigned int block_error = 0; // error summed over the pixels of the distance being tested
+	unsigned int best_block_error = MAXERR1000; // smallest block_error over the distances tested so far; this is the return value
+	unsigned int *precalc_col1, *precalc_col2; // table section (8 rows of 16 entries) of each of the two colors
+	unsigned int *precalc_col1tab, *precalc_col2tab; // 16-entry row of each color for the current distance
+
+	precalc_col1 = &precalc_errR[(colorsRGB444_packed[0]>>8)*8*16]; // the section is selected by the packed color shifted right by 8
+	precalc_col2 = &precalc_errR[(colorsRGB444_packed[1]>>8)*8*16]; // same layout as written by precalcErrorR_58Hperceptual1000
+	// CHOICE_R58H_PERCEP(q): adds to block_error the smaller of the two colors' precalculated errors for pixel q.
+#define CHOICE_R58H_PERCEP(value)\
+	if(precalc_col1tab[value] < precalc_col2tab[value])\
+		block_error += precalc_col1tab[value];\
+	else\
+		block_error += precalc_col2tab[value];\
+
+	// Test all distances
+	for (uint8 d = 0; d < 8; ++d) 
+	{
+		block_error = 0;	// restart the sum for this distance
+		precalc_col1tab = &precalc_col1[d*16];
+		precalc_col2tab = &precalc_col2[d*16];
+		// Loop block
+
+		/* unrolled loop for(q = 0; q<16 && block_error < best_err_so_far; q++): no further pixel is added once the sum reaches best_err_so_far */
+		CHOICE_R58H_PERCEP(0)
+		if( block_error < best_err_so_far )
+		{
+			CHOICE_R58H_PERCEP(1)
+			if( block_error < best_err_so_far )
+			{
+				CHOICE_R58H_PERCEP(2)
+				if( block_error < best_err_so_far )
+				{
+					CHOICE_R58H_PERCEP(3)
+					if( block_error < best_err_so_far )
+					{
+						CHOICE_R58H_PERCEP(4)
+						if( block_error < best_err_so_far )
+						{
+							CHOICE_R58H_PERCEP(5)
+							if( block_error < best_err_so_far )
+							{
+								CHOICE_R58H_PERCEP(6)
+								if( block_error < best_err_so_far )
+								{
+									CHOICE_R58H_PERCEP(7)
+									if( block_error < best_err_so_far )
+									{
+										CHOICE_R58H_PERCEP(8)
+										if( block_error < best_err_so_far )
+										{
+											CHOICE_R58H_PERCEP(9)
+											if( block_error < best_err_so_far )
+											{
+												CHOICE_R58H_PERCEP(10)
+												if( block_error < best_err_so_far )
+												{
+													CHOICE_R58H_PERCEP(11)
+													if( block_error < best_err_so_far )
+													{
+														CHOICE_R58H_PERCEP(12)
+														if( block_error < best_err_so_far )
+														{
+															CHOICE_R58H_PERCEP(13)
+															if( block_error < best_err_so_far )
+															{
+																CHOICE_R58H_PERCEP(14)
+																if( block_error < best_err_so_far )
+																{
+																	CHOICE_R58H_PERCEP(15)
+																}
+															}
+														}
+													}
+												}
+											}
+										}
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+		/* end unroll loop */
+		// block_error is a partial sum (>= best_err_so_far) when the ladder above stopped early.
+		if (block_error < best_block_error) 
+			best_block_error = block_error;
+	}
+	return best_block_error;
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Calculate a minimum error for the H-mode when doing exhaustive compression, using only the red-channel table (non-perceptual variant).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateErrorFromPrecalcR58H(int *colorsRGB444_packed, unsigned int *precalc_errR, unsigned int best_err_so_far) 
+{
+	unsigned int block_error = 0; // error summed over the pixels of the distance being tested
+	unsigned int best_block_error = MAXIMUM_ERROR; // smallest block_error over the distances tested so far; this is the return value
+	unsigned int *precalc_col1, *precalc_col2; // table section (8 rows of 16 entries) of each of the two colors
+	unsigned int *precalc_col1tab, *precalc_col2tab; // 16-entry row of each color for the current distance
+
+	precalc_col1 = &precalc_errR[(colorsRGB444_packed[0]>>8)*8*16]; // the section is selected by the packed color shifted right by 8
+	precalc_col2 = &precalc_errR[(colorsRGB444_packed[1]>>8)*8*16]; // same layout as written by precalcErrorR_58H
+	// CHOICE_R58H(q): adds to block_error the smaller of the two colors' precalculated errors for pixel q.
+#define CHOICE_R58H(value)\
+	if(precalc_col1tab[value] < precalc_col2tab[value])\
+		block_error += precalc_col1tab[value];\
+	else\
+		block_error += precalc_col2tab[value];\
+
+	// Test all distances
+	for (uint8 d = 0; d < 8; ++d) 
+	{
+		block_error = 0;	// restart the sum for this distance
+		precalc_col1tab = &precalc_col1[d*16];
+		precalc_col2tab = &precalc_col2[d*16];
+		// Loop block
+
+		/* unrolled loop for(q = 0; q<16 && block_error < best_err_so_far; q++): no further pixel is added once the sum reaches best_err_so_far */
+		CHOICE_R58H(0)
+		if( block_error < best_err_so_far )
+		{
+			CHOICE_R58H(1)
+			if( block_error < best_err_so_far )
+			{
+				CHOICE_R58H(2)
+				if( block_error < best_err_so_far )
+				{
+					CHOICE_R58H(3)
+					if( block_error < best_err_so_far )
+					{
+						CHOICE_R58H(4)
+						if( block_error < best_err_so_far )
+						{
+							CHOICE_R58H(5)
+							if( block_error < best_err_so_far )
+							{
+								CHOICE_R58H(6)
+								if( block_error < best_err_so_far )
+								{
+									CHOICE_R58H(7)
+									if( block_error < best_err_so_far )
+									{
+										CHOICE_R58H(8)
+										if( block_error < best_err_so_far )
+										{
+											CHOICE_R58H(9)
+											if( block_error < best_err_so_far )
+											{
+												CHOICE_R58H(10)
+												if( block_error < best_err_so_far )
+												{
+													CHOICE_R58H(11)
+													if( block_error < best_err_so_far )
+													{
+														CHOICE_R58H(12)
+														if( block_error < best_err_so_far )
+														{
+															CHOICE_R58H(13)
+															if( block_error < best_err_so_far )
+															{
+																CHOICE_R58H(14)
+																if( block_error < best_err_so_far )
+																{
+																	CHOICE_R58H(15)
+																}
+															}
+														}
+													}
+												}
+											}
+										}
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+		/* end unroll loop */
+		// block_error is a partial sum (>= best_err_so_far) when the ladder above stopped early.
+		if (block_error < best_block_error) 
+			best_block_error = block_error;
+
+	}
+	return best_block_error;
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Calculate a minimum error for the H-mode when doing exhaustive compression.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateErrorFromPrecalcRG58Hperceptual1000(int *colorsRGB444_packed, unsigned int *precalc_errRG, unsigned int best_err_so_far) 
+{
+	unsigned int block_error = 0;
+	unsigned int best_block_error = MAXIMUM_ERROR;
+	unsigned int *precalc_col1, *precalc_col2;
+	unsigned int *precalc_col1tab, *precalc_col2tab;
+
+	precalc_col1 = &precalc_errRG[(colorsRGB444_packed[0]>>4)*8*16];
+	precalc_col2 = &precalc_errRG[(colorsRGB444_packed[1]>>4)*8*16];
+
+#define CHOICE_RG58H_PERCEP(value)\
+	if(precalc_col1tab[value] < precalc_col2tab[value])\
+		block_error += precalc_col1tab[value];\
+	else\
+		block_error += precalc_col2tab[value];\
+
+	// Test all distances
+	for (uint8 d = 0; d < 8; ++d) 
+	{
+		block_error = 0;	
+		precalc_col1tab = &precalc_col1[d*16];
+		precalc_col2tab = &precalc_col2[d*16];
+		// Loop block
+
+		/* unroll loop for(q = 0; q<16 && block_error < best_err_so_far; q++) */
+		CHOICE_RG58H_PERCEP(0)
+		if( block_error < best_err_so_far )
+		{
+			CHOICE_RG58H_PERCEP(1)
+			if( block_error < best_err_so_far )
+			{
+				CHOICE_RG58H_PERCEP(2)
+				if( block_error < best_err_so_far )
+				{
+					CHOICE_RG58H_PERCEP(3)
+					if( block_error < best_err_so_far )
+					{
+						CHOICE_RG58H_PERCEP(4)
+						if( block_error < best_err_so_far )
+						{
+							CHOICE_RG58H_PERCEP(5)
+							if( block_error < best_err_so_far )
+							{
+								CHOICE_RG58H_PERCEP(6)
+								if( block_error < best_err_so_far )
+								{
+									CHOICE_RG58H_PERCEP(7)
+									if( block_error < best_err_so_far )
+									{
+										CHOICE_RG58H_PERCEP(8)
+										if( block_error < best_err_so_far )
+										{
+											CHOICE_RG58H_PERCEP(9)
+											if( block_error < best_err_so_far )
+											{
+												CHOICE_RG58H_PERCEP(10)
+												if( block_error < best_err_so_far )
+												{
+													CHOICE_RG58H_PERCEP(11)
+													if( block_error < best_err_so_far )
+													{
+														CHOICE_RG58H_PERCEP(12)
+														if( block_error < best_err_so_far )
+														{
+															CHOICE_RG58H_PERCEP(13)
+															if( block_error < best_err_so_far )
+															{
+																CHOICE_RG58H_PERCEP(14)
+																if( block_error < best_err_so_far )
+																{
+																	CHOICE_RG58H_PERCEP(15)
+																}
+															}
+														}
+													}
+												}
+											}
+										}
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+		/* end unroll loop */
+
+		if (block_error < best_block_error) 
+			best_block_error = block_error;
+	}
+	return best_block_error;
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Calculate a minimum error for the H-mode when doing exhaustive compression, using the red+green table (non-perceptual variant).
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateErrorFromPrecalcRG58H(int *colorsRGB444_packed, unsigned int *precalc_errRG, unsigned int best_err_so_far) 
+{
+	unsigned int block_error = 0; // error summed over the pixels of the distance being tested
+	unsigned int best_block_error = MAXIMUM_ERROR; // smallest block_error over the distances tested so far; this is the return value
+	unsigned int *precalc_col1, *precalc_col2; // table section (8 rows of 16 entries) of each of the two colors
+	unsigned int *precalc_col1tab, *precalc_col2tab; // 16-entry row of each color for the current distance
+
+	precalc_col1 = &precalc_errRG[(colorsRGB444_packed[0]>>4)*8*16]; // the section is selected by the packed color shifted right by 4
+	precalc_col2 = &precalc_errRG[(colorsRGB444_packed[1]>>4)*8*16]; // same layout as written by precalcErrorRG_58H
+	// CHOICE_RG58H(q): adds to block_error the smaller of the two colors' precalculated errors for pixel q.
+#define CHOICE_RG58H(value)\
+	if(precalc_col1tab[value] < precalc_col2tab[value])\
+		block_error += precalc_col1tab[value];\
+	else\
+		block_error += precalc_col2tab[value];\
+
+	// Test all distances
+	for (uint8 d = 0; d < 8; ++d) 
+	{
+		block_error = 0;	// restart the sum for this distance
+		precalc_col1tab = &precalc_col1[d*16];
+		precalc_col2tab = &precalc_col2[d*16];
+		// Loop block
+
+		/* unrolled loop for(q = 0; q<16 && block_error < best_err_so_far; q++): no further pixel is added once the sum reaches best_err_so_far */
+		CHOICE_RG58H(0)
+		if( block_error < best_err_so_far )
+		{
+			CHOICE_RG58H(1)
+			if( block_error < best_err_so_far )
+			{
+				CHOICE_RG58H(2)
+				if( block_error < best_err_so_far )
+				{
+					CHOICE_RG58H(3)
+					if( block_error < best_err_so_far )
+					{
+						CHOICE_RG58H(4)
+						if( block_error < best_err_so_far )
+						{
+							CHOICE_RG58H(5)
+							if( block_error < best_err_so_far )
+							{
+								CHOICE_RG58H(6)
+								if( block_error < best_err_so_far )
+								{
+									CHOICE_RG58H(7)
+									if( block_error < best_err_so_far )
+									{
+										CHOICE_RG58H(8)
+										if( block_error < best_err_so_far )
+										{
+											CHOICE_RG58H(9)
+											if( block_error < best_err_so_far )
+											{
+												CHOICE_RG58H(10)
+												if( block_error < best_err_so_far )
+												{
+													CHOICE_RG58H(11)
+													if( block_error < best_err_so_far )
+													{
+														CHOICE_RG58H(12)
+														if( block_error < best_err_so_far )
+														{
+															CHOICE_RG58H(13)
+															if( block_error < best_err_so_far )
+															{
+																CHOICE_RG58H(14)
+																if( block_error < best_err_so_far )
+																{
+																	CHOICE_RG58H(15)
+																}
+															}
+														}
+													}
+												}
+											}
+										}
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+		/* end unroll loop */
+		// block_error is a partial sum (>= best_err_so_far) when the ladder above stopped early.
+		if (block_error < best_block_error) 
+			best_block_error = block_error;
+	}
+	return best_block_error;
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Calculate a minimum error for the H-mode when doing exhaustive compression, using the full-RGB table (perceptual variant): returns the smallest block error over the 8 distances.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateErrorFromPrecalc58Hperceptual1000(int *colorsRGB444_packed, unsigned int *precalc_err, unsigned int total_best_err) 
+{
+	unsigned int block_error; /* error summed over the pixels of the distance being tested */\
+	unsigned int *precalc_col1, *precalc_col2; /* table section (8 rows of 16 entries) of each of the two colors */\
+	unsigned int *precalc_col1tab, *precalc_col2tab; /* 16-entry row of each color for the current distance */\
+
+	unsigned int error; // smallest block_error over the distances tested so far; this is the return value
+	// FIRSTCHOICE starts the per-distance sum with pixel 0 (plain assignment); CHOICE adds one further pixel. Both take the smaller of the two colors' errors.
+#define FIRSTCHOICE_RGB58H_PERCEP(value)\
+	if(precalc_col1tab[value] < precalc_col2tab[value])\
+		block_error = precalc_col1tab[value];\
+	else\
+		block_error = precalc_col2tab[value];\
+
+#define CHOICE_RGB58H_PERCEP(value)\
+	if(precalc_col1tab[value] < precalc_col2tab[value])\
+		block_error += precalc_col1tab[value];\
+	else\
+		block_error += precalc_col2tab[value];\
+
+#define ONETABLE_RGB58H_PERCEP(distance)\
+		precalc_col1tab = &precalc_col1[distance*16];\
+		precalc_col2tab = &precalc_col2[distance*16];\
+		/* unrolled pixel loop; the block_error < total_best_err test runs after pixels 0 and 1, then after every second pixel */\
+		FIRSTCHOICE_RGB58H_PERCEP(0)\
+		if( block_error < total_best_err)\
+		{\
+			CHOICE_RGB58H_PERCEP(1)\
+			if( block_error < total_best_err)\
+			{\
+				CHOICE_RGB58H_PERCEP(2)\
+				CHOICE_RGB58H_PERCEP(3)\
+				if( block_error < total_best_err)\
+				{\
+					CHOICE_RGB58H_PERCEP(4)\
+					CHOICE_RGB58H_PERCEP(5)\
+					if( block_error < total_best_err)\
+					{\
+						CHOICE_RGB58H_PERCEP(6)\
+						CHOICE_RGB58H_PERCEP(7)\
+						if( block_error < total_best_err)\
+						{\
+							CHOICE_RGB58H_PERCEP(8)\
+							CHOICE_RGB58H_PERCEP(9)\
+							if( block_error < total_best_err)\
+							{\
+								CHOICE_RGB58H_PERCEP(10)\
+								CHOICE_RGB58H_PERCEP(11)\
+								if( block_error < total_best_err)\
+								{\
+									CHOICE_RGB58H_PERCEP(12)\
+									CHOICE_RGB58H_PERCEP(13)\
+									if( block_error < total_best_err)\
+									{\
+										CHOICE_RGB58H_PERCEP(14)\
+										CHOICE_RGB58H_PERCEP(15)\
+									}\
+								}\
+							}\
+						}\
+					}\
+				}\
+			}\
+		}\
+		/* end unroll loop; block_error is a partial sum (>= total_best_err) when the ladder stopped early */\
+		if (block_error < error)\
+			error = block_error;\
+
+#define CALCULATE_ERROR_FROM_PRECALC_RGB58H_PERCEP\
+	error = MAXERR1000;\
+	precalc_col1 = &precalc_err[colorsRGB444_packed[0]*8*16];\
+	precalc_col2 = &precalc_err[colorsRGB444_packed[1]*8*16];\
+	/* Test all distances*/\
+	/* unroll loop for (uint8 d = 0; d < 8; ++d) */\
+		ONETABLE_RGB58H_PERCEP(0)\
+		ONETABLE_RGB58H_PERCEP(1)\
+		ONETABLE_RGB58H_PERCEP(2)\
+		ONETABLE_RGB58H_PERCEP(3)\
+		ONETABLE_RGB58H_PERCEP(4)\
+		ONETABLE_RGB58H_PERCEP(5)\
+		ONETABLE_RGB58H_PERCEP(6)\
+		ONETABLE_RGB58H_PERCEP(7)\
+	/* end unroll loop */\
+
+	CALCULATE_ERROR_FROM_PRECALC_RGB58H_PERCEP // expands to the whole search and leaves the result in error
+	return error;\
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Calculate a minimum error for the H-mode when doing exhaustive compression: for each of the 8 distance tables, add up over the 16 table entries the smaller of the two base colors' precalculated errors, and return the lowest of the 8 sums.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int calculateErrorFromPrecalc58H(int *colorsRGB444_packed, unsigned int *precalc_err, unsigned int total_best_err)
+{
+	unsigned int block_error;	// error sum of the distance table currently being tested
+	unsigned int *precalc_col1, *precalc_col2;	// precalc_err rows (8 tables x 16 entries) of base color 0 and base color 1
+	unsigned int *precalc_col1tab, *precalc_col2tab;	// the 16 entries of the current distance table within each row
+	// A table's sum is abandoned once it reaches total_best_err (tested after entries 0, 1, 3, 5, ..., 13), so a result >= total_best_err is not an exact sum.
+	unsigned int error;	// lowest table sum so far
+	// The macros below hand-unroll both loops; they are not #undef'd at the end of this function.
+#define FIRSTCHOICE_RGB58H(value)\
+	if(precalc_col1tab[value] < precalc_col2tab[value])\
+		block_error = precalc_col1tab[value];\
+	else\
+		block_error = precalc_col2tab[value];
+
+#define CHOICE_RGB58H(value)\
+	if(precalc_col1tab[value] < precalc_col2tab[value])\
+		block_error += precalc_col1tab[value];\
+	else\
+		block_error += precalc_col2tab[value];
+
+#define ONETABLE_RGB58H(distance)\
+		precalc_col1tab = &precalc_col1[distance*16];\
+		precalc_col2tab = &precalc_col2[distance*16];\
+		/* unroll loop for(q = 0; q<16 && block_error < total_best_err; q++) */\
+		FIRSTCHOICE_RGB58H(0)\
+		if( block_error < total_best_err)\
+		{\
+			CHOICE_RGB58H(1)\
+			if( block_error < total_best_err)\
+			{\
+				CHOICE_RGB58H(2)\
+				CHOICE_RGB58H(3)\
+				if( block_error < total_best_err)\
+				{\
+					CHOICE_RGB58H(4)\
+					CHOICE_RGB58H(5)\
+					if( block_error < total_best_err)\
+					{\
+						CHOICE_RGB58H(6)\
+						CHOICE_RGB58H(7)\
+						if( block_error < total_best_err)\
+						{\
+							CHOICE_RGB58H(8)\
+							CHOICE_RGB58H(9)\
+							if( block_error < total_best_err)\
+							{\
+								CHOICE_RGB58H(10)\
+								CHOICE_RGB58H(11)\
+								if( block_error < total_best_err)\
+								{\
+									CHOICE_RGB58H(12)\
+									CHOICE_RGB58H(13)\
+									if( block_error < total_best_err)\
+									{\
+										CHOICE_RGB58H(14)\
+										CHOICE_RGB58H(15)\
+									}\
+								}\
+							}\
+						}\
+					}\
+				}\
+			}\
+		}\
+		/* end unroll loop */\
+		if (block_error < error)\
+			error = block_error;
+
+#define CALCULATE_ERROR_FROM_PRECALC_RGB58H\
+	error = MAXIMUM_ERROR;\
+	precalc_col1 = &precalc_err[colorsRGB444_packed[0]*8*16];\
+	precalc_col2 = &precalc_err[colorsRGB444_packed[1]*8*16];\
+	/* Test all distances*/\
+	/* unroll loop for (uint8 d = 0; d < 8; ++d) */\
+		ONETABLE_RGB58H(0)\
+		ONETABLE_RGB58H(1)\
+		ONETABLE_RGB58H(2)\
+		ONETABLE_RGB58H(3)\
+		ONETABLE_RGB58H(4)\
+		ONETABLE_RGB58H(5)\
+		ONETABLE_RGB58H(6)\
+		ONETABLE_RGB58H(7)\
+	/* end unroll loop */
+
+	CALCULATE_ERROR_FROM_PRECALC_RGB58H
+	return error;
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Exhaustively compress one 4x4 block to 58 bits (H-mode) using the perceptual error measure.
+// This is supposed to match the first of the three modes in TWOTIMER.
+// The bit layout is thought to be:
+//
+//|63 62 61 60 59 58|57 56 55 54|53 52 51 50|49 48 47 46|45 44 43 42|41 40 39 38|37 36 35 34|33 32|
+//|-------empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|d2 d1|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+//
+// The distance d is three bits, d2 (MSB), d1 and d0 (LSB). d0 is not stored explicitly.
+// Instead if the 12-bit word red0,green0,blue0 < red1,green1,blue1, d0 is assumed to be 0.
+// Else, it is assumed to be 1.
+//
+// img               - source image, 3 bytes per pixel, 'width' pixels per row
+// width, height     - width is the row length in pixels; height is only forwarded to the fast encoder
+// startx, starty    - position of the block's top-left pixel in img
+// compressed1       - out: upper word (the two RGB444 base colors and distance bits d2, d1)
+// compressed2       - out: lower word (the 32 pixel index bits)
+// best_error_so_far - error bound; color pairs that cannot get below it are culled
+//
+// compressBlockTHUMB58HFastestPerceptual1000 supplies a starting solution. Every ordered pair of RGB444 base
+// colors (color 0 < color 1) is then tested against precalculated error tables, pruning first on the red
+// components alone, then on red+green, before the full colors are evaluated.
+// Both output words are always written. The return value is the error of the fast encoder, or the lower
+// error found by the search; it can therefore be >= the best_error_so_far that was passed in.
+// Exits the process if one of the temporary tables cannot be allocated.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockTHUMB58HExhaustivePerceptual(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2, unsigned int best_error_so_far)
+{
+	unsigned int best_error_using_Hmode;
+	uint8 best_colorsRGB444[2][3];
+	unsigned int best_pixel_indices;
+	uint8 best_distance;
+
+	unsigned int error;
+	uint8 colorsRGB444[2][3];
+	int colorsRGB444_packed[2];
+	int best_colorsRGB444_packed[2];
+	int colorRGB444_packed;
+	unsigned int pixel_indices;
+	uint8 distance;
+	unsigned int *precalc_err;		    // smallest error per color, table and pixel
+	unsigned int *precalc_err_RG;		// smallest pixel error for an entire table
+	unsigned int *precalc_err_R;		// smallest pixel error for an entire table
+	uint8 block[4*4*4];
+
+	// Lookup tables for the search; they are freed again before the final encode further down.
+
+	precalc_err = (unsigned int*) malloc(4096*8*16*sizeof(unsigned int));
+	if(!precalc_err){printf("Out of memory allocating \n");exit(1);}
+
+	precalc_err_RG = (unsigned int*) malloc(16*16*8*16*sizeof(unsigned int));
+	if(!precalc_err_RG){printf("Out of memory allocating \n");exit(1);}
+
+	precalc_err_R = (unsigned int*) malloc(16*8*16*sizeof(unsigned int));
+	if(!precalc_err_R){printf("Out of memory allocating \n");exit(1);}
+
+	unsigned int test1, test2;
+	best_error_using_Hmode = (unsigned int)compressBlockTHUMB58HFastestPerceptual1000(img,width, height, startx, starty, test1, test2);
+	// Seed the result with the two 12-bit RGB444 base colors the fast encoder stored in test1,
+	// so that a valid answer exists even if the search below finds nothing better.
+	best_colorsRGB444_packed[0] = GETBITSHIGH(test1, 12, 57);
+	best_colorsRGB444_packed[1] = GETBITSHIGH(test1, 12, 45);
+
+	if(best_error_using_Hmode < best_error_so_far)
+		best_error_so_far = best_error_using_Hmode;
+
+	int xx,yy,count = 0;
+
+	// Copy the 4x4 block column by column (xx outer, yy inner), using 4 bytes per pixel so that each pixel fills one 32-bit word.
+	for(xx = 0; xx<4; xx++)
+	{
+		for(yy=0; yy<4; yy++)
+		{
+			block[(count)*4] = img[((starty+yy)*width+(startx+xx))*3];
+			block[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block[(count)*4+3] = 0;
+			count++;
+		}
+	}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed++)
+	{
+		colorsRGB444[0][0] = (colorRGB444_packed >> 8) & 0xf;
+		colorsRGB444[0][1] = (colorRGB444_packed >> 4) & 0xf;
+		colorsRGB444[0][2] = (colorRGB444_packed) & 0xf;
+
+		precalcError58Hperceptual1000(block, colorsRGB444, colorRGB444_packed, precalc_err);
+	}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed+=16)
+	{
+		colorsRGB444[0][0] = (colorRGB444_packed >> 8) & 0xf;
+		colorsRGB444[0][1] = (colorRGB444_packed >> 4) & 0xf;
+		colorsRGB444[0][2] = (colorRGB444_packed) & 0xf;
+		precalcErrorRG_58Hperceptual1000(img, width, startx, starty, colorsRGB444, colorRGB444_packed, precalc_err_RG);
+	}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed+=16*16)
+	{
+		colorsRGB444[0][0] = (colorRGB444_packed >> 8) & 0xf;
+		colorsRGB444[0][1] = (colorRGB444_packed >> 4) & 0xf;
+		colorsRGB444[0][2] = (colorRGB444_packed) & 0xf;
+		precalcErrorR_58Hperceptual1000(img, width, startx, starty, colorsRGB444, colorRGB444_packed, precalc_err_R);
+	}
+
+	// Hierarchical search over ordered base-color pairs: a red pair, and then a red+green pair, is only
+	// expanded further when its table lookup returns an error below best_error_so_far.
+
+	for( colorsRGB444[0][0] = 0; colorsRGB444[0][0] <16; colorsRGB444[0][0]++)
+	{
+		colorsRGB444_packed[0] = colorsRGB444[0][0]*256;
+		for( colorsRGB444[1][0] = 0; colorsRGB444[1][0] <16; colorsRGB444[1][0]++)
+		{
+			colorsRGB444_packed[1] = colorsRGB444[1][0]*256;
+			if(colorsRGB444_packed[0] <= colorsRGB444_packed[1])
+			{
+				error = calculateErrorFromPrecalcR58Hperceptual1000(colorsRGB444_packed, precalc_err_R, best_error_so_far);
+				if(error < best_error_so_far)
+				{
+					for( colorsRGB444[0][1] = 0; colorsRGB444[0][1] <16; colorsRGB444[0][1]++)
+					{
+						colorsRGB444_packed[0] = colorsRGB444[0][0]*256 + colorsRGB444[0][1]*16;
+						for( colorsRGB444[1][1] = 0; colorsRGB444[1][1] <16; colorsRGB444[1][1]++)
+						{
+							colorsRGB444_packed[1] = colorsRGB444[1][0]*256 + colorsRGB444[1][1]*16;
+							if(colorsRGB444_packed[0] <= colorsRGB444_packed[1])
+							{
+								error = calculateErrorFromPrecalcRG58Hperceptual1000(colorsRGB444_packed, precalc_err_RG, best_error_so_far);
+								if(error < best_error_so_far)
+								{
+									for( colorsRGB444[0][2] = 0; colorsRGB444[0][2] <16; colorsRGB444[0][2]++)
+									{
+										colorsRGB444_packed[0] = colorsRGB444[0][0]*256 + colorsRGB444[0][1]*16 + colorsRGB444[0][2];
+										for( colorsRGB444[1][2] = 0; colorsRGB444[1][2] <16; colorsRGB444[1][2]++)
+										{
+											colorsRGB444_packed[1] = colorsRGB444[1][0]*256 + colorsRGB444[1][1]*16 + colorsRGB444[1][2];
+											if(colorsRGB444_packed[0] < colorsRGB444_packed[1])
+											{
+												error = calculateErrorFromPrecalc58Hperceptual1000(colorsRGB444_packed, precalc_err, best_error_so_far);
+												if(error < best_error_so_far)
+												{
+													best_error_so_far = error;
+													best_error_using_Hmode = error;
+													best_colorsRGB444_packed[0] = colorsRGB444_packed[0];
+													best_colorsRGB444_packed[1] = colorsRGB444_packed[1];
+												}
+											}
+										}
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+	best_colorsRGB444[0][0] = (best_colorsRGB444_packed[0] >> 8) & 0xf;
+	best_colorsRGB444[0][1] = (best_colorsRGB444_packed[0] >> 4) & 0xf;
+	best_colorsRGB444[0][2] = (best_colorsRGB444_packed[0]) & 0xf;
+	best_colorsRGB444[1][0] = (best_colorsRGB444_packed[1] >> 8) & 0xf;
+	best_colorsRGB444[1][1] = (best_colorsRGB444_packed[1] >> 4) & 0xf;
+	best_colorsRGB444[1][2] = (best_colorsRGB444_packed[1]) & 0xf;
+
+	free(precalc_err);
+	free(precalc_err_RG);
+	free(precalc_err_R);
+	// Obtain distance and pixel indices for the winning colors; the error returned by this call is not used afterwards.
+	error = (unsigned int) calculateErrorAndCompress58Hperceptual1000(img, width, startx, starty, best_colorsRGB444, distance, pixel_indices);
+	best_distance = distance;
+	best_pixel_indices = pixel_indices;
+
+	//                   | col0 >= col1      col0 < col1
+	//------------------------------------------------------
+	// (dist & 1) = 1    | no need to swap | need to swap
+	//                   |-----------------+----------------
+	// (dist & 1) = 0    | need to swap    | no need to swap
+	//
+	// This can be done with an xor test.
+
+	best_colorsRGB444_packed[0] = (best_colorsRGB444[0][R] << 8) + (best_colorsRGB444[0][G] << 4) + best_colorsRGB444[0][B];
+	best_colorsRGB444_packed[1] = (best_colorsRGB444[1][R] << 8) + (best_colorsRGB444[1][G] << 4) + best_colorsRGB444[1][B];
+	if( (best_colorsRGB444_packed[0] >= best_colorsRGB444_packed[1]) ^ ((best_distance & 1)==1) )
+	{
+		swapColors(best_colorsRGB444);
+
+		// Reshuffle pixel indices to exchange C1 with C3, and C2 with C4
+		best_pixel_indices = (0x55555555 & best_pixel_indices) | (0xaaaaaaaa & (~best_pixel_indices));
+	}
+
+	// Put the compress params into the compression block
+	compressed1 = 0;
+
+	PUTBITSHIGH( compressed1, best_colorsRGB444[0][R], 4, 57);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[0][G], 4, 53);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[0][B], 4, 49);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[1][R], 4, 45);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[1][G], 4, 41);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[1][B], 4, 37);
+	PUTBITSHIGH( compressed1, (best_distance >> 1), 2, 33);
+	best_pixel_indices=indexConversion(best_pixel_indices);
+	compressed2 = 0;
+	PUTBITS( compressed2, best_pixel_indices, 32, 31);
+
+	return best_error_using_Hmode;
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Exhaustively compress one 4x4 block to 58 bits (H-mode).
+// This is supposed to match the first of the three modes in TWOTIMER.
+// The bit layout is thought to be:
+//
+//|63 62 61 60 59 58|57 56 55 54|53 52 51 50|49 48 47 46|45 44 43 42|41 40 39 38|37 36 35 34|33 32|
+//|-------empty-----|---red 0---|--green 0--|--blue 0---|---red 1---|--green 1--|--blue 1---|d2 d1|
+//
+//|31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00|
+//|----------------------------------------index bits---------------------------------------------|
+//
+// The distance d is three bits, d2 (MSB), d1 and d0 (LSB). d0 is not stored explicitly.
+// Instead if the 12-bit word red0,green0,blue0 < red1,green1,blue1, d0 is assumed to be 0.
+// Else, it is assumed to be 1.
+//
+// img               - source image, 3 bytes per pixel, 'width' pixels per row
+// width, height     - width is the row length in pixels; height is only forwarded to the fast encoder
+// startx, starty    - position of the block's top-left pixel in img
+// compressed1       - out: upper word (the two RGB444 base colors and distance bits d2, d1)
+// compressed2       - out: lower word (the 32 pixel index bits)
+// best_error_so_far - error bound; color pairs that cannot get below it are culled
+//
+// compressBlockTHUMB58HFastest supplies a starting solution. Every ordered pair of RGB444 base colors
+// (color 0 < color 1) is then tested against precalculated error tables, pruning first on the red
+// components alone, then on red+green, before the full colors are evaluated.
+// Both output words are always written. The return value is the error of the fast encoder, or the lower
+// error found by the search; it can therefore be >= the best_error_so_far that was passed in.
+// Exits the process if one of the temporary tables cannot be allocated.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+unsigned int compressBlockTHUMB58HExhaustive(uint8 *img,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2, unsigned int best_error_so_far)
+{
+	unsigned int best_error_using_Hmode;
+	uint8 best_colorsRGB444[2][3];
+	unsigned int best_pixel_indices;
+	uint8 best_distance;
+
+	unsigned int error;
+	uint8 colorsRGB444[2][3];
+	int colorsRGB444_packed[2];
+	int best_colorsRGB444_packed[2];
+	int colorRGB444_packed;
+	unsigned int pixel_indices;
+	uint8 distance;
+	unsigned int *precalc_err;		    // smallest error per color, table and pixel
+	unsigned int *precalc_err_RG;		// smallest pixel error for an entire table
+	unsigned int *precalc_err_R;		// smallest pixel error for an entire table
+	uint8 block[4*4*4];
+
+	// Lookup tables for the search; they are freed again before the final encode further down.
+
+	precalc_err = (unsigned int*) malloc(4096*8*16*sizeof(unsigned int));
+	if(!precalc_err){printf("Out of memory allocating \n");exit(1);}
+
+	precalc_err_RG = (unsigned int*) malloc(16*16*8*16*sizeof(unsigned int));
+	if(!precalc_err_RG){printf("Out of memory allocating \n");exit(1);}
+
+	precalc_err_R = (unsigned int*) malloc(16*8*16*sizeof(unsigned int));
+	if(!precalc_err_R){printf("Out of memory allocating \n");exit(1);}
+
+	unsigned int test1, test2;
+	best_error_using_Hmode = (unsigned int)compressBlockTHUMB58HFastest(img,width, height, startx, starty, test1, test2);
+	// Seed the result with the two 12-bit RGB444 base colors the fast encoder stored in test1,
+	// so that a valid answer exists even if the search below finds nothing better.
+	best_colorsRGB444_packed[0] = GETBITSHIGH(test1, 12, 57);
+	best_colorsRGB444_packed[1] = GETBITSHIGH(test1, 12, 45);
+
+	if(best_error_using_Hmode < best_error_so_far)
+		best_error_so_far = best_error_using_Hmode;
+
+	int xx,yy,count = 0;
+
+	// Copy the 4x4 block column by column (xx outer, yy inner), using 4 bytes per pixel so that each pixel fills one 32-bit word.
+	for(xx = 0; xx<4; xx++)
+	{
+		for(yy=0; yy<4; yy++)
+		{
+			block[(count)*4] = img[((starty+yy)*width+(startx+xx))*3];
+			block[(count)*4+1] = img[((starty+yy)*width+(startx+xx))*3+1];
+			block[(count)*4+2] = img[((starty+yy)*width+(startx+xx))*3+2];
+			block[(count)*4+3] = 0;
+			count++;
+		}
+	}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed++)
+	{
+		colorsRGB444[0][0] = (colorRGB444_packed >> 8) & 0xf;
+		colorsRGB444[0][1] = (colorRGB444_packed >> 4) & 0xf;
+		colorsRGB444[0][2] = (colorRGB444_packed) & 0xf;
+		precalcError58H(block, colorsRGB444, colorRGB444_packed, precalc_err);
+	}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed+=16)
+	{
+		colorsRGB444[0][0] = (colorRGB444_packed >> 8) & 0xf;
+		colorsRGB444[0][1] = (colorRGB444_packed >> 4) & 0xf;
+		colorsRGB444[0][2] = (colorRGB444_packed) & 0xf;
+		precalcErrorRG_58H(img, width, startx, starty, colorsRGB444, colorRGB444_packed, precalc_err_RG);
+	}
+
+	for( colorRGB444_packed = 0; colorRGB444_packed<16*16*16; colorRGB444_packed+=16*16)
+	{
+		colorsRGB444[0][0] = (colorRGB444_packed >> 8) & 0xf;
+		colorsRGB444[0][1] = (colorRGB444_packed >> 4) & 0xf;
+		colorsRGB444[0][2] = (colorRGB444_packed) & 0xf;
+		precalcErrorR_58H(img, width, startx, starty, colorsRGB444, colorRGB444_packed, precalc_err_R);
+	}
+
+	// Hierarchical search over ordered base-color pairs: a red pair, and then a red+green pair, is only
+	// expanded further when its table lookup returns an error below best_error_so_far.
+
+	for( colorsRGB444[0][0] = 0; colorsRGB444[0][0] <16; colorsRGB444[0][0]++)
+	{
+		colorsRGB444_packed[0] = colorsRGB444[0][0]*256;
+		for( colorsRGB444[1][0] = 0; colorsRGB444[1][0] <16; colorsRGB444[1][0]++)
+		{
+			colorsRGB444_packed[1] = colorsRGB444[1][0]*256;
+			if(colorsRGB444_packed[0] <= colorsRGB444_packed[1])
+			{
+				error = calculateErrorFromPrecalcR58H(colorsRGB444_packed, precalc_err_R, best_error_so_far);
+				if(error < best_error_so_far)
+				{
+					for( colorsRGB444[0][1] = 0; colorsRGB444[0][1] <16; colorsRGB444[0][1]++)
+					{
+						colorsRGB444_packed[0] = colorsRGB444[0][0]*256 + colorsRGB444[0][1]*16;
+						for( colorsRGB444[1][1] = 0; colorsRGB444[1][1] <16; colorsRGB444[1][1]++)
+						{
+							colorsRGB444_packed[1] = colorsRGB444[1][0]*256 + colorsRGB444[1][1]*16;
+							if(colorsRGB444_packed[0] <= colorsRGB444_packed[1])
+							{
+								error = calculateErrorFromPrecalcRG58H(colorsRGB444_packed, precalc_err_RG, best_error_so_far);
+								if(error < best_error_so_far)
+								{
+									for( colorsRGB444[0][2] = 0; colorsRGB444[0][2] <16; colorsRGB444[0][2]++)
+									{
+										colorsRGB444_packed[0] = colorsRGB444[0][0]*256 + colorsRGB444[0][1]*16 + colorsRGB444[0][2];
+										for( colorsRGB444[1][2] = 0; colorsRGB444[1][2] <16; colorsRGB444[1][2]++)
+										{
+											colorsRGB444_packed[1] = colorsRGB444[1][0]*256 + colorsRGB444[1][1]*16 + colorsRGB444[1][2];
+											if(colorsRGB444_packed[0] < colorsRGB444_packed[1])
+											{
+												error = calculateErrorFromPrecalc58H(colorsRGB444_packed, precalc_err, best_error_so_far);
+												if(error < best_error_so_far)
+												{
+													best_error_so_far = error;
+													best_error_using_Hmode = error;
+													best_colorsRGB444_packed[0] = colorsRGB444_packed[0];
+													best_colorsRGB444_packed[1] = colorsRGB444_packed[1];
+												}
+											}
+										}
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+	best_colorsRGB444[0][0] = (best_colorsRGB444_packed[0] >> 8) & 0xf;
+	best_colorsRGB444[0][1] = (best_colorsRGB444_packed[0] >> 4) & 0xf;
+	best_colorsRGB444[0][2] = (best_colorsRGB444_packed[0]) & 0xf;
+	best_colorsRGB444[1][0] = (best_colorsRGB444_packed[1] >> 8) & 0xf;
+	best_colorsRGB444[1][1] = (best_colorsRGB444_packed[1] >> 4) & 0xf;
+	best_colorsRGB444[1][2] = (best_colorsRGB444_packed[1]) & 0xf;
+
+	free(precalc_err);
+	free(precalc_err_RG);
+	free(precalc_err_R);
+	// Obtain distance and pixel indices for the winning colors; the error returned by this call is not used afterwards.
+	error = (unsigned int) calculateErrorAndCompress58H(img, width, startx, starty, best_colorsRGB444, distance, pixel_indices);
+	best_distance = distance;
+	best_pixel_indices = pixel_indices;
+
+	//                   | col0 >= col1      col0 < col1
+	//------------------------------------------------------
+	// (dist & 1) = 1    | no need to swap | need to swap
+	//                   |-----------------+----------------
+	// (dist & 1) = 0    | need to swap    | no need to swap
+	//
+	// This can be done with an xor test.
+
+	best_colorsRGB444_packed[0] = (best_colorsRGB444[0][R] << 8) + (best_colorsRGB444[0][G] << 4) + best_colorsRGB444[0][B];
+	best_colorsRGB444_packed[1] = (best_colorsRGB444[1][R] << 8) + (best_colorsRGB444[1][G] << 4) + best_colorsRGB444[1][B];
+	if( (best_colorsRGB444_packed[0] >= best_colorsRGB444_packed[1]) ^ ((best_distance & 1)==1) )
+	{
+		swapColors(best_colorsRGB444);
+
+		// Reshuffle pixel indices to exchange C1 with C3, and C2 with C4
+		best_pixel_indices = (0x55555555 & best_pixel_indices) | (0xaaaaaaaa & (~best_pixel_indices));
+	}
+
+	// Put the compress params into the compression block
+	compressed1 = 0;
+
+	PUTBITSHIGH( compressed1, best_colorsRGB444[0][R], 4, 57);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[0][G], 4, 53);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[0][B], 4, 49);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[1][R], 4, 45);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[1][G], 4, 41);
+	PUTBITSHIGH( compressed1, best_colorsRGB444[1][B], 4, 37);
+	PUTBITSHIGH( compressed1, (best_distance >> 1), 2, 33);
+	best_pixel_indices=indexConversion(best_pixel_indices);
+	compressed2 = 0;
+	PUTBITS( compressed2, best_pixel_indices, 32, 31);
+
+	return best_error_using_Hmode;
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Compress a block exhaustively for the ETC1 codec.
+//
+// compressBlockDiffFlipFast runs first to obtain an error bound; compressBlockIndividualExhaustive and
+// compressBlockDifferentialExhaustive are then run with the lowest error seen so far as their bound.
+// The words of the lowest-error candidate are written to compressed1/compressed2. On equal errors the
+// exhaustive differential result wins over the exhaustive individual one, and both win over the fast one.
+// imgdec is only handed to compressBlockDiffFlipFast; nothing is returned.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockETC1Exhaustive(uint8 *img, uint8 *imgdec,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2)
+{
+	unsigned int error_currently_best;
+
+	unsigned int etc1_differential_word1;
+	unsigned int etc1_differential_word2;
+	unsigned int error_etc1_differential;
+
+	unsigned int etc1_individual_word1;
+	unsigned int etc1_individual_word2;
+	unsigned int error_etc1_individual;
+
+	unsigned int error_best;
+
+	error_currently_best = 255*255*16*3;
+
+	// First pass -- quickly find a low error so that we can cull away a lot of
+	// later calculations that are guaranteed to be higher than that error.
+	unsigned int error_etc1;
+	unsigned int etc1_word1;
+	unsigned int etc1_word2;
+
+	error_etc1 = (unsigned int) compressBlockDiffFlipFast(img, imgdec, width, height, startx, starty, etc1_word1, etc1_word2);
+	if(error_etc1 < error_currently_best)
+		error_currently_best = error_etc1;
+
+	error_etc1_individual = compressBlockIndividualExhaustive(img, width, height, startx, starty, etc1_individual_word1, etc1_individual_word2, error_currently_best);
+	if(error_etc1_individual < error_currently_best)
+		error_currently_best = error_etc1_individual;
+
+	error_etc1_differential = compressBlockDifferentialExhaustive(img, width, height, startx, starty, etc1_differential_word1, etc1_differential_word2, error_currently_best);
+	if(error_etc1_differential < error_currently_best)
+		error_currently_best = error_etc1_differential;
+
+	// NOTE(review): the differential words are the baseline, so this presumably relies on
+	// compressBlockDifferentialExhaustive writing valid words even when it cannot beat its bound -- confirm.
+	error_best = error_etc1_differential;
+	compressed1 = etc1_differential_word1;
+	compressed2 = etc1_differential_word2;
+
+	if(error_etc1_individual < error_best)
+	{
+		compressed1 = etc1_individual_word1;
+		compressed2 = etc1_individual_word2;
+		error_best = error_etc1_individual;
+	}
+
+	if(error_etc1 < error_best)
+	{
+		compressed1 = etc1_word1;
+		compressed2 = etc1_word2;
+		error_best = error_etc1;
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Compress a block exhaustively for the ETC1 codec using perceptual error measure.
+//
+// compressBlockDiffFlipFastPerceptual produces a first candidate, which is decoded with
+// decompressBlockDiffFlip(..., imgdec, ...) and scored as 1000*calcBlockPerceptualErrorRGB(img, imgdec, ...)
+// to get an error bound. compressBlockDifferentialExhaustivePerceptual and
+// compressBlockIndividualExhaustivePerceptual are then run, each with the lowest error seen so far as its bound.
+// The words of the lowest-error candidate are written to compressed1/compressed2. On equal errors the fast
+// candidate wins over the differential one, and both win over the individual one.
+// img, width, height, startx and starty are forwarded unchanged to the helpers and identify the block.
+// Nothing is returned.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockETC1ExhaustivePerceptual(uint8 *img, uint8 *imgdec,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2)
+{
+	unsigned int error_currently_best;
+
+	unsigned int etc1_differential_word1;
+	unsigned int etc1_differential_word2;
+	unsigned int error_etc1_differential;
+
+	unsigned int etc1_individual_word1;
+	unsigned int etc1_individual_word2;
+	unsigned int error_etc1_individual;
+
+	unsigned int error_best;
+
+	error_currently_best = 255*255*16*1000;
+
+	// First pass -- quickly find a low error so that we can cull away a lot of
+	// later calculations that are guaranteed to be higher than that error.
+	unsigned int error_etc1;
+	unsigned int etc1_word1;
+	unsigned int etc1_word2;
+
+	compressBlockDiffFlipFastPerceptual(img, imgdec, width, height, startx, starty, etc1_word1, etc1_word2);
+	decompressBlockDiffFlip(etc1_word1, etc1_word2, imgdec, width, height, startx, starty);
+	error_etc1 = 1000*calcBlockPerceptualErrorRGB(img, imgdec, width, height, startx, starty);
+	if(error_etc1 < error_currently_best)
+		error_currently_best = error_etc1;
+
+	// Second pass --- now find the lowest error, but only if it is lower than error_currently_best
+
+	error_etc1_differential = compressBlockDifferentialExhaustivePerceptual(img, width, height, startx, starty, etc1_differential_word1, etc1_differential_word2, error_currently_best);
+	if(error_etc1_differential < error_currently_best)
+		error_currently_best = error_etc1_differential;
+
+	error_etc1_individual = compressBlockIndividualExhaustivePerceptual(img, width, height, startx, starty, etc1_individual_word1, etc1_individual_word2, error_currently_best);
+	if(error_etc1_individual < error_currently_best)
+		error_currently_best = error_etc1_individual;
+
+	// Now find the best error.
+	error_best = error_etc1;
+	compressed1 = etc1_word1;
+	compressed2 = etc1_word2;
+
+	if(error_etc1_differential < error_best)
+	{
+		error_best = error_etc1_differential;
+		compressed1 = etc1_differential_word1;
+		compressed2 = etc1_differential_word2;
+	}
+
+	if(error_etc1_individual < error_best)
+	{
+		compressed1 = etc1_individual_word1;
+		compressed2 = etc1_individual_word2;
+		error_best = error_etc1_individual;
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Compress a block exhaustively for the ETC2 RGB codec using perceptual error measure.
+//
+// First pass: compressBlockDiffFlipFastPerceptual, compressBlockPlanar57, compressBlockTHUMB59TFastestPerceptual1000
+// and compressBlockTHUMB58HFastestPerceptual1000 each produce a candidate; the lowest error becomes the bound.
+// Second pass: the exhaustive planar, differential, individual, H-mode and T-mode encoders run in that order,
+// each with the lowest error seen so far as its bound.
+// The words of the lowest-error candidate are written to compressed1/compressed2. Candidates are compared in
+// the order fast ETC1, differential, individual, planar, H, T; a later one must be strictly better to win.
+// imgdec is handed to the decompress helpers used for scoring; no decode of the final winner is done here.
+// Nothing is returned.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockETC2ExhaustivePerceptual(uint8 *img, uint8 *imgdec,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2)
+{
+	unsigned int error_currently_best;
+
+	unsigned int etc1_differential_word1;
+	unsigned int etc1_differential_word2;
+	unsigned int error_etc1_differential;
+
+	unsigned int etc1_individual_word1;
+	unsigned int etc1_individual_word2;
+	unsigned int error_etc1_individual;
+
+	unsigned int planar57_word1;
+	unsigned int planar57_word2;
+	unsigned int planar_word1;
+	unsigned int planar_word2;
+	double error_planar;
+	unsigned int error_planar_red, error_planar_green, error_planar_blue;
+
+	unsigned int thumbH58_word1;
+	unsigned int thumbH58_word2;
+	unsigned int thumbH_word1;
+	unsigned int thumbH_word2;
+	unsigned int error_thumbH;
+
+	unsigned int thumbT59_word1;
+	unsigned int thumbT59_word2;
+	unsigned int thumbT_word1;
+	unsigned int thumbT_word2;
+	unsigned int error_thumbT;
+
+	unsigned int error_best;
+
+	error_currently_best = 255*255*16*1000;
+
+	// First pass -- quickly find a low error so that we can cull away a lot of
+	// later calculations that are guaranteed to be higher than that error.
+	unsigned int error_etc1;
+	unsigned int etc1_word1;
+	unsigned int etc1_word2;
+
+	compressBlockDiffFlipFastPerceptual(img, imgdec, width, height, startx, starty, etc1_word1, etc1_word2);
+	decompressBlockDiffFlip(etc1_word1, etc1_word2, imgdec, width, height, startx, starty);
+	error_etc1 = 1000*calcBlockPerceptualErrorRGB(img, imgdec, width, height, startx, starty);
+	if(error_etc1 < error_currently_best)
+		error_currently_best = error_etc1;
+
+	// The planar mode treats every channel independently and should not be affected by the weights in the error measure.
+	// We can hence use the nonperceptual version of the encoder also to find the best perceptual description of the block.
+	compressBlockPlanar57(img, width, height, startx, starty, planar57_word1, planar57_word2);
+	decompressBlockPlanar57errorPerComponent(planar57_word1, planar57_word2, imgdec, width, height, startx, starty, img, error_planar_red, error_planar_green, error_planar_blue);
+	error_planar = 1000*calcBlockPerceptualErrorRGB(img, imgdec, width, height, startx, starty);
+	stuff57bits(planar57_word1, planar57_word2, planar_word1, planar_word2);
+	if(error_planar < error_currently_best)
+		error_currently_best = (unsigned int) error_planar;
+
+	// Fast T-mode candidate (59 bits, expanded to two words by stuff59bits).
+	error_thumbT = (unsigned int) compressBlockTHUMB59TFastestPerceptual1000(img,width, height, startx, starty, thumbT59_word1, thumbT59_word2);
+	stuff59bits(thumbT59_word1, thumbT59_word2, thumbT_word1, thumbT_word2);
+	if(error_thumbT < error_currently_best)
+		error_currently_best = error_thumbT;
+
+	// Fast H-mode candidate (58 bits, expanded to two words by stuff58bits).
+	error_thumbH = (unsigned int) compressBlockTHUMB58HFastestPerceptual1000(img,width,height,startx, starty, thumbH58_word1, thumbH58_word2);
+	stuff58bits(thumbH58_word1, thumbH58_word2, thumbH_word1, thumbH_word2);
+	if(error_thumbH < error_currently_best)
+		error_currently_best = error_thumbH;
+
+	// Second pass --- now find the lowest error, but only if it is lower than error_currently_best
+
+	// Correct the individual errors for the different planes so that they sum to 1000 instead of 1.
+	error_planar_red *=PERCEPTUAL_WEIGHT_R_SQUARED_TIMES1000;
+	error_planar_green *=PERCEPTUAL_WEIGHT_G_SQUARED_TIMES1000;
+	error_planar_blue *=PERCEPTUAL_WEIGHT_B_SQUARED_TIMES1000;
+	compressBlockPlanar57ExhaustivePerceptual(img, width, height, startx, starty, planar57_word1, planar57_word2, error_currently_best, error_planar_red, error_planar_green, error_planar_blue);
+	decompressBlockPlanar57(planar57_word1, planar57_word2, imgdec, width, height, startx, starty);
+	error_planar = 1000*calcBlockPerceptualErrorRGB(img, imgdec, width, height, startx, starty);
+	stuff57bits(planar57_word1, planar57_word2, planar_word1, planar_word2);
+	if(error_planar < error_currently_best)
+		error_currently_best = (unsigned int) error_planar;
+
+	error_etc1_differential = compressBlockDifferentialExhaustivePerceptual(img, width, height, startx, starty, etc1_differential_word1, etc1_differential_word2, error_currently_best);
+	if(error_etc1_differential < error_currently_best)
+		error_currently_best = error_etc1_differential;
+
+	error_etc1_individual = compressBlockIndividualExhaustivePerceptual(img, width, height, startx, starty, etc1_individual_word1, etc1_individual_word2, error_currently_best);
+	if(error_etc1_individual < error_currently_best)
+		error_currently_best = error_etc1_individual;
+
+	error_thumbH = compressBlockTHUMB58HExhaustivePerceptual(img,width,height,startx, starty, thumbH58_word1, thumbH58_word2, error_currently_best);
+	stuff58bits(thumbH58_word1, thumbH58_word2, thumbH_word1, thumbH_word2);
+	if( error_thumbH < error_currently_best)
+		error_currently_best = error_thumbH;
+
+	error_thumbT = compressBlockTHUMB59TExhaustivePerceptual(img,width, height, startx, starty, thumbT59_word1, thumbT59_word2, error_currently_best);
+	stuff59bits(thumbT59_word1, thumbT59_word2, thumbT_word1, thumbT_word2);
+	if(error_thumbT < error_currently_best)
+		error_currently_best = error_thumbT;
+
+	// Now find the best error.
+	error_best = error_etc1;
+	compressed1 = etc1_word1;
+	compressed2 = etc1_word2;
+
+	if(error_etc1_differential < error_best)
+	{
+		error_best = error_etc1_differential;
+		compressed1 = etc1_differential_word1;
+		compressed2 = etc1_differential_word2;
+	}
+
+	if(error_etc1_individual < error_best)
+	{
+		compressed1 = etc1_individual_word1;
+		compressed2 = etc1_individual_word2;
+		error_best = error_etc1_individual;
+	}
+
+	if(error_planar < error_best)
+	{
+		compressed1 = planar_word1;
+		compressed2 = planar_word2;
+		error_best = (unsigned int) error_planar;
+	}
+
+	if(error_thumbH < error_best)
+	{
+		compressed1 = thumbH_word1;
+		compressed2 = thumbH_word2;
+		error_best = error_thumbH;
+	}
+
+	if(error_thumbT < error_best)
+	{
+		compressed1 = thumbT_word1;
+		compressed2 = thumbT_word2;
+		error_best = error_thumbT;
+	}
+}
+#endif
+
+#if EXHAUSTIVE_CODE_ACTIVE
+// Compress a block exhaustively for the ETC2 RGB codec: the ETC1, planar, THUMB-H and THUMB-T candidates are tried and the lowest-error one is returned in compressed1/compressed2.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressBlockETC2Exhaustive(uint8 *img, uint8 *imgdec,int width,int height,int startx,int starty, unsigned int &compressed1, unsigned int &compressed2)
+{
+	unsigned int error_currently_best; // lowest error seen so far; handed to the exhaustive searches below
+
+	unsigned int etc1_differential_word1;
+	unsigned int etc1_differential_word2;
+	unsigned int error_etc1_differential;
+
+	unsigned int etc1_individual_word1;
+	unsigned int etc1_individual_word2;
+	unsigned int error_etc1_individual;
+
+	unsigned int planar57_word1;
+	unsigned int planar57_word2;
+	unsigned int planar_word1;
+	unsigned int planar_word2;
+	double error_planar;
+	unsigned int error_planar_red;
+	unsigned int error_planar_green;
+	unsigned int error_planar_blue;
+
+	unsigned int thumbH58_word1;
+	unsigned int thumbH58_word2;
+	unsigned int thumbH_word1;
+	unsigned int thumbH_word2;
+	unsigned int error_thumbH;
+	
+	unsigned int thumbT59_word1;
+	unsigned int thumbT59_word2;
+	unsigned int thumbT_word1;
+	unsigned int thumbT_word2;
+	unsigned int error_thumbT;
+	
+	unsigned int error_best;
+	signed char best_char; // assigned during the selection below but never read in this function
+	int best_mode; // assigned during the selection below but never read in this function
+
+	error_currently_best = 255*255*16*3; // initial upper bound (presumably the largest squared error of a 16-pixel RGB block -- confirm)
+
+	// First pass -- quickly find a low error so that the second pass can cull away a lot of 
+	// calculations that are guaranteed to be higher than that error.
+	unsigned int error_etc1;
+	unsigned int etc1_word1;
+	unsigned int etc1_word2;
+
+	error_etc1 = (unsigned int) compressBlockDiffFlipFast(img, imgdec, width, height, startx, starty, etc1_word1, etc1_word2); // fast ETC1 candidate; it is the starting point of the final selection
+	if(error_etc1 < error_currently_best)
+		error_currently_best = error_etc1;
+
+	compressBlockPlanar57(img, width, height, startx, starty, planar57_word1, planar57_word2);
+	decompressBlockPlanar57errorPerComponent(planar57_word1, planar57_word2, imgdec, width, height, startx, starty, img, error_planar_red, error_planar_green, error_planar_blue);
+	error_planar = calcBlockErrorRGB(img, imgdec, width, height, startx, starty);
+	stuff57bits(planar57_word1, planar57_word2, planar_word1, planar_word2);
+	if(error_planar < error_currently_best)
+		error_currently_best = (unsigned int) error_planar;
+
+	error_thumbT = (unsigned int) compressBlockTHUMB59TFastest(img,width, height, startx, starty, thumbT59_word1, thumbT59_word2);
+	stuff59bits(thumbT59_word1, thumbT59_word2, thumbT_word1, thumbT_word2);
+	if(error_thumbT < error_currently_best)
+		error_currently_best = error_thumbT;
+
+	error_thumbH = (unsigned int) compressBlockTHUMB58HFastest(img,width,height,startx, starty, thumbH58_word1, thumbH58_word2);
+	stuff58bits(thumbH58_word1, thumbH58_word2, thumbH_word1, thumbH_word2);
+	if(error_thumbH < error_currently_best)
+		error_currently_best = error_thumbH;
+
+	// Second pass --- now find the lowest error, but only if it is lower than error_currently_best
+	error_etc1_differential = compressBlockDifferentialExhaustive(img, width, height, startx, starty, etc1_differential_word1, etc1_differential_word2, error_currently_best);
+	if(error_etc1_differential < error_currently_best)
+		error_currently_best = error_etc1_differential;
+
+	compressBlockPlanar57Exhaustive(img, width, height, startx, starty, planar57_word1, planar57_word2, error_currently_best, error_planar_red, error_planar_green, error_planar_blue);
+	decompressBlockPlanar57(planar57_word1, planar57_word2, imgdec, width, height, startx, starty);
+	error_planar = calcBlockErrorRGB(img, imgdec, width, height, startx, starty); // replaces the first-pass planar error
+	stuff57bits(planar57_word1, planar57_word2, planar_word1, planar_word2);
+	if(error_planar < error_currently_best)
+		error_currently_best = (unsigned int) error_planar;
+	
+	error_etc1_individual = compressBlockIndividualExhaustive(img, width, height, startx, starty, etc1_individual_word1, etc1_individual_word2, error_currently_best);
+	if(error_etc1_individual < error_currently_best)
+		error_currently_best = error_etc1_individual;
+
+	error_thumbH = compressBlockTHUMB58HExhaustive(img,width,height,startx, starty, thumbH58_word1, thumbH58_word2, error_currently_best); // replaces the first-pass THUMB-H error
+	if( error_thumbH < error_currently_best)
+		error_currently_best = error_thumbH;
+	stuff58bits(thumbH58_word1, thumbH58_word2, thumbH_word1, thumbH_word2);
+
+	error_thumbT = compressBlockTHUMB59TExhaustive(img,width, height, startx, starty, thumbT59_word1, thumbT59_word2, error_currently_best); // replaces the first-pass THUMB-T error
+	if(error_thumbT < error_currently_best)
+		error_currently_best = error_thumbT;
+	stuff59bits(thumbT59_word1, thumbT59_word2, thumbT_word1, thumbT_word2);
+
+	error_best = 255*255*3*16; // NOTE(review): dead store -- error_best is reassigned from error_etc1 just below
+	// Now find the best error. Every comparison is strict, so on a tie the earlier candidate in this list is kept.
+	error_best = error_etc1;
+	compressed1 = etc1_word1;
+	compressed2 = etc1_word2;
+	best_char = '.';
+	best_mode = MODE_ETC1;
+
+	if(error_etc1_differential < error_best)
+	{
+		error_best = error_etc1_differential;
+		compressed1 = etc1_differential_word1;
+		compressed2 = etc1_differential_word2;
+		best_char = '.';
+		best_mode = MODE_ETC1;
+	}
+	if(error_etc1_individual < error_best)
+	{
+		compressed1 = etc1_individual_word1;
+		compressed2 = etc1_individual_word2;
+		best_char = ',';
+		error_best = error_etc1_individual;
+		best_mode = MODE_ETC1;
+	}
+	if(error_planar < error_best)
+	{
+		compressed1 = planar_word1;
+		compressed2 = planar_word2;
+		best_char = 'p';
+		error_best = (unsigned int) error_planar;
+		best_mode = MODE_PLANAR;
+	}
+	if(error_thumbH < error_best)
+	{
+		compressed1 = thumbH_word1;
+		compressed2 = thumbH_word2;
+		best_char = 'H';
+		error_best = error_thumbH;
+		best_mode = MODE_THUMB_H;
+	}
+	if(error_thumbT < error_best)
+	{
+		compressed1 = thumbT_word1;
+		compressed2 = thumbT_word2;
+		best_char = 'T';
+		error_best = error_thumbT;
+		best_mode = MODE_THUMB_T;
+	}
+}
+#endif
+
+//// Exhaustive code ends here.
+
+
+// Compress an image and write it to dstfile: a .ktx file when the global ktxFile is set, otherwise a .pkm file.
+// width/height are written to the header as the active size; the image is compressed block by block (4x4 pixels)
+// over expandedwidth x expandedheight. alphaimg is the caller's alpha/single-channel plane; for the RG format it is
+// replaced by buffers allocated (and released) here. If dstfile cannot be opened nothing is written and no message
+// is printed. Exits the process when a work buffer cannot be allocated or, for .ktx output, the format is unknown.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressImageFile(uint8 *img, uint8 *alphaimg,int width,int height,char *dstfile, int expandedwidth, int expandedheight)
+{
+	FILE *f;
+	int x,y,w,h;
+	unsigned int block1, block2;
+	unsigned short wi, hi;
+	unsigned char magic[4];
+	unsigned char version[2];
+	unsigned short texture_type=format;
+	uint8 *imgdec;
+	uint8* alphaimg2;
+	imgdec = (unsigned char*) malloc(expandedwidth*expandedheight*3);
+	if(!imgdec)
+	{
+		printf("Could not allocate decompression buffer --- exiting\n");
+		exit(1);
+	}
+
+	magic[0]   = 'P'; magic[1]   = 'K'; magic[2] = 'M'; magic[3] = ' '; 
+
+	if(codec==CODEC_ETC2)
+	{
+		version[0] = '2'; version[1] = '0';
+	}
+	else
+	{
+		version[0] = '1'; version[1] = '0';
+	}
+
+	if(f=fopen(dstfile,"wb"))
+	{
+		w=expandedwidth/4;  w*=4;
+		h=expandedheight/4; h*=4;
+		wi = w;
+		hi = h;
+		if(ktxFile) 
+		{
+			//.ktx file: KTX header followed by compressed binary data.
+			KTX_header header;
+			//identifier
+			for(int i=0; i<12; i++) 
+			{
+				header.identifier[i]=ktx_identifier[i];
+			}
+			//endianess int.. if this comes out reversed, all of the other ints will too.
+			header.endianness=KTX_ENDIAN_REF;
+			
+			//these values are always 0/1 for compressed textures.
+			header.glType=0;
+			header.glTypeSize=1;
+			header.glFormat=0;
+
+			header.pixelWidth=width;
+			header.pixelHeight=height;
+			header.pixelDepth=0;
+
+			//we only support single non-mipmapped non-cubemap textures..
+			header.numberOfArrayElements=0;
+			header.numberOfFaces=1;
+			header.numberOfMipmapLevels=1;
+
+			//and no metadata..
+			header.bytesOfKeyValueData=0;
+			
+			int halfbytes=1;
+			//halfbytes is the compressed size per pixel in units of 4 bits (see imagesize below): 1 or 2.
+			//glInternalFormat and glBaseInternalFormat are chosen from the global format.
+			if(format==ETC2PACKAGE_R_NO_MIPMAPS) 
+			{
+				header.glBaseInternalFormat=GL_R;
+				if(formatSigned)
+					header.glInternalFormat=GL_COMPRESSED_SIGNED_R11_EAC;
+				else
+					header.glInternalFormat=GL_COMPRESSED_R11_EAC;
+			}
+			else if(format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+			{
+				halfbytes=2;
+				header.glBaseInternalFormat=GL_RG;
+				if(formatSigned)
+					header.glInternalFormat=GL_COMPRESSED_SIGNED_RG11_EAC;
+				else
+					header.glInternalFormat=GL_COMPRESSED_RG11_EAC;
+			}
+			else if(format==ETC2PACKAGE_RGB_NO_MIPMAPS) 
+			{
+				header.glBaseInternalFormat=GL_RGB;
+				header.glInternalFormat=GL_COMPRESSED_RGB8_ETC2;
+			}
+			else if(format==ETC2PACKAGE_sRGB_NO_MIPMAPS) 
+			{
+				header.glBaseInternalFormat=GL_SRGB;
+				header.glInternalFormat=GL_COMPRESSED_SRGB8_ETC2;
+			}
+			else if(format==ETC2PACKAGE_RGBA_NO_MIPMAPS) 
+			{
+				halfbytes=2;
+				header.glBaseInternalFormat=GL_RGBA;
+				header.glInternalFormat=GL_COMPRESSED_RGBA8_ETC2_EAC;
+			}
+			else if(format==ETC2PACKAGE_sRGBA_NO_MIPMAPS) 
+			{
+				halfbytes=2;
+				header.glBaseInternalFormat=GL_SRGB8_ALPHA8;
+				header.glInternalFormat=GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC;
+			}
+			else if(format==ETC2PACKAGE_RGBA1_NO_MIPMAPS) 
+			{
+				header.glBaseInternalFormat=GL_RGBA;
+				header.glInternalFormat=GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2;
+			}
+			else if(format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS) 
+			{
+				header.glBaseInternalFormat=GL_SRGB8_ALPHA8;
+				header.glInternalFormat=GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2;
+			}
+			else if(format==ETC1_RGB_NO_MIPMAPS) 
+			{
+				header.glBaseInternalFormat=GL_RGB;
+				header.glInternalFormat=GL_ETC1_RGB8_OES;
+			}
+			else 
+			{
+				printf("internal error: bad format!\n");
+				exit(1);
+			}
+			//write header
+			fwrite(&header,sizeof(KTX_header),1,f);
+			
+			//write size of compressed data.. which depend on the expanded size..
+			unsigned int imagesize=(w*h*halfbytes)/2;
+			fwrite(&imagesize,sizeof(int),1,f);
+		}
+		else 
+		{
+			//.pkm file, contains small header..
+
+			// Write magic number
+			fwrite(magic, sizeof(unsigned char), 4, f);
+		
+			// Write version
+			fwrite(version, sizeof(unsigned char), 2, f);
+
+			// Write texture type
+			if(texture_type==ETC2PACKAGE_RG_NO_MIPMAPS&&formatSigned) 
+			{
+				unsigned short temp = ETC2PACKAGE_RG_SIGNED_NO_MIPMAPS;
+				write_big_endian_2byte_word(&temp,f);
+			}
+			else if(texture_type==ETC2PACKAGE_R_NO_MIPMAPS&&formatSigned) 
+			{
+				unsigned short temp = ETC2PACKAGE_R_SIGNED_NO_MIPMAPS;
+				write_big_endian_2byte_word(&temp,f);
+			}
+			else
+				write_big_endian_2byte_word(&texture_type, f);
+
+			// Write binary header: the width and height as unsigned 16-bit words
+			write_big_endian_2byte_word(&wi, f);
+			write_big_endian_2byte_word(&hi, f);
+
+			// Also write the active pixels. For instance, if we want to compress
+			// a 128 x 129 image, we have to extend it to 128 x 132 pixels.
+			// Then the wi and hi written above will be 128 and 132, but the
+			// additional information that we write below will be 128 and 129,
+			// to indicate that it is only the top 129 lines of data in the 
+			// decompressed image that will be valid data, and the rest will
+			// be just garbage. 
+
+			unsigned short activew, activeh;
+			activew = width;
+			activeh = height;
+
+			write_big_endian_2byte_word(&activew, f);
+			write_big_endian_2byte_word(&activeh, f);
+		}
+		int totblocks = expandedheight/4 * expandedwidth/4;
+		int countblocks = 0;
+		double percentageblocks=-1.0;
+		double oldpercentageblocks;
+		
+		if(format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+		{
+			//extract data from red and green channel into two alpha channels.
+			//note that the image will be 16-bit per channel in this case.
+			alphaimg= (unsigned char*)malloc(expandedwidth*expandedheight*2);
+			alphaimg2=(unsigned char*)malloc(expandedwidth*expandedheight*2);
+			setupAlphaTableAndValtab();
+			if(!alphaimg||!alphaimg2) 
+			{
+				printf("failed allocating space for alpha buffers!\n");
+				exit(1);
+			}
+			for(y=0;y<expandedheight;y++)
+			{
+				for(x=0;x<expandedwidth;x++)
+				{
+					alphaimg[2*(y*expandedwidth+x)]=img[6*(y*expandedwidth+x)];
+					alphaimg[2*(y*expandedwidth+x)+1]=img[6*(y*expandedwidth+x)+1];
+					alphaimg2[2*(y*expandedwidth+x)]=img[6*(y*expandedwidth+x)+2];
+					alphaimg2[2*(y*expandedwidth+x)+1]=img[6*(y*expandedwidth+x)+3];
+				}
+			}
+		}
+		for(y=0;y<expandedheight/4;y++)
+		{
+			for(x=0;x<expandedwidth/4;x++)
+			{
+				countblocks++;
+				oldpercentageblocks = percentageblocks;
+				percentageblocks = 100.0*countblocks/(1.0*totblocks);
+				//compress color channels
+				if(codec==CODEC_ETC) 
+				{
+					if(metric==METRIC_NONPERCEPTUAL) 
+					{
+						if(speed==SPEED_FAST)
+							compressBlockDiffFlipFast(img, imgdec, expandedwidth, expandedheight, 4*x, 4*y, block1, block2);
+						else
+#if EXHAUSTIVE_CODE_ACTIVE
+							compressBlockETC1Exhaustive(img, imgdec, expandedwidth, expandedheight, 4*x, 4*y, block1, block2);		
+#else
+							printf("Not implemented in this version\n");
+#endif
+					}
+					else 
+					{
+						if(speed==SPEED_FAST)
+							compressBlockDiffFlipFastPerceptual(img, imgdec, expandedwidth, expandedheight, 4*x, 4*y, block1, block2);
+						else
+#if EXHAUSTIVE_CODE_ACTIVE
+							compressBlockETC1ExhaustivePerceptual(img, imgdec, expandedwidth, expandedheight, 4*x, 4*y, block1, block2);	
+#else
+							printf("Not implemented in this version\n");
+#endif
+					}
+				}
+				else 
+				{
+					if(format==ETC2PACKAGE_R_NO_MIPMAPS||format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+					{
+						//don't compress color
+					}
+					else if(format==ETC2PACKAGE_RGBA1_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS) 
+					{
+						//this is only available for fast/nonperceptual
+						if(speed == SPEED_SLOW && first_time_message)
+						{
+							printf("Slow codec not implemented for RGBA1 --- using fast codec instead.\n");
+							first_time_message = false;
+						}
+						compressBlockETC2Fast(img, alphaimg,imgdec, expandedwidth, expandedheight, 4*x, 4*y, block1, block2);
+					}
+					else if(metric==METRIC_NONPERCEPTUAL) 
+					{
+						if(speed==SPEED_FAST)
+							compressBlockETC2Fast(img, alphaimg,imgdec, expandedwidth, expandedheight, 4*x, 4*y, block1, block2);
+						else
+#if EXHAUSTIVE_CODE_ACTIVE
+							compressBlockETC2Exhaustive(img, imgdec, expandedwidth, expandedheight, 4*x, 4*y, block1, block2);		
+#else
+							printf("Not implemented in this version\n");
+#endif
+					}
+					else 
+					{
+						if(speed==SPEED_FAST)
+							compressBlockETC2FastPerceptual(img, imgdec, expandedwidth, expandedheight, 4*x, 4*y, block1, block2);
+						else
+#if EXHAUSTIVE_CODE_ACTIVE
+							compressBlockETC2ExhaustivePerceptual(img, imgdec, expandedwidth, expandedheight, 4*x, 4*y, block1, block2);	
+#else
+							printf("Not implemented in this version\n");
+#endif
+					}
+				}
+				
+				//compression of alpha channel in case of 4-bit alpha. Uses 8-bit alpha channel as input, and has 8-bit precision.
+				if(format==ETC2PACKAGE_RGBA_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA_NO_MIPMAPS) 
+				{
+					uint8 alphadata[8];
+					if(speed==SPEED_SLOW)
+						compressBlockAlphaSlow(alphaimg,4*x,4*y,expandedwidth,expandedheight,alphadata);
+					else
+						compressBlockAlphaFast(alphaimg,4*x,4*y,expandedwidth,expandedheight,alphadata);
+					//write the 8 bytes of alphadata into f.
+					fwrite(alphadata,1,8,f);
+				}
+
+				//store compressed color channels
+				if(format!=ETC2PACKAGE_R_NO_MIPMAPS&&format!=ETC2PACKAGE_RG_NO_MIPMAPS) 
+				{
+					write_big_endian_4byte_word(&block1, f);
+					write_big_endian_4byte_word(&block2, f);
+				}
+
+				//1-channel or 2-channel alpha compression: uses 16-bit data as input, and has 11-bit precision
+				if(format==ETC2PACKAGE_R_NO_MIPMAPS||format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+				{ 
+					uint8 alphadata[8];
+					compressBlockAlpha16(alphaimg,4*x,4*y,expandedwidth,expandedheight,alphadata);
+					fwrite(alphadata,1,8,f);
+				}
+				//compression of second alpha channel in RG-compression
+				if(format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+				{
+					uint8 alphadata[8];
+					compressBlockAlpha16(alphaimg2,4*x,4*y,expandedwidth,expandedheight,alphadata);
+					fwrite(alphadata,1,8,f);
+				}
+				// Progress line; in fast mode it is only refreshed when the whole-number percentage changes.
+				if(verbose && (speed!=SPEED_FAST || (int)(percentageblocks) != (int)(oldpercentageblocks) || percentageblocks == 100.0))
+					printf("Compressed %d of %d blocks, %.0f%% finished.\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", countblocks, totblocks, 100.0*countblocks/(1.0*totblocks));
+			}
+		}
+		// The RG channel planes were allocated by this function above, so release them here.
+		if(format==ETC2PACKAGE_RG_NO_MIPMAPS) 
+		{
+			free(alphaimg);
+			free(alphaimg2);
+		}
+		printf("\n");
+		fclose(f);
+		printf("Saved file <%s>.\n",dstfile);
+	}
+	// Scratch buffer for decompressed blocks; released whether or not the file could be opened.
+	free(imgdec);
+}
+
+#if 0
+
+// Compress a file: print the active settings, read srcfile (plus, for the alpha and R formats, a plane extracted to alpha.pgm by the external imconv tool), compress it to dstfile, print the elapsed time and call calculatePSNRfile.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void compressFile(char *srcfile,char *dstfile)
+{
+	uint8 *srcimg = NULL; // stays NULL for the R format, where readSrcFile below is short-circuited
+	int width,height;
+	int extendedwidth, extendedheight;
+	struct _timeb tstruct;
+	unsigned int tstart; // unsigned: the millisecond stamp wraps instead of overflowing a signed int
+	unsigned int tstop; // unsigned: matches the %u used to print tstop - tstart
+	// 0: compress from .any to .pkm with SPEED_FAST, METRIC_NONPERCEPTUAL, ETC 
+	// 1: compress from .any to .pkm with SPEED_MEDIUM, METRIC_NONPERCEPTUAL, ETC
+	// 2: compress from .any to .pkm with SPEED_SLOW, METRIC_NONPERCEPTUAL, ETC
+	// 3: compress from .any to .pkm with SPEED_FAST, METRIC_PERCEPTUAL, ETC
+	// 4: compress from .any to .pkm with SPEED_MEDIUM, METRIC_PERCEPTUAL, ETC
+	// 5: compress from .any to .pkm with SPEED_SLOW, METRIC_PERCEPTUAL, ETC
+	// 6: decompress from .pkm to .any
+	// 7: calculate PSNR between .any and .any
+	// 8: compress from .any to .pkm with SPEED_FAST, METRIC_NONPERCEPTUAL, ETC2 
+	// 9: compress from .any to .pkm with SPEED_MEDIUM, METRIC_NONPERCEPTUAL, ETC2
+	//10: compress from .any to .pkm with SPEED_SLOW, METRIC_NONPERCEPTUAL, ETC2
+	//11: compress from .any to .pkm with SPEED_FAST, METRIC_PERCEPTUAL, ETC2
+	//12: compress from .any to .pkm with SPEED_MEDIUM, METRIC_PERCEPTUAL, ETC2
+	//13: compress from .any to .pkm with SPEED_SLOW, METRIC_PERCEPTUAL, ETC2
+
+	printf("\n");
+	if(codec==CODEC_ETC)
+		printf("ETC codec, ");
+	else
+		printf("ETC2 codec, ");
+	if(speed==SPEED_FAST)
+		printf("using FAST compression mode and ");
+	else if(speed==SPEED_MEDIUM)
+		printf("using MEDIUM compression mode and ");
+	else
+		printf("using SLOW compression mode and ");
+	if(metric==METRIC_PERCEPTUAL)
+		printf("PERCEPTUAL error metric, ");
+	else
+		printf("NONPERCEPTUAL error metric, ");
+	if(format==ETC2PACKAGE_RGBA_NO_MIPMAPS)
+		printf("in RGBA format");
+	else if(format==ETC2PACKAGE_sRGBA_NO_MIPMAPS)
+		printf("in sRGBA format");
+	else if(format==ETC2PACKAGE_RGBA1_NO_MIPMAPS)
+		printf("in RGB + punch-through alpha format");
+	else if(format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS)
+		printf("in sRGB + punch-through alpha format");
+	else if(format==ETC2PACKAGE_R_NO_MIPMAPS)
+		printf("in R format");
+	else if(format==ETC2PACKAGE_RGB_NO_MIPMAPS||format==ETC1_RGB_NO_MIPMAPS)
+		printf("in RGB format");
+	else if(format==ETC2PACKAGE_RG_NO_MIPMAPS)
+		printf("in RG format");
+	else
+		printf("in OTHER format");
+	printf("\n");
+	if(readCompressParams())
+	{
+		if(format==ETC2PACKAGE_R_NO_MIPMAPS||readSrcFile(srcfile,srcimg,width,height,extendedwidth, extendedheight))
+		{
+			//make sure that alphasrcimg contains the alpha channel or is null here, and pass it to compressimagefile
+			uint8* alphaimg=NULL;
+			if(format==ETC2PACKAGE_RGBA_NO_MIPMAPS||format==ETC2PACKAGE_RGBA1_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA_NO_MIPMAPS||format==ETC2PACKAGE_sRGBA1_NO_MIPMAPS) 
+			{
+				char str[300];
+				//printf("reading alpha channel....");
+				sprintf(str,"imconv %s -alpha extract alpha.pgm\n",srcfile); // NOTE(review): srcfile goes unescaped into a shell command and str is only 300 bytes
+				system(str);
+				readAlpha(alphaimg,width,height,extendedwidth,extendedheight);
+				printf("ok!\n");
+				setupAlphaTableAndValtab();
+			}
+			else if(format==ETC2PACKAGE_R_NO_MIPMAPS) 
+			{
+				char str[300];
+				sprintf(str,"imconv %s alpha.pgm\n",srcfile); // NOTE(review): same unescaped, fixed-size command string as above
+				system(str);
+				readAlpha(alphaimg,width,height,extendedwidth,extendedheight);
+				printf("read alpha ok, size is %d,%d (%d,%d)",width,height,extendedwidth,extendedheight);
+				setupAlphaTableAndValtab();
+			}
+			printf("Compressing...\n");
+
+			tstart=(unsigned int) time(NULL);
+			_ftime( &tstruct );
+			tstart=tstart*1000+tstruct.millitm;
+			compressImageFile(srcimg,alphaimg,width,height,dstfile,extendedwidth, extendedheight);			
+			tstop = (unsigned int) time(NULL);
+			_ftime( &tstruct );
+			tstop = tstop*1000+tstruct.millitm;
+			printf( "It took %u milliseconds to compress:\n", tstop - tstart);
+			calculatePSNRfile(dstfile,srcimg,alphaimg);
+		}
+	}
+}
+
+
+// Calculates the PSNR between two image files, prints it and returns it; returns 0.0 when a file cannot be read or the image sizes differ.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+double calculatePSNRTwoFiles(char *srcfile1,char *srcfile2)
+{
+	uint8 *srcimg1;
+	uint8 *srcimg2;
+	int width1, height1;
+	int width2, height2;
+	double PSNR = 0.0; // defined result for every failure path below (was returned uninitialised)
+	double perceptually_weighted_PSNR; // computed below but not used by this function
+
+	if(readSrcFileNoExpand(srcfile1,srcimg1,width1,height1))
+	{
+		if(readSrcFileNoExpand(srcfile2,srcimg2,width2,height2))
+		{
+			if((width1 == width2) && (height1 == height2))
+			{
+				PSNR = calculatePSNR(srcimg1, srcimg2, width1, height1);
+				printf("%f\n",PSNR);
+				perceptually_weighted_PSNR = calculateWeightedPSNR(srcimg1, srcimg2, width1, height1, 0.299, 0.587, 0.114);
+			}
+			else
+			{
+				printf("\n Width and height do no not match for image: width, height = (%d, %d) and (%d, %d)\n",width1,height1, width2, height2);
+			}
+		}
+		else
+		{
+			printf("Couldn't open file %s.\n",srcfile2);
+		}
+	}
+	else
+	{
+		printf("Couldn't open file %s.\n",srcfile1);
+	}
+
+	return PSNR;
+}
+
+
+// Main function: with an accepted argument count it decompresses, computes PSNR or compresses depending on the global mode; otherwise it prints the usage text.
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int main(int argc,char *argv[])
+{
+	if(argc==3 || argc==4 || argc == 5 || argc == 7 || argc == 9 || argc == 11 || argc == 13)
+	{
+		// The source file is always the second last one. 
+		char srcfile[200];
+		char dstfile[200];
+		readArguments(argc,argv,srcfile,dstfile); // NOTE(review): presumably copies argv entries into the 200-byte buffers -- confirm it bounds the copy
+		
+		int q = find_pos_of_extension(srcfile); // result is not used in this function
+		int q2 = find_pos_of_extension(dstfile); // result is not used in this function
+		
+		if(!fileExist(srcfile))
+		{
+			printf("Error: file <%s> does not exist.\n",srcfile);
+			exit(1); // report the failure to the caller with a non-zero status
+		}
+		
+		if(mode==MODE_UNCOMPRESS)
+		{
+			printf("Decompressing .pkm/.ktx file ...\n");
+			uint8* alphaimg=NULL, *img;
+			int w, h;
+			uncompressFile(srcfile,img,alphaimg,w,h);
+			writeOutputFile(dstfile,img,alphaimg,w,h);
+		}
+		else if(mode==MODE_PSNR)
+		{
+			calculatePSNRTwoFiles(srcfile,dstfile);
+		}
+		else
+		{
+			compressFile(srcfile, dstfile);
+		}
+	}
+	else
+	{
+		printf("ETCPACK v2.74 For ETC and ETC2\n");
+		printf("Compresses and decompresses images using the Ericsson Texture Compression (ETC) version 1.0 and 2.0.\n\nUsage: etcpack srcfile dstfile\n\n");
+		printf("      -s {fast|slow}                     Compression speed. Slow = exhaustive \n");
+		printf("                                         search for optimal quality\n");
+		printf("                                         (default: fast)\n");
+		printf("      -e {perceptual|nonperceptual}      Error metric: Perceptual (nicest) or \n");
+		printf("                                         nonperceptual (highest PSNR)\n");
+		printf("                                         (default: perceptual)\n");
+		printf("      -c {etc1|etc2}                     Codec: etc1 (most compatible) or \n");
+		printf("                                         etc2 (highest quality)\n");
+		printf("                                         (default: etc2)\n");
+		printf("      -f {R|R_signed|RG|RG_signed|       Format: one, two, three or four \n");
+		printf("          RGB|RGBA1|RGBA8|               channels, and 1 or 8 bits for alpha\n");
+        printf("          sRGB|sRGBA1|sRGBA8|}           RGB or sRGB.\n");
+		printf("                                         (1 equals punchthrough)\n");
+		printf("                                         (default: RGB)\n");
+		printf("      -v {on|off}                        Detailed progress info. (default on)\n");
+		printf("                                                            \n");
+		printf("Examples: \n");
+		printf("  etcpack img.ppm img.pkm                Compresses img.ppm to img.pkm in\n"); 
+		printf("                                         ETC2 RGB format\n");
+		printf("  etcpack img.ppm img.ktx                Compresses img.ppm to img.ktx in\n"); 
+		printf("                                         ETC2 RGB format\n");
+		printf("  etcpack img.pkm img_copy.ppm           Decompresses img.pkm to img_copy.ppm\n");
+		printf("  etcpack -s slow img.ppm img.pkm        Compress using the slow mode.\n");
+		printf("  etcpack -p orig.ppm copy.ppm           Calculate PSNR between orig and copy\n");
+		printf("  etcpack -f RGBA8 img.tga img.pkm       Compresses img.tga to img.pkm, using \n");
+		printf("                                         etc2 + alpha.\n");
+		printf("  etcpack -f RG img.ppm img.pkm          Compresses red and green channels of\n");
+		printf("                                         img.ppm\n");
+	}
+ 	return 0;
+}
+
+#endif // 0
diff --git a/extern/etcpack/image.cxx b/extern/etcpack/image.cxx
new file mode 100755
index 0000000..8ab8c66
--- /dev/null
+++ b/extern/etcpack/image.cxx
@@ -0,0 +1,461 @@
+//// etcpack v2.74
+//// 
+//// NO WARRANTY 
+//// 
+//// BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE THE PROGRAM IS PROVIDED
+//// "AS IS". ERICSSON MAKES NO REPRESENTATIONS OF ANY KIND, EXTENDS NO
+//// WARRANTIES OR CONDITIONS OF ANY KIND; EITHER EXPRESS, IMPLIED OR
+//// STATUTORY; INCLUDING, BUT NOT LIMITED TO, EXPRESS, IMPLIED OR
+//// STATUTORY WARRANTIES OR CONDITIONS OF TITLE, MERCHANTABILITY,
+//// SATISFACTORY QUALITY, SUITABILITY AND FITNESS FOR A PARTICULAR
+//// PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+//// PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+//// THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. ERICSSON
+//// MAKES NO WARRANTY THAT THE MANUFACTURE, SALE, OFFERING FOR SALE,
+//// DISTRIBUTION, LEASE, USE OR IMPORTATION UNDER THE LICENSE WILL BE FREE
+//// FROM INFRINGEMENT OF PATENTS, COPYRIGHTS OR OTHER INTELLECTUAL
+//// PROPERTY RIGHTS OF OTHERS, AND THE VALIDITY OF THE LICENSE IS SUBJECT
+//// TO YOUR SOLE RESPONSIBILITY TO MAKE SUCH DETERMINATION AND ACQUIRE
+//// SUCH LICENSES AS MAY BE NECESSARY WITH RESPECT TO PATENTS, COPYRIGHT
+//// AND OTHER INTELLECTUAL PROPERTY OF THIRD PARTIES.
+//// 
+//// FOR THE AVOIDANCE OF DOUBT THE PROGRAM (I) IS NOT LICENSED FOR; (II)
+//// IS NOT DESIGNED FOR OR INTENDED FOR; AND (III) MAY NOT BE USED FOR;
+//// ANY MISSION CRITICAL APPLICATIONS SUCH AS, BUT NOT LIMITED TO
+//// OPERATION OF NUCLEAR OR HEALTHCARE COMPUTER SYSTEMS AND/OR NETWORKS,
+//// AIRCRAFT OR TRAIN CONTROL AND/OR COMMUNICATION SYSTEMS OR ANY OTHER
+//// COMPUTER SYSTEMS AND/OR NETWORKS OR CONTROL AND/OR COMMUNICATION
+//// SYSTEMS ALL IN WHICH CASE THE FAILURE OF THE PROGRAM COULD LEAD TO
+//// DEATH, PERSONAL INJURY, OR SEVERE PHYSICAL, MATERIAL OR ENVIRONMENTAL
+//// DAMAGE. YOUR RIGHTS UNDER THIS LICENSE WILL TERMINATE AUTOMATICALLY
+//// AND IMMEDIATELY WITHOUT NOTICE IF YOU FAIL TO COMPLY WITH THIS
+//// PARAGRAPH.
+//// 
+//// IN NO EVENT WILL ERICSSON, BE LIABLE FOR ANY DAMAGES WHATSOEVER,
+//// INCLUDING BUT NOT LIMITED TO PERSONAL INJURY, ANY GENERAL, SPECIAL,
+//// INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR IN
+//// CONNECTION WITH THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT
+//// NOT LIMITED TO LOSS OF PROFITS, BUSINESS INTERUPTIONS, OR ANY OTHER
+//// COMMERCIAL DAMAGES OR LOSSES, LOSS OF DATA OR DATA BEING RENDERED
+//// INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
+//// THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) REGARDLESS OF THE
+//// THEORY OF LIABILITY (CONTRACT, TORT OR OTHERWISE), EVEN IF SUCH HOLDER
+//// OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+//// 
+//// (C) Ericsson AB 2005-2013. All Rights Reserved.
+//// 
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "image.h"
+
+// Remove warnings for unsafe functions such as strcpy
+#pragma warning(disable : 4996)
+// Remove warnings for conversions between different types (possible loss of data)
+#pragma warning(disable : 4244)
+
+// Removes comments in a .ppm file
+// (i.e., lines starting with #)
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void removeComments(FILE *f1)
+{
+	int c;	// int rather than char so that EOF stays distinguishable from data
+	// Skip every leading line that starts with '#', whatever its length. The original
+	// read at most 1023 bytes per comment and left the rest of a longer line unread.
+	while((c = getc(f1)) == '#')
+	{
+		do { c = getc(f1); } while(c != '\n' && c != EOF);
+	}
+	ungetc(c, f1);	// hand back the first non-comment character (no effect for EOF)
+}
+
+
+// Removes white spaces in a .ppm file
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+void removeSpaces(FILE *f1)
+{
+	// Swallow blanks, tabs, newlines, form feeds and carriage returns, then push the
+	// first other character read back onto the stream so the caller sees it next.
+	for(;;)
+	{
+		const int next = getc(f1);
+		const bool blank = (next == ' ' || next == '\t' || next == '\n' || next == '\f' || next == '\r');
+		if(!blank) { ungetc(next, f1); return; }
+	}
+}
+
+// fReadPPM
+//
+// reads a ppm file with P6 header (meaning binary, as opposed to P3, which is ASCII)
+// and returns the image in pixels.
+//
+// The header must look like this:
+// 
+// P6
+// # Comments (not necessary)
+// width height
+// 255
+//
+// after that follows RGBRGBRGB...
+// 
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+bool fReadPPM(char *filename, int &width, int &height, unsigned char *&pixels, int targetbitrate)
+{
+	// Reads a binary (P6) PPM whose maxval is 255 or 65535 and returns it in 'pixels' as a
+	// malloc'ed RGB buffer with targetbitrate (8 or 16) bits per channel; false on failure.
+	FILE *f1 = fopen(filename, "rb");
+	int maximum = 0;
+	if(f1)
+	{
+		char line[255];
+
+		removeSpaces(f1);
+		removeComments(f1);
+		removeSpaces(f1);
+
+		if(fscanf(f1, "%254s", line) != 1)	// width-limited so a long token cannot overrun 'line'
+			line[0] = '\0';
+		if(strcmp(line, "P6")!=0)
+		{
+			printf("Error: %s is not binary\n", filename);	// the argument for %s was missing
+			printf("(Binary .ppm files start with P6).\n");
+			fclose(f1);
+			return false;
+		}
+		removeSpaces(f1);
+		removeComments(f1);
+		removeSpaces(f1);
+		// A header field that fails to parse is rejected instead of testing a stale value.
+		const int dimsRead = fscanf(f1, "%d %d", &width, &height);
+		if( dimsRead != 2 || width<=0 || height <=0)
+		{
+			printf("Error: width or height negative. File: %s\n",filename);
+			fclose(f1);
+			return false;
+		}
+
+		removeSpaces(f1);
+		removeComments(f1);
+		removeSpaces(f1);
+
+		const int maxRead = fscanf(f1, "%d", &maximum);
+		if( maxRead != 1 || (maximum!= 255&&maximum!=(1<<16)-1))
+		{
+			printf("Error: Color resolution must be 255. File: %s\n",filename);
+			fclose(f1);
+			return false;
+		}
+
+		// maxval picks the sample width stored in the file: 255 -> 8 bits, 65535 -> 16 bits.
+		int bitrate=8;
+		if(maximum!=255)
+			bitrate=16;
+
+		// We need to remove the newline; also stop at EOF so a truncated file cannot hang us.
+		int c = 0;
+		while(c != '\n' && c != EOF)
+			c = getc(f1);
+		const size_t samples = (size_t)3*width*height;	// size_t: buffer sizes must not overflow int
+		unsigned char* readbuffer = (unsigned char*) malloc(samples*(bitrate/8));
+		if(!readbuffer)
+		{
+			printf("Error: Could not allocate memory for image. File: %s\n", filename);
+			fclose(f1);
+			return false;
+		}
+
+		if(fread(readbuffer, samples*(bitrate/8), 1, f1) != 1)
+		{
+			printf("Error: Could not read all pixels. File: %s\n", filename);
+			free(readbuffer);	// was free(pixels): leaked this buffer and freed the caller's pointer
+			fclose(f1);
+			return false;
+		}
+
+		// The raw samples are loaded. Hand them over as they are, or convert to the target
+		// depth: 16 -> 8 keeps the first byte of each sample, 8 -> 16 stores each byte twice.
+		if(targetbitrate==bitrate)
+			pixels=readbuffer;
+		else 
+		{
+			pixels = (unsigned char*) malloc(samples*targetbitrate/8);
+			if(!pixels) { printf("Error: Could not allocate memory for image. File: %s\n", filename); free(readbuffer); fclose(f1); return false; }
+			if(targetbitrate<bitrate) 
+			{
+				//cut least significant bits to go from 16 -> 8 bits..
+				printf("converting 16 bit input to 8 bits\n");
+				for(int x=0; x<width; x++) 
+				{
+					for(int y=0; y<height; y++) 
+					{
+						for(int c=0; c<3; c++) 
+						{
+							pixels[3*(x+y*width)+c]=readbuffer[6*(x+y*width)+2*c];
+						}
+					}
+				}
+			}
+			else 
+			{
+				//replicate 8 bits to go from 8 -> 16 bits...
+				printf("converting 8 bit input to 16 bits\n");
+				for(int x=0; x<width; x++) 
+				{
+					for(int y=0; y<height; y++) 
+					{
+						for(int c=0; c<3; c++) 
+						{
+							pixels[6*(x+y*width)+2*c]=readbuffer[3*(x+y*width)+c];
+							pixels[6*(x+y*width)+2*c+1]=readbuffer[3*(x+y*width)+c];
+						}
+					}
+				}
+			}
+			free(readbuffer);
+		}
+		fclose(f1);
+		return true;
+	}
+	else
+	{
+		printf("Error: Coult not open file %s\n", filename);
+		return false;
+	}
+}
+
+// Write PPM 
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+bool fWritePPM(char *filename, int width, int height, unsigned char *pixels, int bitrate, bool reverse_y)
+{
+	// Writes 'pixels' (RGB, bitrate/8 bytes per channel) as a binary P6 PPM, last row first
+	// when reverse_y is set. Returns false if the file cannot be opened or a write fails.
+	FILE *fsave = fopen(filename, "wb");
+	const int max = (1<<bitrate)-1;
+	if(fsave)
+	{
+		const size_t rowBytes = (size_t)3*width*(bitrate/8);	// size_t keeps row offsets from overflowing int
+		fprintf(fsave, "P6\n%d %d\n%d\n", width, height,max);
+		for(int q = 0; q< height; q++)
+		{
+			// Source row for output row q: mirrored vertically when reverse_y is set.
+			const size_t row = reverse_y ? (size_t)(height-1-q) : (size_t)q;
+			fwrite(pixels+row*rowBytes, rowBytes, 1, fsave);
+		}
+		// Buffered write errors (e.g. disk full) show up in ferror()/fclose(); the original
+		// ignored them and reported success even for a truncated file.
+		const bool written = !ferror(fsave);
+		const bool closed = (fclose(fsave) == 0);
+		return written && closed;
+	}
+	else
+	{
+		printf("Error: Could not open the file %s.\n",filename);
+		return(false);
+	}
+}
+
+// WritePGM
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+bool fWritePGM(char *filename, int width, int height, unsigned char *pixels,bool reverse_y, int bitdepth)
+{
+	// Writes a single-channel image as a binary P5 PGM (one byte per sample, two when
+	// bitdepth is 16), last row first when reverse_y is set. False on open or write failure.
+	FILE *f = fopen(filename,"wb");
+	if(f)
+	{
+		const int max = (1<<bitdepth)-1;
+		fprintf(f,"P5\n%d %d\n%d\n",width,height,max);
+		// Bytes per row; kept separate from 'width' so the header value is not disturbed.
+		const size_t rowBytes = (bitdepth==16) ? (size_t)width*2 : (size_t)width;
+		for(int q=0;q<height;q++)
+		{
+			const size_t row = reverse_y ? (size_t)(height-1-q) : (size_t)q;
+			fwrite(pixels+row*rowBytes,rowBytes,1,f);
+		}
+		// Report buffered write errors (e.g. disk full) instead of always claiming success.
+		const bool written = !ferror(f);
+		const bool closed = (fclose(f) == 0);
+		return written && closed;
+	}
+	else
+	{
+		printf("Error: could not open file <%s>.\n",filename);
+		return false;
+	}
+}
+
+/* reads a pgm file with the P5 header (means raw greyscale), puts data into pixel pointer and returns bit depth (8 or 16 bpp) */
+/* the header looks like this:
+ *---------
+ * P5
+ * # comments if you want to
+ * width height
+ * 255
+ *---------
+ * then follows one grey sample per pixel (two bytes per sample when the maximum is 65535)
+ */
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2005-2013. All Rights Reserved.
+int fReadPGM(char *filename, int &width, int &height, unsigned char *&pixels, int wantedBitDepth)
+{
+	// Reads a binary (P5) PGM whose maxval is 255 or 65535 into a malloc'ed buffer returned via
+	// 'pixels', converting the samples to wantedBitDepth bits when the file's depth differs.
+	// Returns the bit depth stored in the file (8 or 16), or 0 on failure.
+	FILE *f = fopen(filename,"rb");
+	int colres = 0;
+	int bitdepth=8;
+	if(f)
+	{
+		char str[100];
+		removeSpaces(f);
+		removeComments(f);
+		removeSpaces(f);
+		if(fscanf(f,"%99s",str)!=1 || strcmp(str,"P5")!=0)	// width-limited: a long token cannot overrun str
+		{
+			printf("Error: the alpha image file must be of raw color PGM format,\n");
+			printf("i.e., it must have P5 in the header. File: %s\n",filename);
+			fclose(f);
+			return 0;
+		}
+		removeSpaces(f);
+		removeComments(f);
+		removeSpaces(f);
+		if(fscanf(f,"%d %d",&width,&height)!=2 || width<=0 || height<=0)
+		{
+			printf("Error: width and height of the image must be greater than zero. File: %s\n",filename);
+			fclose(f);
+			return 0;
+		}
+		removeSpaces(f);
+		removeComments(f);
+		removeSpaces(f);
+		if(fscanf(f,"%d",&colres)!=1 || (colres!=255&&colres!=65535))
+		{
+			printf("Error: color resolution must be 255 or 65535.File: %s\n",filename);
+			fclose(f);
+			return 0;
+		}
+		if(colres==65535)
+			bitdepth=16;
+		/* gotta eat the newline too; stop at EOF so a truncated file cannot hang us */
+		int ch=0;
+		while(ch!='\n' && ch!=EOF) ch=getc(f);
+
+		pixels=(unsigned char*)malloc(width*height*bitdepth/8);
+		if(!pixels)
+		{
+			printf("Error: could not allocate memory for the pixels of the texture. File: %s\n",filename);
+			fclose(f);
+			return 0;	 
+		}
+		if(fread(pixels,width*height*bitdepth/8,1,f)!=1)
+		{
+			printf("Error: could not read %d bytes of pixel info. File: %s\n",width*height*bitdepth/8,filename);
+			free(pixels);
+			pixels=NULL;	// never leave the caller holding a freed pointer
+			fclose(f);
+			return 0;
+		}
+		fclose(f);
+		printf("read %d-bit alpha channel",bitdepth);
+		if(bitdepth!=wantedBitDepth) 
+		{
+			printf(", converting to %d-bit!",wantedBitDepth);
+			unsigned char* newpixels = (unsigned char*)malloc(width*height*wantedBitDepth/8);
+			if(!newpixels)	// conversion buffer unavailable: drop the raw data and report failure
+			{ printf("\nError: could not allocate memory for the pixels of the texture. File: %s\n",filename); free(pixels); pixels=NULL; return 0; }
+			for(int x=0; x<width; x++) 
+			{
+				for(int y=0; y<height; y++) 
+				{
+					if(bitdepth<wantedBitDepth) 
+					{
+						//do bit-replication to get 2-bytes per pixel
+						newpixels[2*(x+y*width)]=pixels[x+y*width];
+						newpixels[2*(x+y*width)+1]=pixels[x+y*width];
+					}
+					else 
+					{
+						//simply truncate the extra data..
+						newpixels[(x+y*width)]=pixels[2*(x+y*width)];
+					}
+				}
+			}
+			free(pixels);
+			pixels=newpixels;
+		}
+		printf("\n");
+		return bitdepth;
+	}
+	else
+	{
+		printf("Error: could not open %s.\n",filename);
+		return 0;
+	}   
+}
+/* writes a .tga file from two arrays --- one RGB array and one alpha-array */
+/* */
+// NO WARRANTY --- SEE STATEMENT IN TOP OF FILE (C) Ericsson AB 2012. All Rights Reserved.
+bool fWriteTGAfromRGBandA(char *filename, int width, int height, unsigned char *pixelsRGB, unsigned char *pixelsA, bool reverse_y)
+{
+	// Writes an uncompressed 32-bit TGA image.
+	//   filename  : path of the file to create
+	//   pixelsRGB : width*height pixels, 3 bytes each, read in R,G,B order
+	//   pixelsA   : width*height alpha bytes
+	//   reverse_y : emit the rows from the last one to the first
+	// Returns false if the file cannot be opened or if writing fails.
+	FILE *f1;
+
+	if( (f1 = fopen(filename,"wb")) == NULL)
+	{
+		return false;
+	}
+
+	// First write header: 18 bytes, assembled explicitly. The 16-bit fields are stored low
+	// byte first; the original fwrite() of a 'short' emitted them in host byte order, which
+	// produces a wrong header on big-endian machines. Only the low 16 bits of width and
+	// height are stored.
+	unsigned char header[18];
+	header[0]  = 0;	// ID field (0)
+	header[1]  = 0;	// Palette? (no=0)
+	header[2]  = 2;	// Image type (rgb=2)
+	header[3]  = 0;	// Palette stuff... 0
+	header[4]  = 0;
+	header[5]  = 0;	// Palette stuff... 0
+	header[6]  = 0;
+	header[7]  = 0;	// Palette stuff... 0
+	header[8]  = 0;	// x-origin
+	header[9]  = 0;
+	header[10] = 0;	// y-origin
+	header[11] = 0;
+	header[12] = (unsigned char)(width & 0xff);			// width, low byte
+	header[13] = (unsigned char)((width >> 8) & 0xff);	// width, high byte
+	header[14] = (unsigned char)(height & 0xff);		// height, low byte
+	header[15] = (unsigned char)((height >> 8) & 0xff);	// height, high byte
+	header[16] = 32;	// Bits per pixel = 32
+	header[17] = 8;	// flip bits = 8
+	fwrite(header, sizeof(header), 1, f1);
+
+	// Write pixels in BGRA format, one pixel (4 bytes) per fwrite() call.
+	for(int row = 0; row < height; row++)
+	{
+		// Source row for this output row: counted from the end when reverse_y is set.
+		const int yy = reverse_y ? (height-1-row) : row;
+		for(int xx = 0; xx<width; xx++)
+		{
+			const size_t q = (size_t)yy*width+xx;	// pixel index; size_t avoids int overflow
+			unsigned char bgra[4];
+			bgra[0] = pixelsRGB[3*q+2];	// B
+			bgra[1] = pixelsRGB[3*q+1];	// G
+			bgra[2] = pixelsRGB[3*q+0];	// R
+			bgra[3] = pixelsA[q];		// A
+			fwrite(bgra, sizeof(bgra), 1, f1);
+		}
+	}
+
+	// Buffered write errors (e.g. disk full) show up in ferror()/fclose(); the original
+	// ignored them and reported success even for a truncated file.
+	const bool written = !ferror(f1);
+	const bool closed = (fclose(f1) == 0);
+	return written && closed;
+}
diff --git a/extern/etcpack/image.h b/extern/etcpack/image.h
new file mode 100755
index 0000000..f76c1f0
--- /dev/null
+++ b/extern/etcpack/image.h
@@ -0,0 +1,65 @@
+//// etcpack v2.74
+//// 
+//// NO WARRANTY 
+//// 
+//// BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE THE PROGRAM IS PROVIDED
+//// "AS IS". ERICSSON MAKES NO REPRESENTATIONS OF ANY KIND, EXTENDS NO
+//// WARRANTIES OR CONDITIONS OF ANY KIND; EITHER EXPRESS, IMPLIED OR
+//// STATUTORY; INCLUDING, BUT NOT LIMITED TO, EXPRESS, IMPLIED OR
+//// STATUTORY WARRANTIES OR CONDITIONS OF TITLE, MERCHANTABILITY,
+//// SATISFACTORY QUALITY, SUITABILITY AND FITNESS FOR A PARTICULAR
+//// PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+//// PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+//// THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. ERICSSON
+//// MAKES NO WARRANTY THAT THE MANUFACTURE, SALE, OFFERING FOR SALE,
+//// DISTRIBUTION, LEASE, USE OR IMPORTATION UNDER THE LICENSE WILL BE FREE
+//// FROM INFRINGEMENT OF PATENTS, COPYRIGHTS OR OTHER INTELLECTUAL
+//// PROPERTY RIGHTS OF OTHERS, AND THE VALIDITY OF THE LICENSE IS SUBJECT
+//// TO YOUR SOLE RESPONSIBILITY TO MAKE SUCH DETERMINATION AND ACQUIRE
+//// SUCH LICENSES AS MAY BE NECESSARY WITH RESPECT TO PATENTS, COPYRIGHT
+//// AND OTHER INTELLECTUAL PROPERTY OF THIRD PARTIES.
+//// 
+//// FOR THE AVOIDANCE OF DOUBT THE PROGRAM (I) IS NOT LICENSED FOR; (II)
+//// IS NOT DESIGNED FOR OR INTENDED FOR; AND (III) MAY NOT BE USED FOR;
+//// ANY MISSION CRITICAL APPLICATIONS SUCH AS, BUT NOT LIMITED TO
+//// OPERATION OF NUCLEAR OR HEALTHCARE COMPUTER SYSTEMS AND/OR NETWORKS,
+//// AIRCRAFT OR TRAIN CONTROL AND/OR COMMUNICATION SYSTEMS OR ANY OTHER
+//// COMPUTER SYSTEMS AND/OR NETWORKS OR CONTROL AND/OR COMMUNICATION
+//// SYSTEMS ALL IN WHICH CASE THE FAILURE OF THE PROGRAM COULD LEAD TO
+//// DEATH, PERSONAL INJURY, OR SEVERE PHYSICAL, MATERIAL OR ENVIRONMENTAL
+//// DAMAGE. YOUR RIGHTS UNDER THIS LICENSE WILL TERMINATE AUTOMATICALLY
+//// AND IMMEDIATELY WITHOUT NOTICE IF YOU FAIL TO COMPLY WITH THIS
+//// PARAGRAPH.
+//// 
+//// IN NO EVENT WILL ERICSSON, BE LIABLE FOR ANY DAMAGES WHATSOEVER,
+//// INCLUDING BUT NOT LIMITED TO PERSONAL INJURY, ANY GENERAL, SPECIAL,
+//// INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR IN
+//// CONNECTION WITH THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT
+//// NOT LIMITED TO LOSS OF PROFITS, BUSINESS INTERUPTIONS, OR ANY OTHER
+//// COMMERCIAL DAMAGES OR LOSSES, LOSS OF DATA OR DATA BEING RENDERED
+//// INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
+//// THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) REGARDLESS OF THE
+//// THEORY OF LIABILITY (CONTRACT, TORT OR OTHERWISE), EVEN IF SUCH HOLDER
+//// OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+//// 
+//// (C) Ericsson AB 2005-2013. All Rights Reserved.
+//// 
+
+#ifndef IMAGE_H
+#define IMAGE_H
+
+bool fReadPPM(char *filename, int &width, int &height, unsigned char *&pixels, int targetbitrate);	// read a binary (P6) PPM into a malloc'ed buffer; false on failure
+bool fWritePPM(char *filename, int width, int height, unsigned char *pixels, int bitrate, bool reverse_y);	// write a binary (P6) PPM; false if the file cannot be opened
+
+bool fReadPFM(char *filename, int &width, int &height, float *&pixels);	// NOTE(review): not defined in image.cxx in this patch - confirm where it lives
+bool fWritePFM(char *filename, int width, int height, float *pixels,bool reverse_y);	// NOTE(review): not defined in image.cxx in this patch - confirm where it lives
+// write a grey scale image (binary PGM, P5 header)
+bool fWritePGM(char *filename, int width, int height, unsigned char *pixels,bool reverse_y, int bitdepth);
+int fReadPGM(char *filename, int &width, int &height, unsigned char *&pixels, int wantedBitDepth);	// returns the bit depth found in the file (8 or 16), or 0 on failure
+// write a TGA image with both RGB and alpha
+bool fWriteTGAfromRGBandA(char *filename, int width, int height, unsigned char *pixelsRGB, unsigned char *pixelsA, bool reverse_y);
+
+#endif // IMAGE_H
+
+
+
diff --git a/extern/poshlib/posh.h b/extern/poshlib/posh.h
index 0fc39ce..5382294 100644
--- a/extern/poshlib/posh.h
+++ b/extern/poshlib/posh.h
@@ -332,7 +332,7 @@ LLVM:
 #  define POSH_OS_STRING "MinGW"
 #endif
 
-#if defined GO32 && defined DJGPP && defined __MSDOS__ 
+#if defined GO32 && defined DJGPP && defined __MSDOS__
 #  define POSH_OS_GO32 1
 #  define POSH_OS_STRING "GO32/MS-DOS"
 #endif
diff --git a/extern/pvrtextool/Include/PVRTArray.h b/extern/pvrtextool/Include/PVRTArray.h
new file mode 100644
index 0000000..ca493bd
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTArray.h
@@ -0,0 +1,568 @@
+/******************************************************************************
+
+ @File         PVRTArray.h
+
+ @Title        PVRTArray
+
+ @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     ANSI compatible
+
+ @Description  Expanding array template class. Allows appending and direct
+               access. Mixing access methods should be approached with caution.
+
+******************************************************************************/
+#ifndef __PVRTARRAY_H__
+#define __PVRTARRAY_H__
+
+//ACS: PVRTGlobal.h was dragging in too much stuff (like windows.h & crtdbg.h)
+// so I split the relevant stuff out into a new file, PVRTTypes.h
+//#include "PVRTGlobal.h"
+#include "PVRTTypes.h"
+#include "PVRTError.h"
+
+/*!****************************************************************************
+Class
+******************************************************************************/
+
+/*!***************************************************************************
+* @Class CPVRTArray
+* @Brief Expanding array template class.
+* @Description Expanding array template class.
+*****************************************************************************/
+template<typename T>
+class CPVRTArray
+{
+public:
+	/*!***************************************************************************
+	@Function			CPVRTArray
+	@Description		Blank constructor. Makes a default sized array.
+	*****************************************************************************/
+	CPVRTArray() : m_uiSize(0), m_uiCapacity(GetDefaultSize())
+	{
+		m_pArray = new T[GetDefaultSize()];	// default-capacity storage; no elements in use yet
+	}
+
+	/*!***************************************************************************
+	@Function			CPVRTArray
+	@Input				uiSize	initial capacity of array
+	@Description		Constructor taking initial size of array in elements.
+	*****************************************************************************/
+	CPVRTArray(const unsigned int uiSize) : m_uiSize(0), m_uiCapacity(uiSize)
+	{
+		_ASSERT(0 != uiSize);			// a zero-element request is asserted against
+		m_pArray = new T[m_uiCapacity];	// capacity only: the array still holds no elements
+	}
+
+	/*!***************************************************************************
+	@Function			CPVRTArray
+	@Input				original	the other dynamic array
+	@Description		Copy constructor.
+	*****************************************************************************/
+	CPVRTArray(const CPVRTArray& original) : m_uiSize(original.m_uiSize),
+											  m_uiCapacity(original.m_uiCapacity)
+	{
+		// Deep copy: storage of the same capacity as the source, then element-wise
+		// assignment of the entries that are in use.
+		m_pArray = new T[original.m_uiCapacity];
+		const T* const pSrc = original.m_pArray;
+		for(unsigned int uiIdx = 0; uiIdx != m_uiSize; ++uiIdx) m_pArray[uiIdx] = pSrc[uiIdx];
+	}
+
+	/*!***************************************************************************
+	@Function			CPVRTArray
+	@Input				pArray		an ordinary array
+	@Input				uiSize		number of elements passed
+	@Description		constructor from ordinary array.
+	*****************************************************************************/
+	CPVRTArray(const T* const pArray, const unsigned int uiSize) : m_uiSize(uiSize),
+														  m_uiCapacity(uiSize)
+	{
+		// Takes a private copy of the first uiSize entries of pArray, so the new
+		// array starts out full (size == capacity).
+		_ASSERT(0 != uiSize);
+		m_pArray = new T[m_uiCapacity];
+		for(unsigned int uiIdx = 0; uiIdx != uiSize; ++uiIdx)
+			m_pArray[uiIdx] = pArray[uiIdx];
+	}
+
+	/*!***************************************************************************
+	@Function			CPVRTArray
+	@Input				uiSize		initial capacity
+	@Input				val			value to populate with
+	@Description		constructor from a capacity and initial value.
+	*****************************************************************************/
+	CPVRTArray(const unsigned int uiSize, const T& val)	: m_uiSize(uiSize),
+														m_uiCapacity(uiSize)
+	{
+		// Allocates uiSize elements and assigns val to every one of them, so the
+		// array starts out full (size == capacity).
+		_ASSERT(0 != uiSize);
+		m_pArray = new T[m_uiCapacity];
+		for(T* pSlot = m_pArray; pSlot != m_pArray + uiSize; ++pSlot)
+			*pSlot = val;
+	}
+
+	/*!***************************************************************************
+	@Function			~CPVRTArray
+	@Description		Destructor.
+	*****************************************************************************/
+	virtual ~CPVRTArray()
+	{
+		// delete[] accepts a null pointer, so no explicit check is required.
+		delete [] m_pArray;
+	}
+
+	/*!***************************************************************************
+	@Function			Insert
+	@Input				pos		The position to insert the new element at
+	@Input				addT	The element to insert
+	@Return				The index of the new item or -1 on failure.
+	@Description		Inserts an element into the array, expanding it
+						if necessary.
+	*****************************************************************************/
+	int Insert(const unsigned int pos, const T& addT)
+	{
+		// Places addT at index pos, moving the elements from pos onwards up by one slot; a pos
+		// at or beyond the current size appends instead. Returns the index of the new element.
+		unsigned int uiIndex = pos;
+		if(pos >= m_uiSize) // Are we adding to the end
+			uiIndex = Append(addT);
+		else
+		{
+			unsigned int uiNewCapacity = 0;
+			T* pArray = m_pArray;
+
+			// Grow when the array is full. The original tested '>' here, which cannot hold while
+			// size <= capacity, so inserting into a full array wrote one element past the storage.
+			if(m_uiSize >= m_uiCapacity)
+			{
+				uiNewCapacity = m_uiCapacity + 10;	// Expand the array by 10.
+				pArray = new T[uiNewCapacity];		// New Array
+				if(!pArray)
+					return -1;						// Failed to allocate memory!
+				// Copy the first half to the new array
+				for(unsigned int i = 0; i < pos; ++i)
+				{
+					pArray[i] = m_pArray[i];
+				}
+			}
+
+			// Copy last half to the new array (shifted up by one; runs from the end downwards)
+			for(unsigned int i = m_uiSize; i > pos; --i)
+			{
+				pArray[i] = m_pArray[i - 1];
+			}
+
+			// Insert our new element
+			pArray[pos] = addT;
+			uiIndex = pos;
+
+			// Increase our size
+			++m_uiSize;
+
+			// Switch pointers and free memory if needed
+			if(pArray != m_pArray)
+			{
+				m_uiCapacity = uiNewCapacity;
+				delete[] m_pArray;
+				m_pArray = pArray;
+			}
+		}
+
+		return uiIndex;
+	}
+
+	/*!***************************************************************************
+	@Function			Append
+	@Input				addT	The element to append
+	@Return				The index of the new item.
+	@Description		Appends an element to the end of the array, expanding it
+						if necessary.
+	*****************************************************************************/
+	unsigned int Append(const T& addT)
+	{
+		const unsigned int uiSlot = Append();	// reserves the slot; this may reallocate the storage
+		*(m_pArray + uiSlot) = addT;
+		return uiSlot;
+	}
+
+	/*!***************************************************************************
+	@Function			Append
+	@Return				The index of the new item.
+	@Description		Creates space for a new item, but doesn't add. Instead
+						returns the index of the new item.
+	*****************************************************************************/
+	unsigned int Append()
+	{
+		// Make sure one more element fits (this may reallocate the storage), then
+		// extend the size by one. The previous size is the index of the new slot.
+		// SetCapacity()'s result is not examined here.
+		SetCapacity(m_uiSize + 1);
+		return m_uiSize++;
+	}
+
+	/*!***************************************************************************
+	@Function		Clear
+	@Description	Clears the array.
+	*****************************************************************************/
+	void Clear()
+	{
+		m_uiSize = 0;	// only the element count is reset; storage and capacity are kept
+	}
+
+	/*!***************************************************************************
+	@Function			Resize
+	@Input				uiSize		New size of array
+	@Description		Changes the array to the new size
+	*****************************************************************************/
+	EPVRTError Resize(const unsigned int uiSize)
+	{
+		// Ensure the storage can hold uiSize elements first; the size changes only if
+		// that succeeded, otherwise SetCapacity()'s error code is passed through.
+		const EPVRTError eResult = SetCapacity(uiSize);
+
+		if(eResult == PVR_SUCCESS)
+			m_uiSize = uiSize;
+		return eResult;
+	}
+
+	/*!***************************************************************************
+	@Function			SetCapacity
+	@Input				uiSize		New capacity of array
+	@Description		Expands array to new capacity
+	*****************************************************************************/
+	EPVRTError SetCapacity(const unsigned int uiSize)
+	{
+		// Never shrinks: a request that already fits leaves the array untouched.
+		if(m_uiCapacity >= uiSize)
+			return PVR_SUCCESS;
+
+		// Requests below twice the current capacity are rounded up to that doubling;
+		// anything larger is honoured exactly.
+		const unsigned int uiDoubled = m_uiCapacity*2;
+		const unsigned int uiTarget = (uiSize < uiDoubled) ? uiDoubled : uiSize;
+
+		T* const pGrown = new T[uiTarget];
+		if(!pGrown)
+		{
+			return PVR_FAIL;						// Failed to allocate memory!
+		}
+
+		// Carry the elements in use over to the new storage by assignment.
+		unsigned int uiIdx = 0;
+		while(uiIdx < m_uiSize)
+		{
+			pGrown[uiIdx] = m_pArray[uiIdx];
+			++uiIdx;
+		}
+
+		// Adopt the new storage, then release the old one.
+		T* const pStale = m_pArray;
+		m_uiCapacity	= uiTarget;
+		m_pArray		= pGrown;
+		delete [] pStale;
+		return PVR_SUCCESS;
+	}
+
+	/*!***************************************************************************
+	@Function			Copy
+	@Input				other	The CPVRTArray needing copied
+	@Description		A copy function. Will attempt to copy from other CPVRTArrays
+						if this is possible.
+	*****************************************************************************/
+	template<typename T2>
+	void Copy(const CPVRTArray<T2>& other)
+	{
+		// Replaces this array's contents with an element-wise converted copy of 'other'.
+		// The new storage (sized to other's capacity) is filled before the current
+		// storage is released.
+		T* const pFresh = new T[other.GetCapacity()];
+		if(!pFresh)
+			return;
+
+		unsigned int uiIdx = 0;
+		while(uiIdx < other.GetSize())
+		{
+			pFresh[uiIdx] = other[uiIdx];
+			++uiIdx;
+		}
+
+		delete [] m_pArray;		// deleting a null pointer is a no-op, so no check is needed
+		m_pArray		= pFresh;
+
+		m_uiCapacity	= other.GetCapacity();
+		m_uiSize		= other.GetSize();
+	}
+
+	/*!***************************************************************************
+	@Function			=
+	@Input				other	The CPVRTArray needing copied
+	@Description		assignment operator.
+	*****************************************************************************/
+	CPVRTArray& operator=(const CPVRTArray<T>& other)
+	{
+		// Self-assignment is a no-op; anything else is delegated to Copy().
+		const bool bSelf = (this == &other);
+		if(!bSelf) { Copy(other); }
+		return *this;
+	}
+
+	/*!***************************************************************************
+	@Function		operator+=
+	@Input			other		the array to append.
+	@Description	appends an existing CPVRTArray on to this one.
+	*****************************************************************************/
+	CPVRTArray& operator+=(const CPVRTArray<T>& other)
+	{
+		// Appending an array to itself is skipped.
+		if(this == &other)
+			return *this;
+
+		// Append other's elements one by one, in index order.
+		unsigned int uiNext = 0;
+		while(uiNext < other.GetSize())
+			Append(other[uiNext++]);
+		return *this;
+	}
+
+	/*!***************************************************************************
+	@Function			[]
+	@Input				uiIndex	index of element in array
+	@Return				the element indexed
+	@Description		indexed access into array. Note that this has no error
+						checking whatsoever
+	*****************************************************************************/
+	T& operator[](const unsigned int uiIndex)
+	{
+		_ASSERT(m_uiCapacity > uiIndex);	// the asserted bound is the capacity, not the size
+		return *(m_pArray + uiIndex);
+	}
+
+	/*!***************************************************************************
+	@Function			[]
+	@Input				uiIndex	index of element in array
+	@Return				The element indexed
+	@Description		Indexed access into array. Note that this has no error
+						checking whatsoever
+	*****************************************************************************/
+	const T& operator[](const unsigned int uiIndex) const
+	{
+		_ASSERT(m_uiCapacity > uiIndex);	// the asserted bound is the capacity, not the size
+		return *(m_pArray + uiIndex);
+	}
+
+	/*!***************************************************************************
+	@Function			GetSize
+	@Return				Size of array
+	@Description		Gives current size of array/number of elements
+	*****************************************************************************/
+	unsigned int GetSize() const
+	{
+		return this->m_uiSize;	// elements in use, not allocated slots
+	}
+
+	/*!***************************************************************************
+	@Function			GetDefaultSize
+	@Return				Default size of array
+	@Description		Gives the default size of array/number of elements
+	*****************************************************************************/
+	static unsigned int GetDefaultSize()
+	{
+		return 16u;	// capacity the default constructor allocates
+	}
+
+	/*!***************************************************************************
+	@Function			GetCapacity
+	@Return				Capacity of array
+	@Description		Gives current allocated size of array/number of elements
+	*****************************************************************************/
+	unsigned int GetCapacity() const
+	{
+		return this->m_uiCapacity;	// allocated slots, whether in use or not
+	}
+
+	/*!***************************************************************************
+	@Function			Contains
+	@Input				object		The object to check in the array
+	@Return				true if object is contained in this array.
+	@Description		Indicates whether the given object resides inside the 
+						array.
+	*****************************************************************************/
+	bool Contains(const T& object) const
+	{
+		// Linear scan of the elements in use, compared with operator==; the scan
+		// stops at the first match.
+		unsigned int uiPos = 0;
+		while(uiPos < m_uiSize && !(m_pArray[uiPos] == object))
+			++uiPos;
+		return uiPos < m_uiSize;
+	}
+
+	/*!***************************************************************************
+	@Function			Find
+	@Input				object		The object to check in the array
+	@Return				pointer to the found object or NULL.
+	@Description		Attempts to find the object in the array and returns a
+						pointer if it is found, or NULL if not found. The time
+						taken is O(N).
+	*****************************************************************************/
+	T* Find(const T& object) const
+	{
+		// Linear scan of the elements in use; the first one equal to object (operator==) wins.
+		for(T* pItem = m_pArray; pItem != m_pArray + m_uiSize; ++pItem)
+		{
+			if(*pItem == object) return pItem;
+		}
+		return NULL;
+	}
+
+	/*!***************************************************************************
+	@Function			Sort
+	@Input				predicate		Callable as bool predicate(left, right); a true
+										result makes the two neighbours swap places
+	@Description		Simple bubble-sort of the array. Full passes are repeated
+						until one makes no swap, or m_uiSize passes have run.
+	*****************************************************************************/
+	template<class Pred>
+	void Sort(Pred predicate)
+	{
+		for(unsigned int uiPass = 0; uiPass < m_uiSize; ++uiPass)
+		{
+			bool bSwapped = false;
+			for(unsigned int uiLeft = 0; uiLeft + 1 < m_uiSize; ++uiLeft)
+			{
+				T& rLeft = m_pArray[uiLeft];
+				T& rRight = m_pArray[uiLeft + 1];
+				if(!predicate(rLeft, rRight))
+					continue;
+				PVRTswap(rLeft, rRight);
+				bSwapped = true;
+			}
+			if(!bSwapped)
+				break;
+		}
+	}
+
+	/*!***************************************************************************
+	@Function		Remove
+	@Input			uiIndex		The index to remove
+	@Return			PVR_SUCCESS, or PVR_FAIL if uiIndex is out of range
+	@Description	Removes an element, shifting the later elements down by one.
+	*****************************************************************************/
+	virtual EPVRTError Remove(unsigned int uiIndex)
+	{
+		_ASSERT(uiIndex < m_uiSize);
+		// _ASSERT can compile to nothing, so reject a bad index explicitly; this
+		// also covers the empty array. Otherwise the size shrinks with no removal.
+		if(uiIndex >= m_uiSize)
+			return PVR_FAIL;
+
+		if(uiIndex == m_uiSize-1)
+			return RemoveLast();
+
+		m_uiSize--;
+		// Copy the data. memmove will only work for built-in types.
+		for(unsigned int uiNewIdx = uiIndex; uiNewIdx < m_uiSize; ++uiNewIdx)
+		{
+			m_pArray[uiNewIdx] = m_pArray[uiNewIdx+1];
+		}
+
+		return PVR_SUCCESS;
+	}
+
+	/*!***************************************************************************
+	@Function			RemoveLast
+	@Return				PVR_SUCCESS, or PVR_FAIL when the array is empty
+	@Description		Removes the last element. Only the size value is
+						decremented; the stored element is not touched.
+	*****************************************************************************/
+	virtual EPVRTError RemoveLast()
+	{
+		if(m_uiSize == 0)
+		{
+			// Nothing to remove.
+			return PVR_FAIL;
+		}
+
+		--m_uiSize;
+		return PVR_SUCCESS;
+	}
+
+protected:
+	unsigned int 	m_uiSize;				/*! current size of contents of array */
+	unsigned int	m_uiCapacity;			/*! currently allocated size of array */
+	T				*m_pArray;				/*! the actual array itself */
+};
+
+// note "this" is required for ISO standard C++ and gcc complains otherwise
+// http://lists.apple.com/archives/Xcode-users//2005/Dec/msg00644.html
+template<typename T>
+class CPVRTArrayManagedPointers : public CPVRTArray<T*>
+{
+public:
+	// Deletes every pointer held in the first m_uiSize slots of the array.
+	virtual ~CPVRTArrayManagedPointers()
+	{
+		if(this->m_pArray)
+		{
+			for(unsigned int i=0;i<this->m_uiSize;i++)
+			{
+				delete(this->m_pArray[i]);
+			}
+		}
+	}
+
+	/*!***************************************************************************
+	@Function		Remove
+	@Input			uiIndex		The index to remove
+	@Return			PVR_SUCCESS, or PVR_FAIL if uiIndex is out of range
+	@Description	Deletes the object at uiIndex and closes the gap in the array.
+	*****************************************************************************/
+	virtual EPVRTError Remove(unsigned int uiIndex)
+	{
+		_ASSERT(uiIndex < this->m_uiSize);
+		// _ASSERT can compile to nothing. A bad index must not get any further:
+		// the byte count below would wrap and a stray pointer would be deleted.
+		if(uiIndex >= this->m_uiSize)
+			return PVR_FAIL;
+
+		if(uiIndex == this->m_uiSize-1)
+			return this->RemoveLast();
+
+		// Bytes occupied by the pointers that follow the removed slot.
+		unsigned int uiSize = (this->m_uiSize - (uiIndex+1)) * sizeof(T*);
+
+		delete this->m_pArray[uiIndex];
+		memmove(this->m_pArray + uiIndex, this->m_pArray + (uiIndex+1), uiSize);
+
+		this->m_uiSize--;
+		return PVR_SUCCESS;
+	}
+
+	/*!***************************************************************************
+	@Function			RemoveLast
+	@Return				PVR_SUCCESS, or PVR_FAIL if there is nothing to remove
+	@Description		Deletes the object held in the last slot, then
+						decrements the size value.
+	*****************************************************************************/
+	virtual EPVRTError RemoveLast()
+	{
+		if(this->m_uiSize > 0 && this->m_pArray)
+		{
+			delete this->m_pArray[this->m_uiSize-1];
+			this->m_uiSize--;
+			return PVR_SUCCESS;
+		}
+		return PVR_FAIL;
+	}
+};
+
+#endif // __PVRTARRAY_H__
+
+/*****************************************************************************
+End of file (PVRTArray.h)
+*****************************************************************************/
+
diff --git a/extern/pvrtextool/Include/PVRTDecompress.h b/extern/pvrtextool/Include/PVRTDecompress.h
new file mode 100644
index 0000000..d817d7e
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTDecompress.h
@@ -0,0 +1,58 @@
+/******************************************************************************
+
+ @File         PVRTDecompress.h
+
+ @Title        PVRTDecompress
+
+ @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     ANSI compatible
+
+ @Description  PVRTC and ETC Texture Decompression.
+
+******************************************************************************/
+
+#ifndef _PVRTDECOMPRESS_H_
+#define _PVRTDECOMPRESS_H_
+
+/*!***********************************************************************
+ @Function		PVRTDecompressPVRTC
+ @Input			pCompressedData The PVRTC texture data to decompress
+ @Input			Do2bitMode Signifies whether the data is PVRTC2 or PVRTC4 (presumably non-zero selects PVRTC2 - TODO confirm)
+ @Input			XDim X dimension of the texture
+ @Input			YDim Y dimension of the texture
+ @Return		Returns the amount of data that was decompressed.
+ @Modified		pResultImage The decompressed texture data (written by this call; presumably needs XDim*YDim*4 bytes for RGBA 8888 - TODO confirm)
+ @Description	Decompresses PVRTC to RGBA 8888
+*************************************************************************/
+int PVRTDecompressPVRTC(const void *pCompressedData,
+				const int Do2bitMode,
+				const int XDim,
+				const int YDim,
+				unsigned char* pResultImage);
+
+/*!***********************************************************************
+@Function		PVRTDecompressETC
+@Input			pSrcData The ETC texture data to decompress
+@Input			x X dimension of the texture (passed by const reference)
+@Input			y Y dimension of the texture (passed by const reference)
+@Modified		pDestData The decompressed texture data (presumably needs x*y*4 bytes for RGBA 8888 - TODO confirm)
+@Input			nMode The format of the data (legal values are not visible in this header)
+@Returns		The number of bytes of ETC data decompressed
+@Description	Decompresses ETC to RGBA 8888
+*************************************************************************/
+int PVRTDecompressETC(const void * const pSrcData,
+						 const unsigned int &x,
+						 const unsigned int &y,
+						 void *pDestData,
+						 const int &nMode);
+
+
+#endif /* _PVRTDECOMPRESS_H_ */
+
+/*****************************************************************************
+ End of file (PVRTDecompress.h)
+*****************************************************************************/
+
diff --git a/extern/pvrtextool/Include/PVRTError.h b/extern/pvrtextool/Include/PVRTError.h
new file mode 100644
index 0000000..8103508
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTError.h
@@ -0,0 +1,71 @@
+/******************************************************************************
+
+ @File         PVRTError.h
+
+ @Title        PVRTError
+
+ @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     ANSI compatible
+
+ @Description  
+
+******************************************************************************/
+#ifndef _PVRTERROR_H_
+#define _PVRTERROR_H_
+
+#if defined(ANDROID)
+	#include <android/log.h>
+#else
+	#if defined(_WIN32)
+		#include <windows.h>
+	#else
+		#include <stdio.h>
+	#endif
+#endif
+/*!***************************************************************************
+ Macros
+*****************************************************************************/
+
+/*! Outputs a string to the platform's debug channel if built for debugging. A is printed verbatim: it is passed as data to a "%s" format, so '%' characters in it are harmless. */
+#if !defined(PVRTERROR_OUTPUT_DEBUG)
+	#if defined(_DEBUG) || defined(DEBUG)
+		#if defined(ANDROID)
+			#define PVRTERROR_OUTPUT_DEBUG(A) __android_log_print(ANDROID_LOG_INFO, "PVRTools", "%s", A);
+		#elif defined(_WIN32)
+			#define PVRTERROR_OUTPUT_DEBUG(A) OutputDebugStringA(A);
+		#else
+			#define PVRTERROR_OUTPUT_DEBUG(A) fprintf(stderr, "%s", A);
+		#endif
+	#else
+		#define PVRTERROR_OUTPUT_DEBUG(A)
+	#endif
+#endif
+
+
+/*!***************************************************************************
+ Enums
+*****************************************************************************/
+/*! Enum error codes. CPVRTMap::Reserve picks the larger of two codes as the "most serious" one, so the numeric order matters. */
+enum EPVRTError
+{
+	PVR_SUCCESS = 0,	/*!< e.g. returned by CPVRTArray::Remove when an element was removed */
+	PVR_FAIL = 1,		/*!< e.g. returned by CPVRTArray::RemoveLast on an empty array */
+	PVR_OVERFLOW = 2	/*!< not used in the code visible here - meaning to be confirmed */
+};
+
+/*!***************************************************************************
+ @Function			PVRTErrorOutputDebug
+ @Input				format		printf style format followed by arguments it requires
+ @Description		Outputs a string to the standard error. NOTE(review): only declared here; confirm in the definition whether it is silent outside debug builds, like PVRTERROR_OUTPUT_DEBUG.
+*****************************************************************************/
+void PVRTErrorOutputDebug(char const * const format, ...);
+
+#endif // _PVRTERROR_H_
+
+/*****************************************************************************
+End of file (PVRTError.h)
+*****************************************************************************/
+
diff --git a/extern/pvrtextool/Include/PVRTGlobal.h b/extern/pvrtextool/Include/PVRTGlobal.h
new file mode 100644
index 0000000..a7df228
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTGlobal.h
@@ -0,0 +1,278 @@
+/******************************************************************************
+
+ @File         PVRTGlobal.h
+
+ @Title        PVRTGlobal
+
+ @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     ANSI compatible
+
+ @Description  Global defines and typedefs for PVRTools
+
+******************************************************************************/
+#ifndef _PVRTGLOBAL_H_
+#define _PVRTGLOBAL_H_
+
+/*!***************************************************************************
+ Macros. Arguments may be evaluated more than once: pass no side effects.
+*****************************************************************************/
+#define PVRT_MIN(a,b)            (((a) < (b)) ? (a) : (b))
+#define PVRT_MAX(a,b)            (((a) > (b)) ? (a) : (b))
+#define PVRT_CLAMP(x, l, h)      (PVRT_MIN((h), PVRT_MAX((x), (l))))
+
+// avoid warning about unused parameter (argument parenthesised so any expression is accepted)
+#define PVRT_UNREFERENCED_PARAMETER(x) ((void) (x))
+
+#if defined(_WIN32) && !defined(__QT__)	/* Windows desktop */
+#if !defined(_CRTDBG_MAP_ALLOC)
+	#define _CRTDBG_MAP_ALLOC
+#endif
+	#include <windows.h>
+	#include <crtdbg.h>
+	#include <tchar.h>
+#endif /* _WIN32 && !__QT__ */
+
+#if defined(_WIN32) && !defined(__QT__) /* Windows desktop: nothing defined here; presumably <crtdbg.h> supplies _ASSERT/_RPTn - confirm */
+
+#else /* every other platform */
+#if defined(__linux__) || defined(__APPLE__) /* assertions become no-ops here, in debug builds too */
+	#define _ASSERT(a)((void)0)
+	#define _ASSERTE(a)((void)0)
+	#ifdef _DEBUG /* debug: _RPT0/_RPT1 go to printf unless already defined */
+		#ifndef _RPT0
+		#define _RPT0(a,b) printf(b)
+		#endif
+		#ifndef _RPT1
+		#define _RPT1(a,b,c) printf(b,c)
+		#endif
+	#else /* not debug: _RPT0/_RPT1 become no-ops unless already defined */
+		#ifndef _RPT0
+	    #define _RPT0(a,b)((void)0)
+		#endif
+		#ifndef _RPT1
+	    #define _RPT1(a,b,c)((void)0)
+		#endif
+	#endif /* _DEBUG */
+	#define _RPT2(a,b,c,d)((void)0)
+	#define _RPT3(a,b,c,d,e)((void)0)
+	#define _RPT4(a,b,c,d,e,f)((void)0)
+	#include <stdlib.h>
+	#include <string.h>
+	#define BYTE unsigned char
+	#define WORD unsigned short
+	#define DWORD unsigned int
+	typedef struct tagRGBQUAD {
+	BYTE    rgbBlue;
+	BYTE    rgbGreen;
+	BYTE    rgbRed;
+	BYTE    rgbReserved;
+	} RGBQUAD;
+	#define BOOL int
+#if !defined(TRUE)
+	#define TRUE 1
+#endif
+#if !defined(FALSE)
+	#define FALSE 0
+#endif
+#else /* neither Linux nor Apple: the debug macros expand to nothing; BYTE/WORD/DWORD/BOOL are not defined on this path */
+	#define _CRT_WARN 0
+	#define _RPT0(a,b)
+	#define _RPT1(a,b,c)
+	#define _RPT2(a,b,c,d)
+	#define _RPT3(a,b,c,d,e)
+	#define _RPT4(a,b,c,d,e,f)
+	#define _ASSERT(X)
+	#define _ASSERTE(X)
+#endif /* __linux__ || __APPLE__ */
+#endif /* Windows desktop / other platforms */
+
+#include <stdio.h>
+
+#define FREE(X)		{ if(X) { free(X); (X) = 0; } }	/* frees X when non-zero, then zeroes it; X is evaluated up to three times */
+
+#if 1 //ACS: I split this out into PVRTTypes.h
+
+ #include "PVRTTypes.h"
+
+#else // never compiled while the condition above is 1; everything down to the matching #endif is inactive
+
+// This macro is used to check at compile time that types are of a certain size
+// If the size does not equal the expected size, this typedefs an array of size 0
+// which causes a compile error
+#define PVRTSIZEASSERT(T, size) typedef int (sizeof_##T)[sizeof(T) == (size)]
+#define PVRTCOMPILEASSERT(T, expr)	typedef int (assert_##T)[expr]
+
+
+/****************************************************************************
+** Integer types
+****************************************************************************/
+
+typedef char				PVRTchar8;
+typedef signed char			PVRTint8;
+typedef signed short		PVRTint16;
+typedef signed int			PVRTint32;
+typedef unsigned char		PVRTuint8;
+typedef unsigned short		PVRTuint16;
+typedef unsigned int		PVRTuint32;
+
+typedef float				PVRTfloat32;
+
+#if (defined(__int64) || defined(_WIN32))
+typedef signed __int64     PVRTint64;
+typedef unsigned __int64   PVRTuint64;
+#elif defined(TInt64)
+typedef TInt64             PVRTint64;
+typedef TUInt64            PVRTuint64;
+#else
+typedef signed long long   PVRTint64;
+typedef unsigned long long PVRTuint64;
+#endif
+
+#if __SIZEOF_WCHAR_T__  == 4 || __WCHAR_MAX__ > 0x10000
+	#define PVRTSIZEOFWCHAR 4
+#else
+	#define PVRTSIZEOFWCHAR 2
+#endif
+
+PVRTSIZEASSERT(PVRTchar8,   1);
+PVRTSIZEASSERT(PVRTint8,   1);
+PVRTSIZEASSERT(PVRTuint8,  1);
+PVRTSIZEASSERT(PVRTint16,  2);
+PVRTSIZEASSERT(PVRTuint16, 2);
+PVRTSIZEASSERT(PVRTint32,  4);
+PVRTSIZEASSERT(PVRTuint32, 4);
+PVRTSIZEASSERT(PVRTint64,  8);
+PVRTSIZEASSERT(PVRTuint64, 8);
+PVRTSIZEASSERT(PVRTfloat32, 4);
+
+/*!**************************************************************************
+@Enum   ETextureFilter
+@Brief  Enum values for defining texture filtering
+****************************************************************************/
+enum ETextureFilter
+{
+	eFilter_Nearest,
+	eFilter_Linear,
+	eFilter_None,
+
+	eFilter_Size,
+	eFilter_Default		= eFilter_Nearest,
+	eFilter_MipDefault	= eFilter_None
+};
+
+/*!**************************************************************************
+@Enum   ETextureWrap
+@Brief  Enum values for defining texture wrapping
+****************************************************************************/
+enum ETextureWrap
+{
+	eWrap_Clamp,
+	eWrap_Repeat,
+
+	eWrap_Size,
+	eWrap_Default = eWrap_Repeat
+};
+#endif
+
+
+/****************************************************************************
+** swap template function
+****************************************************************************/
+/*!***************************************************************************
+ @Function		PVRTswap
+ @Input			a First object; holds b's original value afterwards
+ @Input			b Second object; holds a's original value afterwards
+ @Description	Exchanges a and b through one temporary copy and two assignments
+*****************************************************************************/
+
+template <typename T>
+inline void PVRTswap(T& a, T& b)
+{
+	T aOriginal = a;
+	a = b;
+	b = aOriginal;
+}
+
+/*!***************************************************************************
+ @Function		PVRTClamp
+ @Input			val		Value to clamp
+ @Input			min		Minimum legal value
+ @Input			max		Maximum legal value
+ @Description	A clamp template function that limits val to the range from
+				min to max. A copy of the chosen value is returned.
+*****************************************************************************/
+template <typename T>
+inline T PVRTClamp(const T& val, const T& min, const T& max)
+{
+	// The upper bound is checked before the lower one.
+	if(val > max)
+		return max;
+	return (val < min) ? min : val;
+}
+
+/*!***************************************************************************
+ @Function		PVRTByteSwap
+ @Input			pBytes A number
+ @Input			i32ByteNo Number of bytes in pBytes
+ @Description	Swaps the endianness of pBytes in place by exchanging bytes
+				pairwise from the two ends towards the middle
+*****************************************************************************/
+inline void PVRTByteSwap(unsigned char* pBytes, int i32ByteNo)
+{
+	// With fewer than two bytes the loop body never runs.
+	for(int iFront = 0, iBack = i32ByteNo - 1; iFront < iBack; ++iFront, --iBack)
+		PVRTswap<unsigned char>(pBytes[iFront], pBytes[iBack]);
+}
+
+/*!***************************************************************************
+ @Function		PVRTByteSwap32
+ @Input			ui32Long A number
+ @Returns		ui32Long with the order of its low four bytes reversed
+*****************************************************************************/
+inline unsigned int PVRTByteSwap32(unsigned int ui32Long)
+{
+	return ((ui32Long & 0x000000FFu) << 24) | ((ui32Long & 0x0000FF00u) << 8)
+	     | ((ui32Long & 0x00FF0000u) >> 8)  | ((ui32Long & 0xFF000000u) >> 24);
+}
+
+/*!***************************************************************************
+ @Function		PVRTByteSwap16
+ @Input			ui16Short A number
+ @Returns		ui16Short with its high and low bytes exchanged
+*****************************************************************************/
+inline unsigned short PVRTByteSwap16(unsigned short ui16Short)
+{
+	const unsigned int uiSwapped = (ui16Short << 8) | (ui16Short >> 8);
+	return static_cast<unsigned short>(uiSwapped);
+}
+
+/*!***************************************************************************
+ @Function		PVRTIsLittleEndian
+ @Returns		True if the platform the code is ran on is little endian
+ @Description	Probes the byte order on the first call and returns the stored answer afterwards
+*****************************************************************************/
+inline bool PVRTIsLittleEndian()
+{
+	static bool bIsInit = false;
+	static bool bLittleEndian;
+
+	if(bIsInit)
+		return bLittleEndian;
+
+	// Probe once: store 1 in a short and check which byte received it.
+	const short int probe = 0x0001;
+	const char* const pFirstByte = (const char*) &probe;
+	bLittleEndian = (pFirstByte[0] != 0);
+	bIsInit = true;
+	return bLittleEndian;
+}
+
+#endif // _PVRTGLOBAL_H_
+
+/*****************************************************************************
+ End of file (PVRTGlobal.h)
+*****************************************************************************/
+
diff --git a/extern/pvrtextool/Include/PVRTMap.h b/extern/pvrtextool/Include/PVRTMap.h
new file mode 100644
index 0000000..8442516
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTMap.h
@@ -0,0 +1,222 @@
+/******************************************************************************
+
+ @File         PVRTMap.h
+
+ @Title        PVRTMap
+
+ @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     ANSI compatible
+
+ @Description  A simple and easy to use implementation of a map.
+
+******************************************************************************/
+#ifndef __PVRTMAP_H__
+#define __PVRTMAP_H__
+
+#include "PVRTArray.h"
+
+/*!****************************************************************************
+Class
+******************************************************************************/
+
+/*!***************************************************************************
+* @Class		CPVRTMap
+* @Brief		Expanding map template class.
+* @Description	A simple and easy to use implementation of a map.
+*****************************************************************************/
+template <typename KeyType, typename DataType>
+class CPVRTMap
+{
+public:
+
+	/*!***********************************************************************
+	 @Function		CPVRTMap
+	 @Return		A new CPVRTMap.
+	 @Description	Constructor for a CPVRTMap.
+	*************************************************************************/
+	CPVRTMap() : m_Keys(), m_Data(), m_uiSize(0)
+	{}
+
+	/*!***********************************************************************
+	 @Function		~CPVRTMap
+	 @Description	Destructor for a CPVRTMap.
+	*************************************************************************/
+	~CPVRTMap()
+	{
+		//Clear the map, that's enough - the CPVRTArray members will tidy everything else up.
+		Clear();
+	}
+
+	/*!***********************************************************************
+	 @Function		Reserve
+	 @Input			uiSize	Capacity requested for both member arrays.
+	 @Return		The most serious error from either SetCapacity call.
+	 @Description	Sets the capacity of each member array to the requested size. The array used will only expand.
+	*************************************************************************/
+	EPVRTError Reserve(const PVRTuint32 uiSize)
+	{
+		//Evaluate each call once; PVRT_MAX is a macro and would otherwise repeat them.
+		const EPVRTError eKeys = m_Keys.SetCapacity(uiSize);
+		const EPVRTError eData = m_Data.SetCapacity(uiSize);
+		return PVRT_MAX(eKeys, eData);
+	}
+
+	/*!***********************************************************************
+	 @Function		GetSize
+	 @Return		Number of meaningful members in the map.
+	 @Description	Returns the number of meaningful members in the map.
+	*************************************************************************/
+	PVRTuint32 GetSize() const
+	{
+		return m_uiSize;
+	}
+
+	/*!***********************************************************************
+	 @Function		GetIndexOf
+	 @Input			key
+	 @Return		The index value for a mapped item.
+	 @Description	Gets the position of a particular key/data within the map.
+					If the return value is exactly equal to the value of 
+					GetSize() then the item has not been found.
+	*************************************************************************/
+	PVRTuint32 GetIndexOf(const KeyType key) const
+	{
+		//Loop through all the valid keys, returning the position of the first match.
+		for (PVRTuint32 i=0; i<m_uiSize; ++i)
+		{
+			if (m_Keys[i]==key)
+			{
+				return i;
+			}
+		}
+
+		//If not found, return the number of meaningful members.
+		return m_uiSize;
+	}
+
+	/*!***********************************************************************
+	 @Function		GetDataAtIndex
+	 @Input			uiIndex
+	 @Return		Pointer to the data at uiIndex, or NULL if uiIndex is not valid.
+	 @Description	Returns a pointer to the Data at a particular index. Deletion
+					of data at this pointer will lead to undefined behaviour.
+	*************************************************************************/
+	const DataType* GetDataAtIndex(const PVRTuint32 uiIndex) const
+	{
+		if (uiIndex >= m_uiSize)
+			return NULL;
+		return &(m_Data[uiIndex]);
+	}
+
+	/*!***********************************************************************
+	 @Function		operator[]
+	 @Input			key
+	 @Return		Data that is mapped to 'key'.
+	 @Description	If a mapping already exists for 'key' then it will return 
+					the associated data. If no mapping currently exists, a new 
+					element is created in place.
+	*************************************************************************/
+	DataType& operator[] (const KeyType key)
+	{
+		//Get the index of the key.
+		PVRTuint32 uiIndex = GetIndexOf(key);
+
+		//Return the mapped data if the key is already present.
+		if (uiIndex != m_uiSize)
+		{
+			return m_Data[uiIndex];
+		}
+
+		//Append the key to the Keys array.
+		m_Keys.Append(key);
+
+		//Append a value-initialised DataType, so that built-in types start at
+		//zero instead of holding an indeterminate value.
+		DataType sNewData = DataType();
+		m_Data.Append(sNewData);
+
+		//Increment the size of meaningful data.
+		++m_uiSize;
+
+		//Return the element that was just appended.
+		return m_Data[m_Keys.GetSize()-1];
+	}
+
+	/*!***********************************************************************
+	 @Function		Remove
+	 @Input			key
+	 @Return		Returns PVR_FAIL if item doesn't exist. 
+					Otherwise returns PVR_SUCCESS.
+	 @Description	Removes an element from the map if it exists.
+	*************************************************************************/
+	EPVRTError Remove(const KeyType key)
+	{
+		//Finds the index of the key.
+		PVRTuint32 uiIndex=GetIndexOf(key);
+
+		//If the key is invalid, fail.
+		if (uiIndex==m_uiSize)
+			return PVR_FAIL;
+
+		//Decrement the size of the map; m_uiSize is now the index of the last element.
+		m_uiSize--;
+
+		//Move the last key/data pair into the gap, unless the removed pair was the last one.
+		if (uiIndex!=m_uiSize)
+		{
+			m_Keys[uiIndex]=m_Keys[m_uiSize];
+			m_Data[uiIndex]=m_Data[m_uiSize];
+		}
+
+		//Shrink both arrays as well, so the next Append in operator[] lands at index m_uiSize.
+		m_Keys.RemoveLast();
+		m_Data.RemoveLast();
+
+		return PVR_SUCCESS;
+	}
+
+	/*!***********************************************************************
+	 @Function		Clear
+	 @Description	Clears the Map of all data values.
+	*************************************************************************/
+	void Clear()
+	{
+		//Set the size to 0.
+		m_uiSize=0;
+		m_Keys.Clear();
+		m_Data.Clear();
+	}
+
+	/*!***********************************************************************
+	 @Function		Exists
+	 @Input			key
+	 @Return		Whether data exists for the specified key or not.
+	 @Description	Checks whether or not data exists for the specified key.
+	*************************************************************************/
+	bool Exists(const KeyType key) const
+	{
+		//Checks for a valid index for key, if not, returns false.
+		return (GetIndexOf(key) != m_uiSize);
+	}
+
+private:
+
+	//Array of all the keys. Indices match m_Data.
+	CPVRTArray<KeyType> m_Keys;
+
+	//Array of all the data values, stored by value. Indices match m_Keys.
+	CPVRTArray<DataType> m_Data;
+
+	//The number of meaningful members in the map.
+	PVRTuint32 m_uiSize;
+};
+
+#endif // __PVRTMAP_H__
+
+/*****************************************************************************
+End of file (PVRTMap.h)
+*****************************************************************************/
+
diff --git a/extern/pvrtextool/Include/PVRTString.h b/extern/pvrtextool/Include/PVRTString.h
new file mode 100644
index 0000000..4dede32
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTString.h
@@ -0,0 +1,985 @@
+/******************************************************************************
+
+ @File         PVRTString.h
+
+ @Title        PVRTString
+
+ @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     ANSI compatible
+
+ @Description  A string class that can be used as drop-in replacement for
+               std::string on platforms/compilers that don't provide a full C++
+               standard library.
+
+******************************************************************************/
+#ifndef _PVRTSTRING_H_
+#define _PVRTSTRING_H_
+
+#include <stdio.h>
+#define _USING_PVRTSTRING_
+
+/*!***************************************************************************
+@Class CPVRTString
+@Brief A string class
+*****************************************************************************/
+class CPVRTString
+{
+
+private:
+
+	// Checking printf and scanf format strings
+#if defined(_CC_GNU_) || defined(__GNUG__) || defined(__GNUC__)
+#define FX_PRINTF(fmt,arg) __attribute__((format(printf,fmt,arg)))
+#define FX_SCANF(fmt,arg)  __attribute__((format(scanf,fmt,arg)))
+#else
+#define FX_PRINTF(fmt,arg)
+#define FX_SCANF(fmt,arg)
+#endif
+
+public:
+	typedef	size_t	size_type;
+	typedef	char value_type;
+	typedef	char& reference;
+	typedef	const char& const_reference;
+
+	static const size_type npos;
+
+
+	
+
+	/*!***********************************************************************
+	@Function			CPVRTString
+	@Input				_Ptr	A string
+	@Input				_Count	Length of _Ptr
+	@Description		Constructor
+	************************************************************************/
+	CPVRTString(const char* _Ptr, size_t _Count = npos);
+
+	/*!***********************************************************************
+	@Function			CPVRTString
+	@Input				_Right	A string
+	@Input				_Roff	Offset into _Right
+	@Input				_Count	Number of chars from _Right to assign to the new string
+	@Description		Constructor
+	************************************************************************/
+	CPVRTString(const CPVRTString& _Right, size_t _Roff = 0, size_t _Count = npos);
+
+	/*!***********************************************************************
+	@Function			CPVRTString
+	@Input				_Count	Length of new string
+	@Input				_Ch		A char to fill it with
+	@Description		Constructor
+	*************************************************************************/
+	CPVRTString(size_t _Count, const char _Ch);
+
+	/*!***********************************************************************
+	@Function			CPVRTString
+	@Input				_Ch	A char
+	@Description		Constructor
+	*************************************************************************/
+	CPVRTString(const char _Ch);
+
+	/*!***********************************************************************
+	@Function			CPVRTString
+	@Description		Constructor
+	************************************************************************/
+	CPVRTString();
+
+	/*!***********************************************************************
+	@Function			~CPVRTString
+	@Description		Destructor
+	************************************************************************/
+	virtual ~CPVRTString();
+
+	/*!***********************************************************************
+	@Function			append
+	@Input				_Ptr	A string
+	@Returns			Updated string
+	@Description		Appends a string
+	*************************************************************************/
+	CPVRTString& append(const char* _Ptr);
+
+	/*!***********************************************************************
+	@Function			append
+	@Input				_Ptr	A string
+	@Input				_Count	String length
+	@Returns			Updated string
+	@Description		Appends a string of length _Count
+	*************************************************************************/
+	CPVRTString& append(const char* _Ptr, size_t _Count);
+
+	/*!***********************************************************************
+	@Function			append
+	@Input				_Str	A string
+	@Returns			Updated string
+	@Description		Appends a string
+	*************************************************************************/
+	CPVRTString& append(const CPVRTString& _Str);
+
+	/*!***********************************************************************
+	@Function			append
+	@Input				_Str	A string
+	@Input				_Off	A position in string
+	@Input				_Count	Number of letters to append
+	@Returns			Updated string
+	@Description		Appends _Count letters of _Str from _Off in _Str
+	*************************************************************************/
+	CPVRTString& append(const CPVRTString& _Str, size_t _Off, size_t _Count);
+
+	/*!***********************************************************************
+	@Function			append
+	@Input				_Ch		A char
+	@Input				_Count	Number of times to append _Ch
+	@Returns			Updated string
+	@Description		Appends _Ch _Count times
+	*************************************************************************/
+	CPVRTString& append(size_t _Count, const char _Ch);
+
+	//template<class InputIterator> CPVRTString& append(InputIterator _First, InputIterator _Last);
+
+	/*!***********************************************************************
+	@Function			assign
+	@Input				_Ptr A string
+	@Returns			Updated string
+	@Description		Assigns the string to the string _Ptr
+	*************************************************************************/
+	CPVRTString& assign(const char* _Ptr);
+
+	/*!***********************************************************************
+	@Function			assign
+	@Input				_Ptr A string
+	@Input				_Count Length of _Ptr
+	@Returns			Updated string
+	@Description		Assigns the string to the string _Ptr
+	*************************************************************************/
+	CPVRTString& assign(const char* _Ptr, size_t _Count);
+
+	/*!***********************************************************************
+	@Function			assign
+	@Input				_Str A string
+	@Returns			Updated string
+	@Description		Assigns the string to the string _Str
+	*************************************************************************/
+	CPVRTString& assign(const CPVRTString& _Str);
+
+	/*!***********************************************************************
+	@Function			assign
+	@Input				_Str A string
+	@Input				_Off Position in _Str of the first char to assign
+	@Input				_Count Number of chars to take from _Str (defaults to npos)
+	@Returns			Updated string
+	@Description		Assigns the string to _Count characters in string _Str starting at _Off
+	*************************************************************************/
+	CPVRTString& assign(const CPVRTString& _Str, size_t _Off, size_t _Count=npos);
+
+	/*!***********************************************************************
+	@Function			assign
+	@Input				_Ch A char
+	@Input				_Count Number of times to repeat _Ch
+	@Returns			Updated string
+	@Description		Assigns the string to _Count copies of _Ch
+	*************************************************************************/
+	CPVRTString& assign(size_t _Count, char _Ch);
+
+	//template<class InputIterator> CPVRTString& assign(InputIterator _First, InputIterator _Last);
+
+	//const_reference at(size_t _Off) const;
+	//reference at(size_t _Off);
+
+	// const_iterator begin() const;
+	// iterator begin();
+
+	/*!***********************************************************************
+	@Function			c_str
+	@Returns			const char* pointer of the string
+	@Description		Returns a const char* pointer of the string
+	*************************************************************************/
+	const char* c_str() const;
+
+	/*!***********************************************************************
+	@Function			capacity
+	@Returns			The size of the character array reserved
+	@Description		Returns the size of the character array reserved
+	*************************************************************************/
+	size_t capacity() const;
+
+	/*!***********************************************************************
+	@Function			clear
+	@Description		Clears the string
+	*************************************************************************/
+	void clear();
+
+	/*!***********************************************************************
+	@Function			compare
+	@Input				_Str A string to compare with
+	@Returns			0 if the strings match
+	@Description		Compares the string with _Str
+	*************************************************************************/
+	int compare(const CPVRTString& _Str) const;
+
+	/*!***********************************************************************
+	@Function			compare
+	@Input				_Pos1	Position to start comparing from
+	@Input				_Num1	Number of chars to compare
+	@Input				_Str 	A string to compare with
+	@Returns			0 if the strings match
+	@Description		Compares the string with _Str
+	*************************************************************************/
+	int compare(size_t _Pos1, size_t _Num1, const CPVRTString& _Str) const;
+
+	/*!***********************************************************************
+	@Function			compare
+	@Input				_Pos1	Position to start comparing from
+	@Input				_Num1	Number of chars to compare
+	@Input				_Str 	A string to compare with
+	@Input				_Off 	Position in _Str to compare from
+	@Input				_Count	Number of chars in _Str to compare with
+	@Returns			0 if the strings match
+	@Description		Compares the string with _Str
+	*************************************************************************/
+	int compare(size_t _Pos1, size_t _Num1, const CPVRTString& _Str, size_t _Off, size_t _Count) const;
+
+	/*!***********************************************************************
+	@Function			compare
+	@Input				_Ptr A string to compare with
+	@Returns			0 if the strings match
+	@Description		Compares the string with _Ptr
+	*************************************************************************/
+	int compare(const char* _Ptr) const;
+
+	/*!***********************************************************************
+	@Function			compare
+	@Input				_Pos1	Position to start comparing from
+	@Input				_Num1	Number of chars to compare
+	@Input				_Ptr 	A string to compare with
+	@Returns			0 if the strings match
+	@Description		Compares the string with _Ptr
+	*************************************************************************/
+	int compare(size_t _Pos1, size_t _Num1, const char* _Ptr) const;
+
+	/*!***********************************************************************
+	@Function			compare
+	@Input				_Pos1	Position to start comparing from
+	@Input				_Num1	Number of chars to compare
+	@Input				_Ptr 	A string to compare with
+	@Input				_Count	Number of chars to compare
+	@Returns			0 if the strings match
+	@Description		Compares the string with _Ptr
+	*************************************************************************/
+	int compare(size_t _Pos1, size_t _Num1, const char* _Ptr, size_t _Count) const;
+
+	/*!***********************************************************************
+	@Function			<
+	@Input				_Str A string to compare with
+	@Returns			True if this string is less than _Str
+	@Description		Less than operator
+	*************************************************************************/
+	bool operator<(const CPVRTString & _Str) const;
+
+	/*!***********************************************************************
+	@Function		==
+	@Input			_Str 	A string to compare with
+	@Returns		True if they match
+	@Description	== Operator
+	*************************************************************************/
+	bool operator==(const CPVRTString& _Str) const;
+
+	/*!***********************************************************************
+	@Function		==
+	@Input			_Ptr 	A string to compare with
+	@Returns		True if they match
+	@Description	== Operator
+	*************************************************************************/
+	bool operator==(const char* const _Ptr) const;
+
+	/*!***********************************************************************
+	@Function			!=
+	@Input				_Str 	A string to compare with
+	@Returns			True if they don't match
+	@Description		!= Operator
+	*************************************************************************/
+	bool operator!=(const CPVRTString& _Str) const;
+
+	/*!***********************************************************************
+	@Function			!=
+	@Input				_Ptr 	A string to compare with
+	@Returns			True if they don't match
+	@Description		!= Operator
+	*************************************************************************/
+	bool operator!=(const char* const _Ptr) const;
+
+	/*!***********************************************************************
+	@Function			copy
+	@Modified			_Ptr 	A string to copy to
+	@Input				_Count	Size of _Ptr
+	@Input				_Off	Position to start copying from
+	@Returns			Number of bytes copied
+	@Description		Copies the string to _Ptr
+	*************************************************************************/
+	size_t copy(char* _Ptr, size_t _Count, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			data
+	@Returns			A const char* version of the string
+	@Description		Returns a const char* version of the string
+	*************************************************************************/
+	const char* data( ) const;
+
+	/*!***********************************************************************
+	@Function			empty
+	@Returns			True if the string is empty
+	@Description		Returns true if the string is empty
+	*************************************************************************/
+	bool empty() const;
+
+	// const_iterator end() const;
+	// iterator end();
+
+	//iterator erase(iterator _First, iterator _Last);
+	//iterator erase(iterator _It);
+
+	/*!***********************************************************************
+	@Function			erase
+	@Input				_Pos	The position to start erasing from
+	@Input				_Count	Number of chars to erase
+	@Returns			An updated string
+	@Description		Erases a portion of the string
+	*************************************************************************/
+	CPVRTString& erase(size_t _Pos = 0, size_t _Count = npos);
+
+	/*!***********************************************************************
+	@Function			substitute
+	@Input				_src	Character to search for
+	@Input				_subDes	Character to substitute in its place
+	@Input				_all	Substitute all (defaults to true)
+	@Returns			An updated string
+	@Description		Substitutes the character _subDes for _src in the string
+	*************************************************************************/
+	CPVRTString& substitute(char _src,char _subDes, bool _all = true);
+
+	/*!***********************************************************************
+	@Function			substitute
+	@Input				_src	String to search for
+	@Input				_subDes	String to substitute in its place
+	@Input				_all	Substitute all (defaults to true)
+	@Returns			An updated string
+	@Description		Substitutes the string _subDes for _src in the string
+	*************************************************************************/
+	CPVRTString& substitute(const char* _src, const char* _subDes, bool _all = true);
+
+	//size_t find(char _Ch, size_t _Off = 0) const;
+	//size_t find(const char* _Ptr, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find
+	@Input				_Ptr	String to search.
+	@Input				_Off	Offset to search from.
+	@Input				_Count	Number of characters in this string.
+	@Returns			Position of the first matched string.
+	@Description		Finds a substring within this string.
+	*************************************************************************/
+	size_t find(const char* _Ptr, size_t _Off, size_t _Count) const;
+	
+	/*!***********************************************************************
+	@Function			find
+	@Input				_Str	String to search.
+	@Input				_Off	Offset to search from.
+	@Returns			Position of the first matched string.
+	@Description		Finds a substring within this string.
+	*************************************************************************/
+	size_t find(const CPVRTString& _Str, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_first_not_of
+	@Input				_Ch		A char
+	@Input				_Off	Start position of the find
+	@Returns			Position of the first char that is not _Ch
+	@Description		Returns the position of the first char that is not _Ch
+	*************************************************************************/
+	size_t find_first_not_of(char _Ch, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_first_not_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Returns			Position of the first char that is not in _Ptr
+	@Description		Returns the position of the first char that is not in _Ptr
+	*************************************************************************/
+	size_t find_first_not_of(const char* _Ptr, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_first_not_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Input				_Count	Number of chars in _Ptr
+	@Returns			Position of the first char that is not in _Ptr
+	@Description		Returns the position of the first char that is not in _Ptr
+	*************************************************************************/
+	size_t find_first_not_of(const char* _Ptr, size_t _Off, size_t _Count) const;
+
+	/*!***********************************************************************
+	@Function			find_first_not_of
+	@Input				_Str	A string
+	@Input				_Off	Start position of the find
+	@Returns			Position of the first char that is not in _Str
+	@Description		Returns the position of the first char that is not in _Str
+	*************************************************************************/
+	size_t find_first_not_of(const CPVRTString& _Str, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_first_of
+	@Input				_Ch		A char
+	@Input				_Off	Start position of the find
+	@Returns			Position of the first char that is _Ch
+	@Description		Returns the position of the first char that is _Ch
+	*************************************************************************/
+	size_t find_first_of(char _Ch, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_first_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Returns			Position of the first char that matches a char in _Ptr
+	@Description		Returns the position of the first char that matches a char in _Ptr
+	*************************************************************************/
+	size_t find_first_of(const char* _Ptr, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_first_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Input				_Count	Size of _Ptr
+	@Returns			Position of the first char that matches a char in _Ptr
+	@Description		Returns the position of the first char that matches a char in _Ptr
+	*************************************************************************/
+	size_t find_first_of(const char* _Ptr, size_t _Off, size_t _Count) const;
+
+	/*!***********************************************************************
+	@Function			find_first_ofn
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Input				_Count	Size of _Ptr
+	@Returns			Position of the first char that matches all chars in _Ptr
+	@Description		Returns the position of the first char that matches all chars in _Ptr
+	NOTE(review): presumably a search for the whole _Count-char sequence, unlike
+	find_first_of which matches any single char of _Ptr - confirm against the implementation.
+	*************************************************************************/
+	size_t find_first_ofn(const char* _Ptr, size_t _Off, size_t _Count) const;
+	
+
+	/*!***********************************************************************
+	@Function			find_first_of
+	@Input				_Str	A string
+	@Input				_Off	Start position of the find
+	@Returns			Position of the first char that matches a char in _Str
+	@Description		Returns the position of the first char that matches a char in _Str
+	*************************************************************************/
+	size_t find_first_of(const CPVRTString& _Str, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_last_not_of
+	@Input				_Ch		A char
+	@Input				_Off	Start position of the find
+	@Returns			Position of the last char that is not _Ch
+	@Description		Returns the position of the last char that is not _Ch
+	*************************************************************************/
+	size_t find_last_not_of(char _Ch, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_last_not_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Returns			Position of the last char that is not in _Ptr
+	@Description		Returns the position of the last char that is not in _Ptr
+	*************************************************************************/
+	size_t find_last_not_of(const char* _Ptr, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_last_not_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Input				_Count	Length of _Ptr
+	@Returns			Position of the last char that is not in _Ptr
+	@Description		Returns the position of the last char that is not in _Ptr
+	*************************************************************************/
+	size_t find_last_not_of(const char* _Ptr, size_t _Off, size_t _Count) const;
+
+	/*!***********************************************************************
+	@Function			find_last_not_of
+	@Input				_Str	A string
+	@Input				_Off	Start position of the find
+	@Returns			Position of the last char that is not in _Str
+	@Description		Returns the position of the last char that is not in _Str
+	*************************************************************************/
+	size_t find_last_not_of(const CPVRTString& _Str, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_last_of
+	@Input				_Ch		A char
+	@Input				_Off	Start position of the find
+	@Returns			Position of the last char that is _Ch
+	@Description		Returns the position of the last char that is _Ch
+	*************************************************************************/
+	size_t find_last_of(char _Ch, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_last_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Returns			Position of the last char that is in _Ptr
+	@Description		Returns the position of the last char that is in _Ptr
+	*************************************************************************/
+	size_t find_last_of(const char* _Ptr, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_last_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Input				_Count	Length of _Ptr
+	@Returns			Position of the last char that is in _Ptr
+	@Description		Returns the position of the last char that is in _Ptr
+	*************************************************************************/
+	size_t find_last_of(const char* _Ptr, size_t _Off, size_t _Count) const;
+
+	/*!***********************************************************************
+	@Function			find_last_of
+	@Input				_Str	A string
+	@Input				_Off	Start position of the find
+	@Returns			Position of the last char that is in _Str
+	@Description		Returns the position of the last char that is in _Str
+	*************************************************************************/
+	size_t find_last_of(const CPVRTString& _Str, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_number_of
+	@Input				_Ch		A char
+	@Input				_Off	Start position of the find
+	@Returns			Number of occurrences of _Ch in the parent string.
+	@Description		Returns the number of occurrences of _Ch in the parent string.
+	*************************************************************************/
+	size_t find_number_of(char _Ch, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_number_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Returns			Number of occurrences of _Ptr in the parent string.
+	@Description		Returns the number of occurrences of _Ptr in the parent string.
+	*************************************************************************/
+	size_t find_number_of(const char* _Ptr, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_number_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Input				_Count	Size of _Ptr
+	@Returns			Number of occurrences of _Ptr in the parent string.
+	@Description		Returns the number of occurrences of _Ptr in the parent string.
+	*************************************************************************/
+	size_t find_number_of(const char* _Ptr, size_t _Off, size_t _Count) const;
+
+	/*!***********************************************************************
+	@Function			find_number_of
+	@Input				_Str	A string
+	@Input				_Off	Start position of the find
+	@Returns			Number of occurrences of _Str in the parent string.
+	@Description		Returns the number of occurrences of _Str in the parent string.
+	*************************************************************************/
+	size_t find_number_of(const CPVRTString& _Str, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_next_occurance_of
+	@Input				_Ch		A char
+	@Input				_Off	Start position of the find
+	@Returns			Next occurrence of _Ch in the parent string.
+	@Description		Returns the next occurrence of _Ch in the parent string
+	after or at _Off. If not found, returns the length of the string.
+	*************************************************************************/
+	int find_next_occurance_of(char _Ch, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_next_occurance_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Returns			Next occurrence of _Ptr in the parent string.
+	@Description		Returns the next occurrence of _Ptr in the parent string
+	after or at _Off. If not found, returns the length of the string.
+	*************************************************************************/
+	int find_next_occurance_of(const char* _Ptr, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_next_occurance_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Input				_Count	Size of _Ptr
+	@Returns			Next occurrence of _Ptr in the parent string.
+	@Description		Returns the next occurrence of _Ptr in the parent string
+	after or at _Off. If not found, returns the length of the string.
+	*************************************************************************/
+	int find_next_occurance_of(const char* _Ptr, size_t _Off, size_t _Count) const;
+
+	/*!***********************************************************************
+	@Function			find_next_occurance_of
+	@Input				_Str	A string
+	@Input				_Off	Start position of the find
+	@Returns			Next occurrence of _Str in the parent string.
+	@Description		Returns the next occurrence of _Str in the parent string
+	after or at _Off. If not found, returns the length of the string.
+	*************************************************************************/
+	int find_next_occurance_of(const CPVRTString& _Str, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_previous_occurance_of
+	@Input				_Ch		A char
+	@Input				_Off	Start position of the find
+	@Returns			Previous occurrence of _Ch in the parent string.
+	@Description		Returns the previous occurrence of _Ch in the parent string
+	before _Off. If not found, returns -1.
+	*************************************************************************/
+	int find_previous_occurance_of(char _Ch, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_previous_occurance_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Returns			Previous occurrence of _Ptr in the parent string.
+	@Description		Returns the previous occurrence of _Ptr in the parent string
+	before _Off. If not found, returns -1.
+	*************************************************************************/
+	int find_previous_occurance_of(const char* _Ptr, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			find_previous_occurance_of
+	@Input				_Ptr	A string
+	@Input				_Off	Start position of the find
+	@Input				_Count	Size of _Ptr
+	@Returns			Previous occurrence of _Ptr in the parent string.
+	@Description		Returns the previous occurrence of _Ptr in the parent string
+	before _Off. If not found, returns -1.
+	*************************************************************************/
+	int find_previous_occurance_of(const char* _Ptr, size_t _Off, size_t _Count) const;
+
+	/*!***********************************************************************
+	@Function			find_previous_occurance_of
+	@Input				_Str	A string
+	@Input				_Off	Start position of the find
+	@Returns			Previous occurrence of _Str in the parent string.
+	@Description		Returns the previous occurrence of _Str in the parent string
+	before _Off. If not found, returns -1.
+	*************************************************************************/
+	int find_previous_occurance_of(const CPVRTString& _Str, size_t _Off = 0) const;
+
+	/*!***********************************************************************
+	@Function			left
+	@Input				iSize	number of characters to return (excluding null character)
+	@Returns			The leftmost 'iSize' characters of the string.
+	@Description		Returns the leftmost characters of the string (excluding 
+	the null character) in a new CPVRTString. If iSize is
+	larger than the string, a copy of the original string is returned.
+	*************************************************************************/
+	CPVRTString left(size_t iSize) const;
+
+	/*!***********************************************************************
+	@Function			right
+	@Input				iSize	number of characters to return (excluding null character)
+	@Returns			The rightmost 'iSize' characters of the string.
+	@Description		Returns the rightmost characters of the string (excluding 
+	the null character) in a new CPVRTString. If iSize is
+	larger than the string, a copy of the original string is returned.
+	*************************************************************************/
+	CPVRTString right(size_t iSize) const;
+
+	//allocator_type get_allocator( ) const;
+
+	//CPVRTString& insert(size_t _P0, const char* _Ptr);
+	//CPVRTString& insert(size_t _P0, const char* _Ptr, size_t _Count);
+	//CPVRTString& insert(size_t _P0, const CPVRTString& _Str);
+	//CPVRTString& insert(size_t _P0, const CPVRTString& _Str, size_t _Off, size_t _Count);
+	//CPVRTString& insert(size_t _P0, size_t _Count, char _Ch);
+	//iterator insert(iterator _It, char _Ch = char());
+	//template<class InputIterator> void insert(iterator _It, InputIterator _First, InputIterator _Last);
+	//void insert(iterator _It, size_t _Count, char _Ch);
+
+	/*!***********************************************************************
+	@Function			length
+	@Returns			Length of the string
+	@Description		Returns the length of the string
+	*************************************************************************/
+	size_t length() const;
+
+	/*!***********************************************************************
+	@Function			max_size
+	@Returns			The maximum number of chars that the string can contain
+	@Description		Returns the maximum number of chars that the string can contain
+	*************************************************************************/
+	size_t max_size() const;
+
+	/*!***********************************************************************
+	@Function			push_back
+	@Input				_Ch A char to append
+	@Description		Appends _Ch to the string
+	*************************************************************************/
+	void push_back(char _Ch);
+
+	// const_reverse_iterator rbegin() const;
+	// reverse_iterator rbegin();
+
+	// const_reverse_iterator rend() const;
+	// reverse_iterator rend();
+
+	//CPVRTString& replace(size_t _Pos1, size_t _Num1, const char* _Ptr);
+	//CPVRTString& replace(size_t _Pos1, size_t _Num1, const CPVRTString& _Str);
+	//CPVRTString& replace(size_t _Pos1, size_t _Num1, const char* _Ptr, size_t _Num2);
+	//CPVRTString& replace(size_t _Pos1, size_t _Num1, const CPVRTString& _Str, size_t _Pos2, size_t _Num2);
+	//CPVRTString& replace(size_t _Pos1, size_t _Num1, size_t _Count, char _Ch);
+
+	//CPVRTString& replace(iterator _First0, iterator _Last0, const char* _Ptr);
+	//CPVRTString& replace(iterator _First0, iterator _Last0, const CPVRTString& _Str);
+	//CPVRTString& replace(iterator _First0, iterator _Last0, const char* _Ptr, size_t _Num2);
+	//CPVRTString& replace(iterator _First0, iterator _Last0, size_t _Num2, char _Ch);
+	//template<class InputIterator> CPVRTString& replace(iterator _First0, iterator _Last0, InputIterator _First, InputIterator _Last);
+
+	/*!***********************************************************************
+	@Function			reserve
+	@Input				_Count Size of string to reserve
+	@Description		Reserves space for _Count number of chars
+	*************************************************************************/
+	void reserve(size_t _Count = 0);
+
+	/*!***********************************************************************
+	@Function			resize
+	@Input				_Count 	Size of string to resize to
+	@Input				_Ch		Character to use to fill any additional space
+	@Description		Resizes the string to _Count in length
+	*************************************************************************/
+	void resize(size_t _Count, char _Ch = char());
+
+	//size_t rfind(char _Ch, size_t _Off = npos) const;
+	//size_t rfind(const char* _Ptr, size_t _Off = npos) const;
+	//size_t rfind(const char* _Ptr, size_t _Off = npos, size_t _Count) const;
+	//size_t rfind(const CPVRTString& _Str, size_t _Off = npos) const;
+
+	/*!***********************************************************************
+	@Function			size
+	@Returns			Size of the string
+	@Description		Returns the size of the string
+	*************************************************************************/
+	size_t size() const;
+
+	/*!***********************************************************************
+	@Function			substr
+	@Input				_Off	Start of the substring (defaults to 0)
+	@Input				_Count	Length of the substring (defaults to npos)
+	@Returns			A substring of the string
+	@Description		Returns the substring of length _Count starting at _Off
+	*************************************************************************/
+	CPVRTString substr(size_t _Off = 0, size_t _Count = npos) const;
+
+	/*!***********************************************************************
+	@Function			swap
+	@Input				_Str	A string to swap with
+	@Description		Swaps the contents of the string with _Str
+	*************************************************************************/
+	void swap(CPVRTString& _Str);
+
+	/*!***********************************************************************
+	@Function			toLower
+	@Returns			An updated string
+	@Description		Converts the string to lower case
+	*************************************************************************/
+	CPVRTString& toLower();
+	
+	/*!***********************************************************************
+	@Function			toUpper
+	@Returns			An updated string
+	@Description		Converts the string to upper case
+	*************************************************************************/
+	CPVRTString& toUpper();
+
+	/*!***********************************************************************
+	@Function			format
+	@Input				pFormat A string containing the formatting
+	@Input				...		Additional arguments for the format string
+	@Returns			A formatted string
+	@Description		Returns the formatted string
+	************************************************************************/
+	CPVRTString format(const char *pFormat, ...);
+	
+	/*!***********************************************************************
+	@Function			+=
+	@Input				_Ch A char
+	@Returns			An updated string
+	@Description		+= Operator
+	*************************************************************************/
+	CPVRTString& operator+=(char _Ch);
+
+	/*!***********************************************************************
+	@Function			+=
+	@Input				_Ptr A string
+	@Returns			An updated string
+	@Description		+= Operator
+	*************************************************************************/
+	CPVRTString& operator+=(const char* _Ptr);
+
+	/*!***********************************************************************
+	@Function			+=
+	@Input				_Right A string
+	@Returns			An updated string
+	@Description		+= Operator
+	*************************************************************************/
+	CPVRTString& operator+=(const CPVRTString& _Right);
+
+	/*!***********************************************************************
+	@Function			=
+	@Input				_Ch A char
+	@Returns			An updated string
+	@Description		= Operator
+	*************************************************************************/
+	CPVRTString& operator=(char _Ch);
+
+	/*!***********************************************************************
+	@Function			=
+	@Input				_Ptr A string
+	@Returns			An updated string
+	@Description		= Operator
+	*************************************************************************/
+	CPVRTString& operator=(const char* _Ptr);
+
+	/*!***********************************************************************
+	@Function			=
+	@Input				_Right A string
+	@Returns			An updated string
+	@Description		= Operator
+	*************************************************************************/
+	CPVRTString& operator=(const CPVRTString& _Right);
+
+	/*!***********************************************************************
+	@Function			[]
+	@Input				_Off An index into the string
+	@Returns			A character
+	@Description		[] Operator
+	*************************************************************************/
+	const_reference operator[](size_t _Off) const;
+
+	/*!***********************************************************************
+	@Function			[]
+	@Input				_Off An index into the string
+	@Returns			A character
+	@Description		[] Operator
+	*************************************************************************/
+	reference operator[](size_t _Off);
+
+	/*!***********************************************************************
+	@Function			+
+	@Input				_Left A string
+	@Input				_Right A string
+	@Returns			An updated string
+	@Description		+ Operator
+	*************************************************************************/
+	friend CPVRTString operator+ (const CPVRTString& _Left, const CPVRTString& _Right);
+
+	/*!***********************************************************************
+	@Function			+
+	@Input				_Left A string
+	@Input				_Right A string
+	@Returns			An updated string
+	@Description		+ Operator
+	*************************************************************************/
+	friend CPVRTString operator+ (const CPVRTString& _Left, const char* _Right);
+
+	/*!***********************************************************************
+	@Function			+
+	@Input				_Left A string
+	@Input				_Right A char
+	@Returns			An updated string
+	@Description		+ Operator
+	*************************************************************************/
+	friend CPVRTString operator+ (const CPVRTString& _Left, const char _Right);
+
+	/*!***********************************************************************
+	@Function			+
+	@Input				_Left A string
+	@Input				_Right A string
+	@Returns			An updated string
+	@Description		+ Operator
+	*************************************************************************/
+	friend CPVRTString operator+ (const char* _Left, const CPVRTString& _Right);
+
+
+	/*!***********************************************************************
+	@Function			+
+	@Input				_Left A char
+	@Input				_Right A string
+	@Returns			An updated string
+	@Description		+ Operator
+	*************************************************************************/
+	friend CPVRTString operator+ (const char _Left, const CPVRTString& _Right);
+
+protected:
+	char* m_pString;
+	size_t m_Size;
+	size_t m_Capacity;
+};
+
+/*************************************************************************
+* MISCELLANEOUS UTILITY FUNCTIONS
+*************************************************************************/
+/*!***********************************************************************
+@Function			PVRTStringGetFileExtension
+@Input				strFilePath A string
+@Returns			Extension
+@Description		Extracts the file extension from a file path.
+Returns an empty CPVRTString if no extension is found.
+************************************************************************/
+CPVRTString PVRTStringGetFileExtension(const CPVRTString& strFilePath);
+
+/*!***********************************************************************
+@Function			PVRTStringGetContainingDirectoryPath
+@Input				strFilePath A string
+@Returns			Directory
+@Description		Extracts the directory portion from a file path.
+************************************************************************/
+CPVRTString PVRTStringGetContainingDirectoryPath(const CPVRTString& strFilePath);
+
+/*!***********************************************************************
+@Function			PVRTStringGetFileName
+@Input				strFilePath A string
+@Returns			FileName
+@Description		Extracts the name and extension portion from a file path.
+************************************************************************/
+CPVRTString PVRTStringGetFileName(const CPVRTString& strFilePath);
+
+/*!***********************************************************************
+@Function			PVRTStringStripWhiteSpaceFromStartOf
+@Input				strLine A string
+@Returns			Result of the white space stripping
+@Description		strips white space characters from the beginning of a CPVRTString.
+************************************************************************/
+CPVRTString PVRTStringStripWhiteSpaceFromStartOf(const CPVRTString& strLine);
+
+/*!***********************************************************************
+@Function			PVRTStringStripWhiteSpaceFromEndOf
+@Input				strLine A string
+@Returns			Result of the white space stripping
+@Description		strips white space characters from the end of a CPVRTString.
+************************************************************************/
+CPVRTString PVRTStringStripWhiteSpaceFromEndOf(const CPVRTString& strLine);
+
+/*!***********************************************************************
+@Function			PVRTStringFromFormattedStr
+@Input				pFormat A string containing the formatting
+@Returns			A formatted string
+@Description		Creates a formatted string
+************************************************************************/
+CPVRTString PVRTStringFromFormattedStr(const char *pFormat, ...);
+
+#endif // _PVRTSTRING_H_
+
+
+/*****************************************************************************
+End of file (PVRTString.h)
+*****************************************************************************/
+
+
+
+
diff --git a/extern/pvrtextool/Include/PVRTTexture.h b/extern/pvrtextool/Include/PVRTTexture.h
new file mode 100644
index 0000000..32ad588
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTTexture.h
@@ -0,0 +1,703 @@
+/******************************************************************************
+
+ @File         PVRTTexture.h
+
+ @Title        PVRTTexture
+
+ @Version       @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     ANSI compatible
+
+ @Description  Texture loading.
+
+******************************************************************************/
+#ifndef _PVRTTEXTURE_H_
+#define _PVRTTEXTURE_H_
+
+//ACS: PVRTGlobal.h was dragging in too much stuff (like windows.h & crtdbg.h)
+// so I split the relevant stuff out into a new file, PVRTTypes.h
+//#include "PVRTGlobal.h"
+#include "PVRTTypes.h"
+
+/*****************************************************************************
+* Texture related constants and enumerations. 
+*****************************************************************************/
+// V3 Header Identifiers.
+const PVRTuint32 PVRTEX3_IDENT			= 0x03525650;	// 'P''V''R'3
+const PVRTuint32 PVRTEX3_IDENT_REV		= 0x50565203;   
+// If endianness is backwards then PVR3 will read as 3RVP, hence why it is written as an int.
+
+//Current version texture identifiers
+const PVRTuint32 PVRTEX_CURR_IDENT		= PVRTEX3_IDENT;
+const PVRTuint32 PVRTEX_CURR_IDENT_REV	= PVRTEX3_IDENT_REV;
+
+// PVR Header file flags.										Condition if true. If false, opposite is true unless specified.
+const PVRTuint32 PVRTEX3_FILE_COMPRESSED	= (1<<0);		//	Texture has been file compressed using PVRTexLib (currently unused)
+const PVRTuint32 PVRTEX3_PREMULTIPLIED		= (1<<1);		//	Texture has been premultiplied by alpha value.	
+
+// Mip Map level specifier constants. Other levels are specified by 1,2...n
+const PVRTint32 PVRTEX_TOPMIPLEVEL			= 0;
+const PVRTint32 PVRTEX_ALLMIPLEVELS			= -1; //This is a special number used simply to return a total of all MIP levels when dealing with data sizes.
+
+//values for each meta data type that we know about. Texture arrays hinge on each surface being identical in all but content, including meta data. 
+//If the meta data varies even slightly then a new texture should be used. It is possible to write your own extension to get around this however.
+enum EPVRTMetaData	//Identifiers for the known meta data types; enumerators are numbered consecutively from 0.
+{
+	ePVRTMetaDataTextureAtlasCoords=0,	//0
+	ePVRTMetaDataBumpData,	//1
+	ePVRTMetaDataCubeMapOrder,	//2
+	ePVRTMetaDataTextureOrientation,	//3
+	ePVRTMetaDataBorderData,	//4
+	ePVRTMetaDataPadding,	//5
+	ePVRTMetaDataNumMetaDataTypes	//6 - number of enumerators above.
+};
+
+enum EPVRTAxis	//Axis indices. EPVRTOrientation uses these values as bit positions (1<<axis).
+{
+	ePVRTAxisX = 0,
+	ePVRTAxisY = 1,
+	ePVRTAxisZ = 2
+};
+
+enum EPVRTOrientation	//One bit per axis: the axis bit is set for Left/Up/Out and clear for Right/Down/In.
+{
+	ePVRTOrientLeft	= 1<<ePVRTAxisX,	//0x1
+	ePVRTOrientRight= 0,
+	ePVRTOrientUp	= 1<<ePVRTAxisY,	//0x2
+	ePVRTOrientDown	= 0,
+	ePVRTOrientOut	= 1<<ePVRTAxisZ,	//0x4
+	ePVRTOrientIn	= 0	//Right, Down and In all share the value 0, so they cannot be told apart by value alone.
+};
+
+enum EPVRTColourSpace	//Values for the u32ColourSpace member of PVRTextureHeaderV3 (linear RGB or sRGB).
+{
+	ePVRTCSpacelRGB,	//0 - linear RGB
+	ePVRTCSpacesRGB,	//1 - sRGB
+	ePVRTCSpaceNumSpaces	//2 - number of colour spaces above
+};
+
+//Compressed pixel formats
+enum EPVRTPixelFormat	//Enumerators are implicitly numbered from 0; the trailing comments give the resulting values.
+{
+	ePVRTPF_PVRTCI_2bpp_RGB,	//0
+	ePVRTPF_PVRTCI_2bpp_RGBA,	//1
+	ePVRTPF_PVRTCI_4bpp_RGB,	//2
+	ePVRTPF_PVRTCI_4bpp_RGBA,	//3
+	ePVRTPF_PVRTCII_2bpp,	//4
+	ePVRTPF_PVRTCII_4bpp,	//5
+	ePVRTPF_ETC1,	//6
+	ePVRTPF_DXT1,	//7
+	ePVRTPF_DXT2,	//8
+	ePVRTPF_DXT3,	//9
+	ePVRTPF_DXT4,	//10
+	ePVRTPF_DXT5,	//11
+
+	//These formats are identical to some DXT formats (aliases: they reuse the DXT values).
+	ePVRTPF_BC1 = ePVRTPF_DXT1,	//7
+	ePVRTPF_BC2 = ePVRTPF_DXT3,	//9
+	ePVRTPF_BC3 = ePVRTPF_DXT5,	//11
+
+	//These are currently unsupported (numbering continues from ePVRTPF_BC3, i.e. from 12):
+	ePVRTPF_BC4,	//12
+	ePVRTPF_BC5,	//13
+	ePVRTPF_BC6,	//14
+	ePVRTPF_BC7,	//15
+
+	//These are supported
+	ePVRTPF_UYVY,	//16
+	ePVRTPF_YUY2,	//17
+	ePVRTPF_BW1bpp,	//18
+	ePVRTPF_SharedExponentR9G9B9E5,	//19
+	ePVRTPF_RGBG8888,	//20
+	ePVRTPF_GRGB8888,	//21
+	ePVRTPF_ETC2_RGB,	//22
+	ePVRTPF_ETC2_RGBA,	//23
+	ePVRTPF_ETC2_RGB_A1,	//24
+	ePVRTPF_EAC_R11,	//25
+	ePVRTPF_EAC_RG11,	//26
+
+	//Invalid value (27): one past the last format above. PVRTextureHeaderV3's constructor uses it as the initial, must-be-replaced pixel format.
+	ePVRTPF_NumCompressedPFs
+};
+
+//Variable Type Names
+enum EPVRTVariableType	//Enumerators are implicitly numbered from 0; the trailing comments give the resulting values.
+{
+	ePVRTVarTypeUnsignedByteNorm,	//0
+	ePVRTVarTypeSignedByteNorm,	//1
+	ePVRTVarTypeUnsignedByte,	//2
+	ePVRTVarTypeSignedByte,	//3
+	ePVRTVarTypeUnsignedShortNorm,	//4
+	ePVRTVarTypeSignedShortNorm,	//5
+	ePVRTVarTypeUnsignedShort,	//6
+	ePVRTVarTypeSignedShort,	//7
+	ePVRTVarTypeUnsignedIntegerNorm,	//8
+	ePVRTVarTypeSignedIntegerNorm,	//9
+	ePVRTVarTypeUnsignedInteger,	//10
+	ePVRTVarTypeSignedInteger,	//11
+	ePVRTVarTypeSignedFloat,	ePVRTVarTypeFloat=ePVRTVarTypeSignedFloat, //12 (both names) - the name ePVRTVarTypeFloat is now deprecated.
+	ePVRTVarTypeUnsignedFloat,	//13
+	ePVRTVarTypeNumVarTypes	//14 - number of distinct variable types above.
+};
+
+//A 64 bit pixel format ID & this will give you the high bits of a pixel format to check for a compressed format.
+static const PVRTuint64 PVRTEX_PFHIGHMASK=0xffffffff00000000ull;
+
+/*****************************************************************************
+* Texture header structures.
+*****************************************************************************/
+
+struct MetaDataBlock
+{
+	PVRTuint32	DevFOURCC;		//A 4cc descriptor of the data type's creator. Values equating to values between 'P' 'V' 'R' 0 and 'P' 'V' 'R' 255 will be used by our headers.
+	PVRTuint32	u32Key;			//A DWORD (enum value) identifying the data type, and thus how to read it.
+	PVRTuint32	u32DataSize;	//Size of the Data member, in bytes.
+	PVRTuint8*	Data;			//Data array, can be absolutely anything, the loader needs to know how to handle it based on DevFOURCC and Key. Must be allocated with new[] (or be NULL): the block owns it and releases it with delete [].
+		
+	/*!***********************************************************************
+		@Function		MetaDataBlock
+		@Description	Meta Data Block Constructor. Creates an empty block: all fields zero and Data NULL.
+	*************************************************************************/
+	MetaDataBlock() : DevFOURCC(0), u32Key(0), u32DataSize(0), Data(NULL)
+	{}
+		
+	/*!***********************************************************************
+		@Function		MetaDataBlock
+		@Description	Meta Data Block Copy Constructor. Deep-copies rhs; Data stays NULL when rhs.Data is NULL.
+	*************************************************************************/
+	MetaDataBlock(const MetaDataBlock& rhs)  : DevFOURCC(rhs.DevFOURCC), u32Key(rhs.u32Key), u32DataSize(rhs.u32DataSize), Data(NULL)
+	{
+		//Nothing to copy if the source has no data (same rule as operator=); this also avoids reading through a NULL pointer.
+		if (!rhs.Data)
+			return;
+		Data = new PVRTuint8[u32DataSize];
+		for (PVRTuint32 uiDataAmt=0; uiDataAmt<u32DataSize; ++uiDataAmt)
+			Data[uiDataAmt]=rhs.Data[uiDataAmt];
+	}
+
+	/*!***********************************************************************
+		@Function		~MetaDataBlock
+		@Description	Meta Data Block Destructor. Releases the owned Data array.
+	*************************************************************************/
+	~MetaDataBlock()
+	{
+		if (Data) 
+			delete [] Data;
+		Data = NULL;
+	}
+
+	/*!***********************************************************************
+		@Function		SizeOfBlock
+		@Return			size_t Size (in a file) of the block.
+		@Description	Returns the number of extra bytes this will add to any output files: the three 32-bit fields plus u32DataSize bytes of data.
+	*************************************************************************/
+	size_t SizeOfBlock() const
+	{
+		return sizeof(DevFOURCC)+sizeof(u32Key)+sizeof(u32DataSize)+u32DataSize;
+	}
+
+	/*!***********************************************************************
+		@Function		operator=
+		@Return			MetaDataBlock This MetaDataBlock after the operation.
+		@Description	Assigns one MetaDataBlock to the other (deep copy). Self-assignment is a no-op.
+	*************************************************************************/
+	MetaDataBlock& operator=(const MetaDataBlock& rhs)
+	{
+		if (&rhs==this)
+			return *this;
+
+		//Build the copy of the data first, before touching this block, so that
+		//a failed allocation leaves this block exactly as it was.
+		//If rhs has no data, this block ends up with Data == NULL as well.
+		PVRTuint8* pNewData = NULL;
+		if (rhs.Data)
+		{
+			pNewData = new PVRTuint8[rhs.u32DataSize];
+			for (PVRTuint32 uiDataAmt=0; uiDataAmt<rhs.u32DataSize; ++uiDataAmt)
+				pNewData[uiDataAmt]=rhs.Data[uiDataAmt];
+		}
+
+		//Remove pre-existing data (delete [] on NULL is a no-op) and adopt the copy.
+		delete [] Data;
+		Data=pNewData;
+
+		//Copy the basic parameters
+		DevFOURCC=rhs.DevFOURCC;
+		u32Key=rhs.u32Key;
+		u32DataSize=rhs.u32DataSize;
+
+		return *this;
+	}
+
+	/*!***************************************************************************
+	@Function		ReadFromPtr
+	@Input			pDataCursor		The data to read
+	@Description	Reads from a pointer of memory in to the meta data block.
+	*****************************************************************************/
+	bool ReadFromPtr(const unsigned char** pDataCursor);
+};
+
+//The idea behind this is that it stores EVERYTHING that you would ever need to read a texture accurately, and nothing more. 
+//Extraneous data is stored in meta data. Correct use of the texture may rely on meta data, but accurate data loading can be done through the
+//Standard header alone.
+
+#pragma pack(push,4)
+struct PVRTextureHeaderV3{	//V3 texture header: one 64-bit and eleven 32-bit members, i.e. PVRTEX3_HEADERSIZE (52) bytes under the surrounding 4-byte packing.
+	PVRTuint32	u32Version;			//Version of the file header, used to identify it.
+	PVRTuint32	u32Flags;			//Various format flags.
+	PVRTuint64	u64PixelFormat;		//The pixel format, 8cc value storing the 4 channel identifiers and their respective sizes.
+	PVRTuint32	u32ColourSpace;		//The Colour Space of the texture, currently either linear RGB or sRGB.
+	PVRTuint32	u32ChannelType;		//Variable type that the channel is stored in. Supports signed/unsigned int/short/byte or float for now.
+	PVRTuint32	u32Height;			//Height of the texture. Note that height is stored before width.
+	PVRTuint32	u32Width;			//Width of the texture.
+	PVRTuint32	u32Depth;			//Depth of the texture. (Z-slices)
+	PVRTuint32	u32NumSurfaces;		//Number of members in a Texture Array.
+	PVRTuint32	u32NumFaces;		//Number of faces in a Cube Map. May be a value other than 6.
+	PVRTuint32	u32MIPMapCount;		//Number of MIP Maps in the texture - NB: Includes top level.
+	PVRTuint32	u32MetaDataSize;	//Size of the accompanying meta data.
+
+	//Constructor for the header - used to make sure that the header is initialised usefully: a 1x1x1 texture with one surface, one face, one MIP level and no meta data. The initial pixel format is an invalid one and must be set.
+	PVRTextureHeaderV3() : 
+		u32Version(PVRTEX3_IDENT),u32Flags(0),
+		u64PixelFormat(ePVRTPF_NumCompressedPFs),
+		u32ColourSpace(0),u32ChannelType(0),
+		u32Height(1),u32Width(1),u32Depth(1),
+		u32NumSurfaces(1),u32NumFaces(1),
+		u32MIPMapCount(1),u32MetaDataSize(0)
+	{}
+};
+#pragma pack(pop)
+#define PVRTEX3_HEADERSIZE 52
+
+/*!***************************************************************************
+Describes the Version 2 header of a PVR texture header.
+*****************************************************************************/
+struct PVR_Texture_Header	/* Legacy header: thirteen 32-bit fields. */
+{
+	PVRTuint32 dwHeaderSize;		/*!< size of the structure */
+	PVRTuint32 dwHeight;			/*!< height of surface to be created */
+	PVRTuint32 dwWidth;				/*!< width of input surface */
+	PVRTuint32 dwMipMapCount;		/*!< number of mip-map levels requested */
+	PVRTuint32 dwpfFlags;			/*!< pixel format flags */
+	PVRTuint32 dwTextureDataSize;	/*!< Total size in bytes */
+	PVRTuint32 dwBitCount;			/*!< number of bits per pixel  */
+	PVRTuint32 dwRBitMask;			/*!< mask for red bits */
+	PVRTuint32 dwGBitMask;			/*!< mask for green bits */
+	PVRTuint32 dwBBitMask;			/*!< mask for blue bits */
+	PVRTuint32 dwAlphaBitMask;		/*!< mask for alpha channel */
+	PVRTuint32 dwPVR;				/*!< magic number identifying pvr file */
+	PVRTuint32 dwNumSurfs;			/*!< the number of surfaces present in the pvr */
+} ;
+
+/*****************************************************************************
+* Legacy (V2 and V1) ENUMS
+*****************************************************************************/
+
+	enum PVRTPixelType	// Legacy pixel type identifiers. Each group starts at an explicit value; the entries after it count up from there.
+	{
+		MGLPT_ARGB_4444 = 0x00,
+		MGLPT_ARGB_1555,
+		MGLPT_RGB_565,
+		MGLPT_RGB_555,
+		MGLPT_RGB_888,
+		MGLPT_ARGB_8888,
+		MGLPT_ARGB_8332,
+		MGLPT_I_8,
+		MGLPT_AI_88,
+		MGLPT_1_BPP,
+		MGLPT_VY1UY0,
+		MGLPT_Y1VY0U,
+		MGLPT_PVRTC2,
+		MGLPT_PVRTC4,
+
+		// OpenGL version of pixel types
+		OGL_RGBA_4444= 0x10,
+		OGL_RGBA_5551,
+		OGL_RGBA_8888,
+		OGL_RGB_565,
+		OGL_RGB_555,
+		OGL_RGB_888,
+		OGL_I_8,
+		OGL_AI_88,
+		OGL_PVRTC2,
+		OGL_PVRTC4,
+		OGL_BGRA_8888,
+		OGL_A_8,
+		OGL_PVRTCII4,	//Not in use
+		OGL_PVRTCII2,	//Not in use
+
+		// S3TC Encoding
+		D3D_DXT1 = 0x20,
+		D3D_DXT2,
+		D3D_DXT3,
+		D3D_DXT4,
+		D3D_DXT5,
+
+		//RGB Formats
+		D3D_RGB_332,
+		D3D_AL_44,
+		D3D_LVU_655,
+		D3D_XLVU_8888,
+		D3D_QWVU_8888,
+		
+		//10 bit integer - 2 bit alpha
+		D3D_ABGR_2101010,
+		D3D_ARGB_2101010,
+		D3D_AWVU_2101010,
+
+		//16 bit integers
+		D3D_GR_1616,
+		D3D_VU_1616,
+		D3D_ABGR_16161616,
+
+		//Float Formats
+		D3D_R16F,
+		D3D_GR_1616F,
+		D3D_ABGR_16161616F,
+
+		//32 bits per channel
+		D3D_R32F,
+		D3D_GR_3232F,
+		D3D_ABGR_32323232F,
+		
+		// Ericsson
+		ETC_RGB_4BPP,
+		ETC_RGBA_EXPLICIT,				// unimplemented
+		ETC_RGBA_INTERPOLATED,			// unimplemented
+		
+		D3D_A8 = 0x40,
+		D3D_V8U8,
+		D3D_L16,
+				
+		D3D_L8,
+		D3D_AL_88,
+
+		//Y'UV Colourspace
+		D3D_UYVY,
+		D3D_YUY2,
+		
+		// DX10
+		DX10_R32G32B32A32_FLOAT= 0x50,
+		DX10_R32G32B32A32_UINT , 
+		DX10_R32G32B32A32_SINT,
+
+		DX10_R32G32B32_FLOAT,
+		DX10_R32G32B32_UINT,
+		DX10_R32G32B32_SINT,
+
+		DX10_R16G16B16A16_FLOAT ,
+		DX10_R16G16B16A16_UNORM,
+		DX10_R16G16B16A16_UINT ,
+		DX10_R16G16B16A16_SNORM ,
+		DX10_R16G16B16A16_SINT ,
+
+		DX10_R32G32_FLOAT ,
+		DX10_R32G32_UINT ,
+		DX10_R32G32_SINT ,
+
+		DX10_R10G10B10A2_UNORM ,
+		DX10_R10G10B10A2_UINT ,
+
+		DX10_R11G11B10_FLOAT ,				// unimplemented
+
+		DX10_R8G8B8A8_UNORM , 
+		DX10_R8G8B8A8_UNORM_SRGB ,
+		DX10_R8G8B8A8_UINT ,
+		DX10_R8G8B8A8_SNORM ,
+		DX10_R8G8B8A8_SINT ,
+
+		DX10_R16G16_FLOAT , 
+		DX10_R16G16_UNORM , 
+		DX10_R16G16_UINT , 
+		DX10_R16G16_SNORM ,
+		DX10_R16G16_SINT ,
+
+		DX10_R32_FLOAT ,
+		DX10_R32_UINT ,
+		DX10_R32_SINT ,
+
+		DX10_R8G8_UNORM ,
+		DX10_R8G8_UINT ,
+		DX10_R8G8_SNORM , 
+		DX10_R8G8_SINT ,
+
+		DX10_R16_FLOAT ,
+		DX10_R16_UNORM ,
+		DX10_R16_UINT ,
+		DX10_R16_SNORM ,
+		DX10_R16_SINT ,
+
+		DX10_R8_UNORM, 
+		DX10_R8_UINT,
+		DX10_R8_SNORM,
+		DX10_R8_SINT,
+
+		DX10_A8_UNORM, 
+		DX10_R1_UNORM, 
+		DX10_R9G9B9E5_SHAREDEXP,	// unimplemented
+		DX10_R8G8_B8G8_UNORM,		// unimplemented
+		DX10_G8R8_G8B8_UNORM,		// unimplemented
+
+		DX10_BC1_UNORM,	
+		DX10_BC1_UNORM_SRGB,
+
+		DX10_BC2_UNORM,	
+		DX10_BC2_UNORM_SRGB,
+
+		DX10_BC3_UNORM,	
+		DX10_BC3_UNORM_SRGB,
+
+		DX10_BC4_UNORM,				// unimplemented
+		DX10_BC4_SNORM,				// unimplemented
+
+		DX10_BC5_UNORM,				// unimplemented
+		DX10_BC5_SNORM,				// unimplemented
+
+		// OpenVG
+
+		/* RGB{A,X} channel ordering */
+		ePT_VG_sRGBX_8888  = 0x90,
+		ePT_VG_sRGBA_8888,
+		ePT_VG_sRGBA_8888_PRE,
+		ePT_VG_sRGB_565,
+		ePT_VG_sRGBA_5551,
+		ePT_VG_sRGBA_4444,
+		ePT_VG_sL_8,
+		ePT_VG_lRGBX_8888,
+		ePT_VG_lRGBA_8888,
+		ePT_VG_lRGBA_8888_PRE,
+		ePT_VG_lL_8,
+		ePT_VG_A_8,
+		ePT_VG_BW_1,
+
+		/* {A,X}RGB channel ordering */
+		ePT_VG_sXRGB_8888,
+		ePT_VG_sARGB_8888,
+		ePT_VG_sARGB_8888_PRE,
+		ePT_VG_sARGB_1555,
+		ePT_VG_sARGB_4444,
+		ePT_VG_lXRGB_8888,
+		ePT_VG_lARGB_8888,
+		ePT_VG_lARGB_8888_PRE,
+
+		/* BGR{A,X} channel ordering */
+		ePT_VG_sBGRX_8888,
+		ePT_VG_sBGRA_8888,
+		ePT_VG_sBGRA_8888_PRE,
+		ePT_VG_sBGR_565,
+		ePT_VG_sBGRA_5551,
+		ePT_VG_sBGRA_4444,
+		ePT_VG_lBGRX_8888,
+		ePT_VG_lBGRA_8888,
+		ePT_VG_lBGRA_8888_PRE,
+
+		/* {A,X}BGR channel ordering */
+		ePT_VG_sXBGR_8888,
+		ePT_VG_sABGR_8888 ,
+		ePT_VG_sABGR_8888_PRE,
+		ePT_VG_sABGR_1555,
+		ePT_VG_sABGR_4444,
+		ePT_VG_lXBGR_8888,
+		ePT_VG_lABGR_8888,
+		ePT_VG_lABGR_8888_PRE,
+
+		// max cap for iterating: one more than the last OpenVG entry above
+		END_OF_PIXEL_TYPES,
+
+		MGLPT_NOTYPE = 0xffffffff	// lies above END_OF_PIXEL_TYPES, so it matches none of the entries above
+
+	};
+
+/*****************************************************************************
+* Legacy constants (V1/V2)
+*****************************************************************************/
+
+const PVRTuint32 PVRTEX_MIPMAP			= (1<<8);		// has mip map levels
+const PVRTuint32 PVRTEX_TWIDDLE			= (1<<9);		// is twiddled
+const PVRTuint32 PVRTEX_BUMPMAP			= (1<<10);		// has normals encoded for a bump map
+const PVRTuint32 PVRTEX_TILING			= (1<<11);		// is bordered for tiled pvr
+const PVRTuint32 PVRTEX_CUBEMAP			= (1<<12);		// is a cubemap/skybox
+const PVRTuint32 PVRTEX_FALSEMIPCOL		= (1<<13);		// are there false coloured MIP levels
+const PVRTuint32 PVRTEX_VOLUME			= (1<<14);		// is this a volume texture
+const PVRTuint32 PVRTEX_ALPHA			= (1<<15);		// v2.1 is there transparency info in the texture
+const PVRTuint32 PVRTEX_VERTICAL_FLIP	= (1<<16);		// v2.1 is the texture vertically flipped
+
+const PVRTuint32 PVRTEX_PIXELTYPE		= 0xff;			// pixel type is always in the lowest 8 bits of the flags
+const PVRTuint32 PVRTEX_IDENTIFIER		= 0x21525650;	// the pvr identifier is the characters 'P','V','R','!'
+
+const PVRTuint32 PVRTEX_V1_HEADER_SIZE	= 44;			// old header size was 44 for identification purposes
+
+const PVRTuint32 PVRTC2_MIN_TEXWIDTH	= 16;
+const PVRTuint32 PVRTC2_MIN_TEXHEIGHT	= 8;
+const PVRTuint32 PVRTC4_MIN_TEXWIDTH	= 8;
+const PVRTuint32 PVRTC4_MIN_TEXHEIGHT	= 8;
+const PVRTuint32 ETC_MIN_TEXWIDTH		= 4;
+const PVRTuint32 ETC_MIN_TEXHEIGHT		= 4;
+const PVRTuint32 DXT_MIN_TEXWIDTH		= 4;
+const PVRTuint32 DXT_MIN_TEXHEIGHT		= 4;
+
+/****************************************************************************
+** Functions
+****************************************************************************/
+
+/*!***************************************************************************
+@Function		PVRTTextureCreate
+@Input			w			Size of the texture
+@Input			h			Size of the texture
+@Input			wMin		Minimum size of a texture level
+@Input			hMin		Minimum size of a texture level
+@Input			nBPP		Bits per pixel of the format
+@Input			bMIPMap		Create memory for MIP-map levels also?
+@Return			Allocated texture memory (must be free()d)
+@Description	Creates a PVRTextureHeaderV3 structure, including room for
+the specified texture, in memory.
+*****************************************************************************/
+PVRTextureHeaderV3 *PVRTTextureCreate(
+									  unsigned int		w,
+									  unsigned int		h,
+									  const unsigned int	wMin,
+									  const unsigned int	hMin,
+									  const unsigned int	nBPP,
+									  const bool			bMIPMap);
+
+/*!***************************************************************************
+@Function		PVRTTextureTile
+@Modified		pOut		The tiled texture in system memory
+@Input			pIn			The source texture
+@Input			nRepeatCnt	Number of times to repeat the source texture
+@Description	Allocates and fills, in system memory, a texture large enough
+to repeat the source texture specified number of times.
+*****************************************************************************/
+void PVRTTextureTile(
+					 PVRTextureHeaderV3			**pOut,
+					 const PVRTextureHeaderV3	* const pIn,
+					 const int					nRepeatCnt);
+
+/****************************************************************************
+** Internal Functions
+****************************************************************************/
+//Preprocessor definitions to generate a pixelID for use when consts are needed. For example - switch statements. These should be evaluated by the compiler rather than at run time - assuming that arguments are all constant.
+
+//Generate a 4 channel PixelID: channel names occupy bytes 0-3 and channel bit counts bytes 4-7. Every argument is parenthesised so that expression arguments (e.g. a|b) expand correctly.
+#define PVRTGENPIXELID4(C1Name, C2Name, C3Name, C4Name, C1Bits, C2Bits, C3Bits, C4Bits) ( ( (PVRTuint64)(C1Name)) + ( (PVRTuint64)(C2Name)<<8) + ( (PVRTuint64)(C3Name)<<16) + ( (PVRTuint64)(C4Name)<<24) + ( (PVRTuint64)(C1Bits)<<32) + ( (PVRTuint64)(C2Bits)<<40) + ( (PVRTuint64)(C3Bits)<<48) + ( (PVRTuint64)(C4Bits)<<56) )
+
+//Generate a 3 channel PixelID (the 4th channel name and bit count are 0).
+#define PVRTGENPIXELID3(C1Name, C2Name, C3Name, C1Bits, C2Bits, C3Bits)( PVRTGENPIXELID4(C1Name, C2Name, C3Name, 0, C1Bits, C2Bits, C3Bits, 0) )
+
+//Generate a 2 channel PixelID (the 3rd and 4th channel names and bit counts are 0).
+#define PVRTGENPIXELID2(C1Name, C2Name, C1Bits, C2Bits) ( PVRTGENPIXELID4(C1Name, C2Name, 0, 0, C1Bits, C2Bits, 0, 0) )
+
+//Generate a 1 channel PixelID (the 2nd, 3rd and 4th channel names and bit counts are 0).
+#define PVRTGENPIXELID1(C1Name, C1Bits) ( PVRTGENPIXELID4(C1Name, 0, 0, 0, C1Bits, 0, 0, 0))
+
+//Forward declaration of CPVRTMap.
+template <typename KeyType, typename DataType>
+class CPVRTMap;
+
+
+/*!***********************************************************************
+ @Function		PVRTGetBitsPerPixel
+ @Input			u64PixelFormat		A PVR Pixel Format ID.
+ @Return		const PVRTuint32	Number of bits per pixel.
+ @Description	Returns the number of bits per pixel in a PVR Pixel Format 
+				identifier.
+*************************************************************************/
+PVRTuint32 PVRTGetBitsPerPixel(PVRTuint64 u64PixelFormat);
+
+/*!***********************************************************************
+ @Function		PVRTGetFormatMinDims
+ @Input			u64PixelFormat	A PVR Pixel Format ID.
+ @Modified		minX			Returns the minimum width.
+ @Modified		minY			Returns the minimum height.
+ @Modified		minZ			Returns the minimum depth.
+ @Description	Gets the minimum dimensions (x,y,z) for a given pixel format.
+*************************************************************************/
+void PVRTGetFormatMinDims(PVRTuint64 u64PixelFormat, PVRTuint32 &minX, PVRTuint32 &minY, PVRTuint32 &minZ);
+
+/*!***********************************************************************
+ @Function		PVRTConvertOldTextureHeaderToV3
+ @Input			LegacyHeader	Legacy header for conversion.
+ @Modified		NewHeader		New header to output into.
+ @Modified		pMetaData		MetaData Map to output into.
+ @Description	Converts a legacy texture header (V1 or V2) to a current 
+				generation header (V3)
+*************************************************************************/
+void PVRTConvertOldTextureHeaderToV3(const PVR_Texture_Header* LegacyHeader, PVRTextureHeaderV3& NewHeader, CPVRTMap<PVRTuint32, CPVRTMap<PVRTuint32,MetaDataBlock> >* pMetaData);
+
+/*!***********************************************************************
+ @Function		PVRTMapLegacyTextureEnumToNewFormat
+ @Input			OldFormat		Legacy Enumeration Value
+ @Modified		newType			New PixelType identifier.
+ @Modified		newCSpace		New ColourSpace
+ @Modified		newChanType		New Channel Type
+ @Modified		isPreMult		Whether format is pre-multiplied
+ @Description	Maps a legacy enumeration value to the new PVR3 style format.
+*************************************************************************/
+void PVRTMapLegacyTextureEnumToNewFormat(PVRTPixelType OldFormat, PVRTuint64& newType, EPVRTColourSpace& newCSpace, EPVRTVariableType& newChanType, bool& isPreMult);
+
+/*!***************************************************************************
+@Function		PVRTTextureLoadTiled
+@Modified		pDst			Texture to place the tiled data
+@Input			nWidthDst		Width of destination texture
+@Input			nHeightDst		Height of destination texture
+@Input			pSrc			Texture to tile
+@Input			nWidthSrc		Width of source texture
+@Input			nHeightSrc		Height of source texture
+@Input 			nElementSize	Bytes per pixel
+@Input			bTwiddled		True if the data is twiddled
+@Description	Needed by PVRTTextureTile() in the various PVRTTextureAPIs
+*****************************************************************************/
+void PVRTTextureLoadTiled(
+						  PVRTuint8		* const pDst,
+						  const unsigned int	nWidthDst,
+						  const unsigned int	nHeightDst,
+						  const PVRTuint8	* const pSrc,
+						  const unsigned int	nWidthSrc,
+						  const unsigned int	nHeightSrc,
+						  const unsigned int	nElementSize,
+						  const bool			bTwiddled);
+
+
+/*!***************************************************************************
+@Function		PVRTTextureTwiddle
+@Output			a	Twiddled value
+@Input			u	Coordinate axis 0
+@Input			v	Coordinate axis 1
+@Description	Combine a 2D coordinate into a twiddled value
+*****************************************************************************/
+void PVRTTextureTwiddle(unsigned int &a, const unsigned int u, const unsigned int v);
+
+/*!***************************************************************************
+@Function		PVRTTextureDeTwiddle
+@Output			u	Coordinate axis 0
+@Output			v	Coordinate axis 1
+@Input			a	Twiddled value
+@Description	Extract 2D coordinates from a twiddled value.
+*****************************************************************************/
+void PVRTTextureDeTwiddle(unsigned int &u, unsigned int &v, const unsigned int a);
+
+/*!***********************************************************************
+@Function		PVRTGetTextureDataSize
+@Input			sTextureHeader	Specifies the texture header. 
+@Input			iMipLevel	Specifies a mip level to check, 'PVRTEX_ALLMIPLEVELS'
+							can be passed to get the size of all MIP levels. 
+@Input			bAllSurfaces	Size of all surfaces is calculated if true, 
+							only a single surface if false.
+@Input			bAllFaces	Size of all faces is calculated if true, 
+							only a single face if false.
+@Return			PVRTuint32		Size in BYTES of the specified texture area.
+@Description	Gets the size in BYTES of the texture, given various input 
+				parameters.	User can retrieve the size of either all 
+				surfaces or a single surface, all faces or a single face and
+				all MIP-Maps or a single specified MIP level.
+*************************************************************************/
+PVRTuint32 PVRTGetTextureDataSize(PVRTextureHeaderV3 sTextureHeader, PVRTint32 iMipLevel=PVRTEX_ALLMIPLEVELS, bool bAllSurfaces = true, bool bAllFaces = true);
+
+#endif /* _PVRTTEXTURE_H_ */
+
+/*****************************************************************************
+End of file (PVRTTexture.h)
+*****************************************************************************/
+
diff --git a/extern/pvrtextool/Include/PVRTTypes.h b/extern/pvrtextool/Include/PVRTTypes.h
new file mode 100644
index 0000000..984ff86
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTTypes.h
@@ -0,0 +1,120 @@
+/******************************************************************************
+
+ @File         PVRTTypes.h
+
+ @Title        PVRTTypes
+
+ @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     ANSI compatible
+
+ @Description  Global enums and typedefs for PVRTools
+
+******************************************************************************/
+
+//ACS: I split this out of PVRTGlobal.h
+
+#ifndef _PVRTTYPES_H_
+#define _PVRTTYPES_H_
+
+/*!***************************************************************************
+ Macros
+*****************************************************************************/
+//#include <stdio.h>
+
+// These macros check a condition at compile time (e.g. that a type has a given size).
+// If the condition is false they typedef an array of size -1, which is a compile error on
+// every compiler (an array of size 0 is silently accepted as an extension by GCC and Clang).
+#define PVRTSIZEASSERT(T, size) typedef int (sizeof_##T)[(sizeof(T) == (size)) ? 1 : -1]
+#define PVRTCOMPILEASSERT(T, expr)	typedef int (assert_##T)[(expr) ? 1 : -1]
+
+
+/****************************************************************************
+** Fixed-size integer and float types (sizes are checked by PVRTSIZEASSERT below)
+****************************************************************************/
+
+typedef char				PVRTchar8;
+typedef signed char			PVRTint8;
+typedef signed short		PVRTint16;
+typedef signed int			PVRTint32;
+typedef unsigned char		PVRTuint8;
+typedef unsigned short		PVRTuint16;
+typedef unsigned int		PVRTuint32;
+
+typedef float				PVRTfloat32;
+
+#if (defined(__int64) || defined(_WIN32))
+typedef signed __int64     PVRTint64;
+typedef unsigned __int64   PVRTuint64;
+#elif defined(TInt64)
+typedef TInt64             PVRTint64;
+typedef TUInt64            PVRTuint64;
+#else
+typedef signed long long   PVRTint64;
+typedef unsigned long long PVRTuint64;
+#endif // PVRTint64 / PVRTuint64 selection
+
+#if __SIZEOF_WCHAR_T__  == 4 || __WCHAR_MAX__ > 0x10000
+	#define PVRTSIZEOFWCHAR 4
+#else
+	#define PVRTSIZEOFWCHAR 2
+#endif // PVRTSIZEOFWCHAR (falls back to 2 when neither predefined macro indicates a wider wchar_t)
+
+PVRTSIZEASSERT(PVRTchar8,   1);
+PVRTSIZEASSERT(PVRTint8,   1);
+PVRTSIZEASSERT(PVRTuint8,  1);
+PVRTSIZEASSERT(PVRTint16,  2);
+PVRTSIZEASSERT(PVRTuint16, 2);
+PVRTSIZEASSERT(PVRTint32,  4);
+PVRTSIZEASSERT(PVRTuint32, 4);
+PVRTSIZEASSERT(PVRTint64,  8);
+PVRTSIZEASSERT(PVRTuint64, 8);
+PVRTSIZEASSERT(PVRTfloat32, 4);
+
+/*!**************************************************************************
+@Enum   ETextureFilter
+@Brief  Enum values for defining texture filtering
+****************************************************************************/
+enum ETextureFilter
+{
+	eFilter_Nearest,	// 0
+	eFilter_Linear,		// 1
+	eFilter_None,		// 2
+
+	eFilter_Size,							// 3: number of filter values listed above
+	eFilter_Default		= eFilter_Nearest,	// Alias: same value as eFilter_Nearest
+	eFilter_MipDefault	= eFilter_None		// Alias: same value as eFilter_None
+};
+
+/*!**************************************************************************
+@Enum   ETextureWrap
+@Brief  Enum values for defining texture wrapping
+****************************************************************************/
+enum ETextureWrap
+{
+	eWrap_Clamp,	// 0
+	eWrap_Repeat,	// 1
+
+	eWrap_Size,						// 2: number of wrap values listed above
+	eWrap_Default = eWrap_Repeat	// Alias: same value as eWrap_Repeat
+};
+
+/*****************************************************************************
+ ACS: Fallbacks for missing assert macros: when _ASSERT / _ASSERTE are not already
+      defined they expand to nothing. Maybe you needed to include PVRTGlobal.h after all?
+*****************************************************************************/
+#ifndef _ASSERT
+# define _ASSERT(X)
+#endif
+#ifndef _ASSERTE
+# define _ASSERTE(X)
+#endif
+
+#endif // _PVRTTYPES_H_
+
+/*****************************************************************************
+ End of file
+*****************************************************************************/
+
diff --git a/extern/pvrtextool/Include/PVRTexture.h b/extern/pvrtextool/Include/PVRTexture.h
new file mode 100644
index 0000000..df300e6
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTexture.h
@@ -0,0 +1,225 @@
+/******************************************************************************
+
+ @File         PVRTexture.h
+
+ @Title        
+
+ @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     
+
+ @Description  
+
+******************************************************************************/
+#ifndef _PVRTEXTURE_H
+#define _PVRTEXTURE_H
+
+#include "PVRTextureDefines.h"
+#include "PVRTextureHeader.h"
+//ACS: removed unnecessary includes:
+//#include "PVRTString.h"
+
+namespace pvrtexture
+{
+	class PVR_DLL CPVRTexture : public CPVRTextureHeader
+	{		
+	public:
+	/*******************************************************************************
+	* Construction methods for a texture.
+	*******************************************************************************/
+		/*!***********************************************************************
+		 @Function		CPVRTexture
+		 @Return		CPVRTexture A new texture.
+		 @Description	Creates a new empty texture
+		*************************************************************************/
+		CPVRTexture();
+		/*!***********************************************************************
+		 @Function		CPVRTexture
+		 @Input			sHeader
+		 @Input			pData
+		 @Return		CPVRTexture A new texture.
+		 @Description	Creates a new texture based on a texture header, 
+						pre-allocating the correct amount of memory. If data is
+						supplied, it will be copied into memory.
+		*************************************************************************/
+		CPVRTexture(const CPVRTextureHeader& sHeader, const void* pData=NULL);
+
+		/*!***********************************************************************
+		 @Function		CPVRTexture
+		 @Input			szFilePath
+		 @Return		CPVRTexture A new texture.
+		 @Description	Creates a new texture from a filepath.
+		*************************************************************************/
+		CPVRTexture(const char* szFilePath);
+
+		/*!***********************************************************************
+		 @Function		CPVRTexture
+		 @Input			pTexture
+		 @Return		CPVRTexture A new texture.
+		 @Description	Creates a new texture from a pointer that includes a header
+						structure, meta data and texture data as laid out in a file.
+						This functionality is primarily for user defined file loading.
+						Header may be any version of pvr.
+		*************************************************************************/
+		CPVRTexture( const void* pTexture );
+
+		/*!***********************************************************************
+		 @Function		CPVRTexture
+		 @Input			texture
+		 @Return		CPVRTexture A new texture
+		 @Description	Creates a new texture as a copy of another.
+		*************************************************************************/
+		CPVRTexture(const CPVRTexture& texture);
+
+		/*!***********************************************************************
+		 @Function		~CPVRTexture
+		 @Description	Destructor for CPVRTextures.
+		*************************************************************************/
+		~CPVRTexture();
+
+		/*!***********************************************************************
+		 @Function		operator=
+		 @Input			rhs
+		 @Return		CPVRTexture& This texture.
+		 @Description	Will copy the contents and information of another texture into this one.
+		*************************************************************************/
+		CPVRTexture& operator=(const CPVRTexture& rhs);
+		
+	/*******************************************************************************
+	* Texture accessor functions - others are inherited from CPVRTextureHeader.
+	*******************************************************************************/
+		/*!***********************************************************************
+		 @Function		getDataPtr
+		 @Input			uiMIPLevel
+		 @Input			uiArrayMember
+		 @Input			uiFaceNumber
+		 @Return		void* Pointer to a location in the texture.
+		 @Description	Returns a pointer into the texture's data. 
+						It is possible to specify an offset to specific array members, 
+						faces and MIP Map levels.
+		*************************************************************************/
+		void* getDataPtr(uint32 uiMIPLevel = 0, uint32 uiArrayMember = 0, uint32 uiFaceNumber = 0) const;
+
+		/*!***********************************************************************
+		 @Function		getHeader
+		 @Return		const CPVRTextureHeader& Returns the header only for this texture.
+		 @Description	Gets the header for this texture, allowing you to create a new
+						texture based on this one with some changes. Useful for passing
+						information about a texture without passing all of its data.
+		*************************************************************************/
+		const CPVRTextureHeader& getHeader() const;
+
+	/*******************************************************************************
+	* File io.
+	*******************************************************************************/
+
+		/*!***********************************************************************
+		 @Function		addPaddingMetaData
+		 @Input			uiPadding
+		 @Description	When writing the texture out to a PVR file, it is often
+						desirable to pad the meta data so that the start of the
+						texture data aligns to a given boundary.
+						This function pads to a boundary value equal to "uiPadding".
+						For example setting uiPadding=8 will align the start of the
+						texture data to an 8 byte boundary.
+						Note - this should be called immediately before saving as
+						the value is worked out based on the current meta data size.
+		*************************************************************************/
+		void addPaddingMetaData( uint32 uiPadding );
+
+		/*!***********************************************************************
+		 @Function		saveFile
+		 @Input			filepath
+		 @Return		bool Whether the method succeeds or not.
+		 @Description	Writes out to a file, given a filename and path. 
+						File type will be determined by the extension present in the string. 
+						If no extension is present, PVR format will be selected. 
+						Unsupported formats will result in failure.
+		*************************************************************************/
+		bool saveFile(const CPVRTString& filepath) const;
+	
+		/*!***********************************************************************
+		 @Function		saveFileLegacyPVR
+		 @Input			filepath
+		 @Input			eApi
+		 @Return		bool Whether the method succeeds or not.
+		 @Description	Writes out to a file, stripping any extensions specified
+						and appending .pvr. This function is for legacy support only
+						and saves out to PVR Version 2 file. The target api must be
+						specified in order to save to this format.
+		*************************************************************************/
+		bool saveFileLegacyPVR(const CPVRTString& filepath, ELegacyApi eApi) const;
+
+	private:
+		size_t	m_stDataSize;		// Size of the texture data.
+		uint8*	m_pTextureData;		// Pointer to texture data.
+
+	/*******************************************************************************
+	* Private IO functions
+	*******************************************************************************/
+		/*!***********************************************************************
+		 @Function		privateLoadPVRFile
+		 @Input			pTextureFile
+		 @Description	Loads a PVR file.
+		*************************************************************************/
+		bool privateLoadPVRFile(FILE* pTextureFile);
+
+		/*!***********************************************************************
+		 @Function		privateSavePVRFile
+		 @Input			pTextureFile
+		 @Description	Saves a PVR File.
+		*************************************************************************/
+		bool privateSavePVRFile(FILE* pTextureFile) const;
+
+		/*!***********************************************************************
+		 @Function		privateLoadKTXFile
+		 @Input			pTextureFile
+		 @Description	Loads a KTX file.
+		*************************************************************************/
+		bool privateLoadKTXFile(FILE* pTextureFile);
+
+		/*!***********************************************************************
+		 @Function		privateSaveKTXFile
+		 @Input			pTextureFile
+		 @Description	Saves a KTX File.
+		*************************************************************************/
+		bool privateSaveKTXFile(FILE* pTextureFile) const;
+
+		/*!***********************************************************************
+		 @Function		privateLoadDDSFile
+		 @Input			pTextureFile
+		 @Description	Loads a DDS file.
+		 *************************************************************************/
+		bool privateLoadDDSFile(FILE* pTextureFile);
+		bool privateLoadDDS10File(FILE* pTextureFile);	// NOTE(review): undocumented; presumably the DX10-header DDS variant - confirm
+
+		/*!***********************************************************************
+		 @Function		privateSaveDDSFile
+		 @Input			pTextureFile
+		 @Description	Saves a DDS File.
+		*************************************************************************/
+		bool privateSaveDDSFile(FILE* pTextureFile) const;
+
+		//Legacy IO
+		/*!***********************************************************************
+		 @Function		privateSaveCHeaderFile
+		 @Input			pTextureFile
+		 @Input			filename
+		 @Description	Saves a .h File.
+		*************************************************************************/
+		bool privateSaveCHeaderFile(FILE* pTextureFile, CPVRTString filename) const;
+
+		/*!***********************************************************************
+		 @Function		privateSaveLegacyPVRFile
+		 @Input			pTextureFile
+		 @Input			eApi
+		 @Description	Saves a legacy PVR File - Uses version 2 file format.
+		*************************************************************************/
+		bool privateSaveLegacyPVRFile(FILE* pTextureFile, ELegacyApi eApi) const;
+	};
+};
+
+#endif //_PVRTEXTURE_H
+
diff --git a/extern/pvrtextool/Include/PVRTextureDefines.h b/extern/pvrtextool/Include/PVRTextureDefines.h
new file mode 100644
index 0000000..ee6ba47
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTextureDefines.h
@@ -0,0 +1,109 @@
+/******************************************************************************
+
+ @File         PVRTextureDefines.h
+
+ @Title        
+
+ @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     
+
+ @Description  
+
+******************************************************************************/
+#ifndef _PVRTEXTURE_DEFINES_H
+#define _PVRTEXTURE_DEFINES_H
+
+//To use the PVRTexLib .dll on Windows, define _WINDLL_IMPORT (_WINDLL_EXPORT exports instead); with neither, PVR_DLL is empty.
+#ifndef PVR_DLL
+#if defined(_WINDLL_EXPORT)
+#define PVR_DLL __declspec(dllexport)
+//Forward declaration of PVRTexture Header, CPVRTMap and CPVRTArray. This exports their interfaces for DLLs.
+struct PVR_DLL PVRTextureHeaderV3;
+template <typename KeyType, typename DataType>
+class PVR_DLL CPVRTMap;
+template<typename T>
+class PVR_DLL CPVRTArray;
+#elif defined(_WINDLL_IMPORT)
+#define PVR_DLL __declspec(dllimport)
+//Forward declaration of PVRTexture Header, CPVRTMap and CPVRTArray. This imports their interfaces for DLLs.
+struct PVR_DLL PVRTextureHeaderV3;
+template <typename KeyType, typename DataType>
+class PVR_DLL CPVRTMap;
+template<typename T>
+class PVR_DLL CPVRTArray;
+#else
+#define PVR_DLL
+#endif // _WINDLL_EXPORT / _WINDLL_IMPORT
+#endif // PVR_DLL
+
+
+#include "PVRTTexture.h"
+
+namespace pvrtexture
+{
+	/*****************************************************************************
+	* Type defines for standard variable sizes.
+	*****************************************************************************/
+	typedef	signed char			int8;
+	typedef	signed short		int16;
+	typedef	signed int			int32;
+	typedef	signed long long    int64;
+	typedef unsigned char		uint8;
+	typedef unsigned short		uint16;
+	typedef unsigned int		uint32;
+	typedef	unsigned long long	uint64;
+	
+	/*****************************************************************************
+	* Texture related constants and enumerations. 
+	*****************************************************************************/
+	enum ECompressorQuality
+	{
+		ePVRTCFast=0,
+		ePVRTCNormal,
+		ePVRTCHigh,
+		ePVRTCBest,
+		eNumPVRTCModes,			// 4: number of PVRTC values listed above
+
+		eETCFast=0,				// Numbering restarts at 0, so the ETC values share the numeric values of the PVRTC ones
+		eETCFastPerceptual,
+		eETCSlow,
+		eETCSlowPerceptual,
+		eNumETCModes			// 4: number of ETC values listed above
+	};
+
+	enum EResizeMode
+	{
+		eResizeNearest,
+		eResizeLinear,
+		eResizeCubic,
+		eNumResizeModes			// 3: number of resize values listed above
+	};
+
+	// Legacy - API enums.
+	enum ELegacyApi
+	{
+		eOGLES=1,				// Numbering starts at 1, not 0
+		eOGLES2,
+		eD3DM,
+		eOGL,
+		eDX9,
+		eDX10,
+		eOVG,
+		eMGL,
+	};
+
+	/*****************************************************************************
+	* Useful macros (linear element offset of a 2D / 3D coordinate, row-by-row then slice-by-slice).
+	*****************************************************************************/
+	#define TEXOFFSET2D(x,y,width) ( ((x)+(y)*(width)) )
+	#define TEXOFFSET3D(x,y,z,width,height) ( ((x)+(y)*(width)+(z)*(width)*(height)) )
+
+	/*****************************************************************************
+	* Useful typedef for Meta Data Maps (a map of maps of MetaDataBlock, both keyed by uint32)
+	*****************************************************************************/
+	typedef CPVRTMap<uint32, CPVRTMap<uint32,MetaDataBlock> > MetaDataMap;
+};
+#endif //_PVRTEXTURE_DEFINES_H
diff --git a/extern/pvrtextool/Include/PVRTextureFormat.h b/extern/pvrtextool/Include/PVRTextureFormat.h
new file mode 100644
index 0000000..e0ff187
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTextureFormat.h
@@ -0,0 +1,90 @@
+/******************************************************************************
+
+ @File         PVRTextureFormat.h
+
+ @Title        
+
+ @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     
+
+ @Description  
+
+******************************************************************************/
+#ifndef _PVRT_PIXEL_FORMAT_H
+#define _PVRT_PIXEL_FORMAT_H
+
+#include "PVRTextureDefines.h"
+//ACS: removed unnecessary includes:
+//#include "PVRTString.h"
+
+namespace pvrtexture
+{
+	//Channel Names
+	enum EChannelName
+	{
+		eNoChannel,
+		eRed,
+		eGreen,
+		eBlue,
+		eAlpha,
+		eLuminance,
+		eIntensity,
+		eUnspecified,
+		eNumChannels	// 8: number of channel names listed above
+	};
+
+	//PixelType union - the data members below are alternative views of the same storage.
+	union PVR_DLL PixelType
+	{
+		/*!***********************************************************************
+		 @Function		PixelType
+		 @Return		A new PixelType
+		 @Description	Creates an empty pixeltype.
+		*************************************************************************/
+		PixelType();
+
+		/*!***********************************************************************
+		 @Function		PixelType
+		 @Input			Type
+		 @Return		A new PixelType
+		 @Description	Initialises a new pixel type from a 64 bit integer value.
+		*************************************************************************/
+		PixelType(uint64 Type);
+
+		/*!***********************************************************************
+		 @Function		PixelType
+		 @Input			C1Name
+		 @Input			C2Name
+		 @Input			C3Name
+		 @Input			C4Name
+		 @Input			C1Bits
+		 @Input			C2Bits
+		 @Input			C3Bits
+		 @Input			C4Bits
+		 @Return		A new PixelType
+		 @Description	Takes up to 4 characters (CnName) and 4 values (CnBits) 
+						to create a new PixelType. Any unused channels should be set to 0.
+						For example: PixelType('r','g','b',0,8,8,8,0);
+		*************************************************************************/
+		PixelType(uint8 C1Name, uint8 C2Name, uint8 C3Name, uint8 C4Name, uint8 C1Bits, uint8 C2Bits, uint8 C3Bits, uint8 C4Bits);
+
+		struct PVR_DLL LowHigh
+		{
+			uint32	Low;
+			uint32	High;
+		} Part;						// View of the value as two 32-bit words.
+
+		uint64	PixelTypeID;		// View of the value as one 64-bit integer.
+		uint8	PixelTypeChar[8];	// View of the value as eight individual bytes.
+	};
+
+	static const PixelType PVRStandard8PixelType = PixelType('r','g','b','a',8,8,8,8);
+	static const PixelType PVRStandard16PixelType = PixelType('r','g','b','a',16,16,16,16);
+	static const PixelType PVRStandard32PixelType = PixelType('r','g','b','a',32,32,32,32);
+}
+
+#endif
+
diff --git a/extern/pvrtextool/Include/PVRTextureHeader.h b/extern/pvrtextool/Include/PVRTextureHeader.h
new file mode 100644
index 0000000..7fb7815
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTextureHeader.h
@@ -0,0 +1,603 @@
+/******************************************************************************
+
+ @File         PVRTextureHeader.h
+
+ @Title        
+
+ @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     
+
+ @Description  
+
+******************************************************************************/
+#ifndef _PVRTEXTURE_HEADER_H
+#define _PVRTEXTURE_HEADER_H
+
+#include "PVRTextureDefines.h"
+#include "PVRTextureFormat.h"
+#include "PVRTMap.h"
+#include "PVRTString.h"
+
+namespace pvrtexture
+{
+	//Wrapper class for PVRTextureHeaderV3, adds 'smart' accessor functions.
+	class PVR_DLL CPVRTextureHeader
+	{	
+	protected:
+		PVRTextureHeaderV3											m_sHeader;		//Texture header as laid out in a file.
+		mutable CPVRTMap<uint32, CPVRTMap<uint32,MetaDataBlock> >	m_MetaData;		//Map of all the meta data stored for a texture.
+
+	public:
+	/*******************************************************************************
+	* Construction methods for a texture header.
+	*******************************************************************************/
+		/*!***********************************************************************
+		 @Function		CPVRTextureHeader
+		 @Return		CPVRTextureHeader A new texture header.
+		 @Description	Default constructor for a CPVRTextureHeader. Returns an empty header.
+		*************************************************************************/
+		CPVRTextureHeader();
+
+		/*!***********************************************************************
+		 @Function		CPVRTextureHeader
+		 @Input			fileHeader
+		 @Input			metaDataCount
+		 @Input			metaData
+		 @Return		CPVRTextureHeader A new texture header.
+		 @Description	Creates a new texture header from a PVRTextureHeaderV3, 
+						and appends Meta data if any is supplied.
+		*************************************************************************/
+		CPVRTextureHeader(	PVRTextureHeaderV3	fileHeader,
+							uint32				metaDataCount=0,
+							MetaDataBlock*		metaData=NULL);
+
+		/*!***********************************************************************
+		 @Function		CPVRTextureHeader
+		 @Input			u64PixelFormat
+		 @Input			u32Height
+		 @Input			u32Width
+		 @Input			u32Depth
+		 @Input			u32NumMipMaps
+		 @Input			u32NumArrayMembers
+		 @Input			u32NumFaces
+		 @Input			eColourSpace
+		 @Input			eChannelType
+		 @Input			bPreMultiplied
+		 @Return		CPVRTextureHeader A new texture header.
+		 @Description	Creates a new texture header based on individual header
+						variables.
+		*************************************************************************/
+		CPVRTextureHeader(	uint64				u64PixelFormat,
+							uint32				u32Height=1,
+							uint32				u32Width=1,
+							uint32				u32Depth=1,
+							uint32				u32NumMipMaps=1,
+							uint32				u32NumArrayMembers=1,
+							uint32				u32NumFaces=1,
+							EPVRTColourSpace	eColourSpace=ePVRTCSpacelRGB,
+							EPVRTVariableType	eChannelType=ePVRTVarTypeUnsignedByteNorm,
+							bool				bPreMultiplied=false);
+
+		/*!***********************************************************************
+		 @Function		operator=
+		 @Input			rhs
+		 @Return		CPVRTextureHeader& This header.
+		 @Description	Will copy the contents and information of another header into this one.
+		*************************************************************************/
+		CPVRTextureHeader& operator=(const CPVRTextureHeader& rhs);
+		
+	/*******************************************************************************
+	* Accessor Methods for a texture's properties - getters.
+	*******************************************************************************/
+
+		/*!***********************************************************************
+		 @Function		getFileHeader
+		 @Return		PVRTextureHeaderV3		The file header.
+		 @Description	Gets the file header structure.
+		*************************************************************************/
+		const PVRTextureHeaderV3 getFileHeader() const;
+
+		/*!***********************************************************************
+		 @Function		getPixelType
+		 @Return		PixelType		64-bit pixel type ID.
+		 @Description	Gets the 64-bit pixel type ID of the texture.
+		*************************************************************************/
+		const PixelType getPixelType() const;
+
+		/*!***********************************************************************
+		 @Function		getBitsPerPixel
+		 @Return		uint32		Number of bits per pixel
+		 @Description	Gets the bits per pixel of the texture format.
+		*************************************************************************/
+		const uint32 getBitsPerPixel() const;
+
+		/*!***********************************************************************
+		 @Function		getColourSpace
+		 @Return		EPVRTColourSpace	enum representing colour space.
+		 @Description	Returns the colour space of the texture.
+		*************************************************************************/
+		const EPVRTColourSpace getColourSpace() const;
+
+		/*!***********************************************************************
+		 @Function		getChannelType
+		 @Return		EPVRTVariableType	enum representing the type of the texture.
+		 @Description	Returns the variable type that the texture's data is stored in.
+		*************************************************************************/
+		const EPVRTVariableType getChannelType() const;
+
+		/*!***********************************************************************
+		 @Function		getWidth
+		 @Input				uiMipLevel	MIP level that user is interested in.
+		 @Return		uint32		Width of the specified MIP-Map level.
+		 @Description	Gets the width of the user specified MIP-Map 
+						level for the texture
+		*************************************************************************/
+		const uint32 getWidth(uint32 uiMipLevel=PVRTEX_TOPMIPLEVEL) const;
+
+		/*!***********************************************************************
+		 @Function		getHeight
+		 @Input				uiMipLevel	MIP level that user is interested in.
+		 @Return		uint32		Height of the specified MIP-Map level.
+		 @Description	Gets the height of the user specified MIP-Map 
+						level for the texture
+		*************************************************************************/
+		const uint32 getHeight(uint32 uiMipLevel=PVRTEX_TOPMIPLEVEL) const;
+
+		/*!***********************************************************************
+		 @Function		getDepth
+		 @Input				uiMipLevel	MIP level that user is interested in.
+		 @Return		Depth of the specified MIP-Map level.
+		 @Description	Gets the depth of the user specified MIP-Map 
+						level for the texture
+		*************************************************************************/
+		const uint32 getDepth(uint32 uiMipLevel=PVRTEX_TOPMIPLEVEL) const;
+
+		/*!***********************************************************************
+		 @Function		getTextureSize
+		 @Input				iMipLevel		Specifies a MIP level to check, 
+										'PVRTEX_ALLMIPLEVELS' can be passed to get 
+										the size of all MIP levels. 
+		 @Input				bAllSurfaces	Size of all surfaces is calculated if true, 
+										only a single surface if false.
+		 @Input				bAllFaces		Size of all faces is calculated if true, 
+										only a single face if false.
+		 @Return		uint32			Size in PIXELS of the specified texture area.
+		 @Description	Gets the size in PIXELS of the texture, given various input 
+						parameters.	User can retrieve the total size of either all 
+						surfaces or a single surface, all faces or a single face and
+						all MIP-Maps or a single specified MIP level.
+		*************************************************************************/
+		const uint32 getTextureSize(int32 iMipLevel=PVRTEX_ALLMIPLEVELS, bool bAllSurfaces = true, bool bAllFaces = true) const;
+
+		/*!***********************************************************************
+		 @Function		getDataSize
+		 @Input				iMipLevel		Specifies a mip level to check, 
+										'PVRTEX_ALLMIPLEVELS' can be passed to get 
+										the size of all MIP levels. 
+		 @Input				bAllSurfaces	Size of all surfaces is calculated if true, 
+										only a single surface if false.
+		 @Input				bAllFaces		Size of all faces is calculated if true, 
+										only a single face if false.
+		 @Return		uint32			Size in BYTES of the specified texture area.
+		 @Description	Gets the size in BYTES of the texture, given various input 
+						parameters.	User can retrieve the size of either all 
+						surfaces or a single surface, all faces or a single face 
+						and all MIP-Maps or a single specified MIP level.
+		*************************************************************************/
+		const uint32 getDataSize(int32 iMipLevel=PVRTEX_ALLMIPLEVELS, bool bAllSurfaces = true, bool bAllFaces = true) const;
+
+		/*!***********************************************************************
+		 @Function		getNumArrayMembers
+		 @Return		uint32		Number of array members in this texture.
+		 @Description	Gets the number of array members stored in this texture.
+		*************************************************************************/
+		const uint32 getNumArrayMembers() const;
+
+		/*!***********************************************************************
+		 @Function		getNumMIPLevels
+		 @Return		uint32		Number of MIP-Map levels in this texture.
+		 @Description	Gets the number of MIP-Map levels stored in this texture.
+		*************************************************************************/
+		const uint32 getNumMIPLevels() const;
+
+		/*!***********************************************************************
+		 @Function		getNumFaces
+		 @Return		uint32		Number of faces in this texture.
+		 @Description	Gets the number of faces stored in this texture.
+		*************************************************************************/
+		const uint32 getNumFaces() const;
+
+		/*!***********************************************************************
+		 @Function		getOrientation
+		 @Input				axis			EPVRTAxis type specifying the axis to examine.
+		 @Return		EPVRTOrientation	Enum orientation of the axis.
+		 @Description	Gets the data orientation for this texture.
+		*************************************************************************/
+		const EPVRTOrientation getOrientation(EPVRTAxis axis) const;
+
+		/*!***********************************************************************
+		 @Function		isFileCompressed
+		 @Return		bool	True if it is file compressed.
+		 @Description	Returns whether or not the texture is compressed using
+						PVRTexLib's FILE compression - this is independent of 
+						any texture compression.
+		*************************************************************************/
+		const bool isFileCompressed() const;
+				
+		/*!***********************************************************************
+		 @Function		isPreMultiplied
+		 @Return		bool	True if texture is premultiplied.
+		 @Description	Returns whether or not the texture's colour has been
+						pre-multiplied by the alpha values.
+		*************************************************************************/
+		const bool isPreMultiplied() const;
+
+		/*!***********************************************************************
+		 @Function		getMetaDataSize
+		 @Return		const uint32 Size, in bytes, of the meta data stored in the header.
+		 @Description	Returns the total size of the meta data stored in the header. 
+						This includes the size of all information stored in all MetaDataBlocks.
+		*************************************************************************/
+		const uint32 getMetaDataSize() const;
+
+		/*!***********************************************************************
+		@Function		getOGLFormat
+		@Modified		internalformat
+		@Modified		format
+		@Modified		type
+		@Description	Gets the OpenGL equivalent values of internal format, format
+						and type for this texture. This will return any supported
+						OpenGL texture values, it is up to the user to decide if 
+						these are valid for their current platform.
+		*************************************************************************/
+		const void getOGLFormat(uint32& internalformat, uint32& format, uint32& type) const;
+
+		/*!***********************************************************************
+		 @Function		getOGLESFormat
+		 @Modified		internalformat
+		 @Modified		format
+		 @Modified		type
+		 @Description	Gets the OpenGLES equivalent values of internal format, 
+						format and type for this texture. This will return any 
+						supported OpenGLES texture values, it is up to the user 
+						to decide if these are valid for their current platform.
+		*************************************************************************/
+		const void getOGLESFormat(uint32& internalformat, uint32& format, uint32& type) const;
+
+		/*!***********************************************************************
+		 @Function		getD3DFormat
+		 @Return		const uint32 
+		 @Description	Gets the D3DFormat (up to DirectX 9 and Direct 3D Mobile)
+						equivalent values for this texture. This will return any 
+						supported D3D texture formats, it is up to the user to
+						decide if this is valid for their current platform.
+		*************************************************************************/
+		const uint32 getD3DFormat() const;
+		
+		/*!***********************************************************************
+		 @Function		getDXGIFormat
+		 @Return		const uint32 
+		 @Description	Gets the DXGIFormat (DirectX 10 onward) equivalent values 
+						for this texture. This will return any supported DX texture
+						formats, it is up to the user to decide if this is valid 
+						for their current platform.
+		*************************************************************************/
+		const uint32 getDXGIFormat() const;
+
+	/*!***********************************************************************
+	* Accessor Methods for a texture's properties - setters.
+	*************************************************************************/
+
+		/*!***********************************************************************
+		 @Function		setPixelFormat
+		 @Input				uPixelFormat	The format of the pixel.
+		 @Description	Sets the pixel format for this texture.
+		*************************************************************************/
+		void setPixelFormat(PixelType uPixelFormat);
+
+		/*!***********************************************************************
+		 @Function		setColourSpace
+		 @Input				eColourSpace	A colour space enum.
+		 @Description	Sets the colour space for this texture. Default is lRGB.
+		*************************************************************************/
+		void setColourSpace(EPVRTColourSpace eColourSpace);
+
+		/*!***********************************************************************
+		 @Function		setChannelType
+		 @Input				eVarType	A variable type enum.
+		 @Description	Sets the variable type for the channels in this texture.
+		*************************************************************************/
+		void setChannelType(EPVRTVariableType eVarType);
+
+		/*!***********************************************************************
+		 @Function		setOGLFormat
+		 @Input			internalformat
+		 @Input			format
+		 @Input			type
+		 @Return		bool Whether the format is valid or not.
+		 @Description	Sets the format of the texture to PVRTexLib's internal
+						representation of the OGL format.
+		*************************************************************************/
+		bool setOGLFormat(const uint32& internalformat, const uint32& format, const uint32& type);
+
+		/*!***********************************************************************
+		 @Function		setOGLESFormat
+		 @Input			internalformat
+		 @Input			format
+		 @Input			type
+		 @Return		bool Whether the format is valid or not.
+		 @Description	Sets the format of the texture to PVRTexLib's internal
+						representation of the OGLES format.
+		*************************************************************************/
+		bool setOGLESFormat(const uint32& internalformat, const uint32& format, const uint32& type);
+
+		/*!***********************************************************************
+		 @Function		setD3DFormat
+		 @Return		bool Whether the format is valid or not.
+		 @Description	Sets the format of the texture to PVRTexLib's internal
+						representation of the D3D format.
+		*************************************************************************/
+		bool setD3DFormat(const uint32& DWORD_D3D_FORMAT);
+		
+		/*!***********************************************************************
+		 @Function		setDXGIFormat
+		 @Return		bool Whether the format is valid or not.
+		 @Description	Sets the format of the texture to PVRTexLib's internal
+						representation of the DXGI format.
+		*************************************************************************/
+		bool setDXGIFormat(const uint32& DWORD_DXGI_FORMAT);
+
+		/*!***********************************************************************
+		 @Function		setWidth
+		 @Input				newWidth	The new width.
+		 @Description	Sets the width.
+		*************************************************************************/
+		void setWidth(uint32 newWidth);
+
+		/*!***********************************************************************
+		 @Function		setHeight
+		 @Input				newHeight	The new height.
+		 @Description	Sets the height.
+		*************************************************************************/
+		void setHeight(uint32 newHeight);
+
+		/*!***********************************************************************
+		 @Function		setDepth
+		 @Input				newDepth	The new depth.
+		 @Description	Sets the depth.
+		*************************************************************************/
+		void setDepth(uint32 newDepth);
+
+		/*!***********************************************************************
+		 @Function		setNumArrayMembers
+		 @Input				newNumMembers	The new number of members in this array.
+		 @Description	Sets the number of array members.
+		*************************************************************************/
+		void setNumArrayMembers(uint32 newNumMembers);
+
+		/*!***********************************************************************
+		 @Function		setNumMIPLevels
+		 @Input				newNumMIPLevels		New number of MIP-Map levels.
+		 @Description	Sets the number of MIP-Map levels in this texture.
+		*************************************************************************/
+		void setNumMIPLevels(uint32 newNumMIPLevels);
+
+		/*!***********************************************************************
+		 @Function		setNumFaces
+		 @Input				newNumFaces New number of faces for this texture.
+		 @Description	Sets the number of faces stored in this texture.
+		*************************************************************************/
+		void setNumFaces(uint32 newNumFaces);
+
+		/*!***********************************************************************
+		 @Function		setOrientation
+		 @Input				eAxisOrientation Enum specifying axis and orientation.
+		 @Description	Sets the data orientation for a given axis in this texture.
+		*************************************************************************/
+		void setOrientation(EPVRTOrientation eAxisOrientation);
+
+		/*!***********************************************************************
+		 @Function		setIsFileCompressed
+		 @Input				isFileCompressed	Sets file compression to true/false.
+		 @Description	Sets whether or not the texture is compressed using
+						PVRTexLib's FILE compression - this is independent of 
+						any texture compression. Currently unsupported.
+		*************************************************************************/
+		void setIsFileCompressed(bool isFileCompressed);
+		
+		/*!***********************************************************************
+		 @Function		setIsPreMultiplied
+		 @Input				isPreMultiplied	Sets if texture is premultiplied.
+		 @Description	Sets whether or not the texture's colour has been
+						pre-multiplied by the alpha values.
+		*************************************************************************/
+		void setIsPreMultiplied(bool isPreMultiplied);
+
+	/*!***********************************************************************
+	 Meta Data functions - Getters.
+	*************************************************************************/	
+
+		/*!***********************************************************************
+		 @Function		isBumpMap
+		 @Return		bool	True if it is a bump map.
+		 @Description	Returns whether the texture is a bump map or not.
+		*************************************************************************/
+		const bool isBumpMap() const;
+
+		/*!***********************************************************************
+		 @Function		getBumpMapScale
+		 @Return		float	Returns the bump map scale.
+		 @Description	Gets the bump map scaling value for this texture. If the
+						texture is not a bump map, 0.0f is returned. If the
+						texture is a bump map but no meta data is stored to
+						specify its scale, then 1.0f is returned.
+		*************************************************************************/
+		const float getBumpMapScale() const;
+
+		/*!***********************************************************************
+		 @Function		getBumpMapOrder
+		 @Return		CPVRTString		Returns bump map order relative to rgba.
+		 @Description	Gets the bump map channel order relative to rgba. For
+						example, an RGB texture with bumps mapped to XYZ returns 
+						'xyz'. A BGR texture with bumps in the order ZYX will also 
+						return 'xyz' as the mapping is the same: R=X, G=Y, B=Z.
+						If the letter 'h' is present in the string, it means that
+						the height map has been stored here.
+						Other characters are possible if the bump map was created
+						manually, but PVRTexLib will ignore these characters. They
+						are returned simply for completeness.
+		*************************************************************************/
+		const CPVRTString getBumpMapOrder() const;
+
+		/*!***********************************************************************
+		 @Function		getNumTextureAtlasMembers
+		 @Return		int		Returns number of sub textures defined by meta data.
+		 @Description	Works out the number of possible texture atlas members in
+						the texture based on the w/h/d and the data size.
+						TODO: Is this the right way to do things? Should I return number of floats? Or just data size? Hmm. Also need to make it TWO floats per dimension, and also possibly a rotated value. Not sure.
+		*************************************************************************/
+		const int getNumTextureAtlasMembers() const;
+
+		/*!***********************************************************************
+		 @Function		getTextureAtlasData
+		 @Return		float*		Returns a pointer directly to the texture atlas data.
+		 @Description	Returns a pointer to the texture atlas data.
+						TODO: Maybe I should return a copy rather than the original.
+		*************************************************************************/
+		const float* getTextureAtlasData() const;
+
+		/*!***********************************************************************
+		 @Function		getCubeMapOrder
+		 @Return		CPVRTString		Returns cube map order.
+		 @Description	Gets the cube map face order. Returned string will be in 
+						the form "ZzXxYy" with capitals representing positive and
+						small letters representing negative. I.e. Z=Z-Positive,
+						z=Z-Negative.
+		*************************************************************************/
+		const CPVRTString getCubeMapOrder() const;
+
+		/*!***********************************************************************
+		 @Function		getBorder
+		 @Modified		uiBorderWidth
+		 @Modified		uiBorderHeight
+		 @Modified		uiBorderDepth
+		 @Description	Obtains the border size in each dimension for this texture.
+		*************************************************************************/
+		void getBorder(uint32& uiBorderWidth, uint32& uiBorderHeight, uint32& uiBorderDepth) const;
+
+		/*!***********************************************************************
+		 @Function		getMetaData
+		 @Input			DevFOURCC
+		 @Input			u32Key
+		 @Return		pvrtexture::MetaDataBlock A copy of the meta data from the texture.
+		 @Description	Returns a block of meta data from the texture. If the meta data doesn't exist, a block with data size 0 will be returned.
+		*************************************************************************/
+		MetaDataBlock getMetaData(uint32 DevFOURCC, uint32 u32Key) const;
+
+		/*!***********************************************************************
+		 @Function		hasMetaData
+		 @Input			DevFOURCC
+		 @Input			u32Key
+		 @Return		bool Whether or not the meta data block specified exists
+		 @Description	Returns whether or not the specified meta data exists as 
+						part of this texture header.
+		*************************************************************************/
+		bool hasMetaData(uint32 DevFOURCC, uint32 u32Key) const;
+
+		/*!***********************************************************************
+		 @Function		getMetaDataMap
+		 @Return		MetaDataMap* A direct pointer to the MetaData map.
+		 @Description	A pointer directly to the Meta Data Map, to allow users to read out data.
+		*************************************************************************/
+		const MetaDataMap* const getMetaDataMap() const;
+
+	/*!***********************************************************************
+	 Meta Data functions - Setters.
+	*************************************************************************/	
+		
+		/*!***********************************************************************
+		 @Function		setBumpMap
+		 @Input				bumpScale	Floating point "height" value to scale the bump map.
+		 @Input				bumpOrder	Up to 4 character string, with values x,y,z,h in 
+									some combination. Not all values need to be present.
+									Denotes channel order; x,y,z refer to the 
+									corresponding axes, h indicates presence of the
+									original height map. It is possible to have only some
+									of these values rather than all. For example if 'h'
+									is present alone it will be considered a height map.
+									The values should be presented in RGBA order, regardless
+									of the texture format, so a zyxh order in a bgra texture
+									should still be passed as 'xyzh'. Capitals are allowed.
+									Any character stored here that is not one of x,y,z,h
+									or a NULL character	will be ignored when PVRTexLib 
+									reads the data,	but will be preserved. This is useful
+									if you wish to define a custom data channel for instance.
+									In these instances PVRTexLib will assume it is simply
+									colour data.
+		 @Description	Sets a texture's bump map data.
+		*************************************************************************/
+		void setBumpMap(float bumpScale, CPVRTString bumpOrder="xyz");
+
+		/*!***********************************************************************
+		 @Function		setTextureAtlas
+		 @Input				pAtlasData	Pointer to an array of atlas data.
+		 @Input				dataSize	Number of floats that the data pointer contains.
+		 @Description	Sets the texture atlas coordinate meta data for later display.
+						It is up to the user to make sure that this texture atlas
+						data actually makes sense in the context of the header. It is
+						suggested that the "generateTextureAtlas" method in the tools
+						is used to create a texture atlas, manually setting one up is 
+						possible but should be done with care.
+		*************************************************************************/
+		void setTextureAtlas(float* pAtlasData, uint32 dataSize);
+
+		/*!***********************************************************************
+		 @Function		setCubeMapOrder
+		 @Input				cubeMapOrder	Up to 6 character string, with values 
+										x,X,y,Y,z,Z in some combination. Not all 
+										values need to be present. Denotes face 
+										order; Capitals refer to positive axis 
+										positions and small letters refer to 
+										negative axis positions. E.g. x=X-Negative,
+										X=X-Positive. It is possible to have only 
+										some of these values rather than all, as 
+										long as they are NULL terminated.
+										NB: Values past the 6th character are not read.
+		 @Description	Sets a texture's cube map face order.
+		*************************************************************************/
+		void setCubeMapOrder(CPVRTString cubeMapOrder="XxYyZz");
+
+		/*!***********************************************************************
+		 @Function		setBorder
+		 @Input			uiBorderWidth
+		 @Input			uiBorderHeight
+		 @Input			uiBorderDepth
+		 @Return		void 
+		 @Description	Sets a texture's border size data. This value is subtracted 
+						from the current texture height/width/depth to get the valid 
+						texture data.
+		*************************************************************************/
+		void setBorder(uint32 uiBorderWidth, uint32 uiBorderHeight, uint32 uiBorderDepth);
+
+		/*!***********************************************************************
+		 @Function		addMetaData
+		 @Input				MetaBlock	Meta data block to be added.
+		 @Description	Adds an arbitrary piece of meta data.
+		*************************************************************************/
+		void addMetaData(const MetaDataBlock& MetaBlock);
+				
+		/*!***********************************************************************
+		 @Function		removeMetaData
+		 @Input			DevFourCC
+		 @Input			u32Key
+		 @Return		void 
+		 @Description	Removes a specified piece of meta data, if it exists.
+		*************************************************************************/
+		void removeMetaData(const uint32& DevFourCC, const uint32& u32Key);
+	};
+};
+
+#endif
diff --git a/extern/pvrtextool/Include/PVRTextureUtilities.h b/extern/pvrtextool/Include/PVRTextureUtilities.h
new file mode 100644
index 0000000..0d5050a
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTextureUtilities.h
@@ -0,0 +1,171 @@
+/******************************************************************************
+
+ @File         PVRTextureUtilities.h
+
+ @Title        
+
+ @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     
+
+ @Description  
+
+******************************************************************************/
+#ifndef _PVRTEXTURE_UTILITIES_H
+#define _PVRTEXTURE_UTILITIES_H
+
+#include "PVRTextureFormat.h"
+#include "PVRTexture.h"
+
+namespace pvrtexture
+{
+	/*!***********************************************************************
+	 @Function		Resize
+	 @Input			sTexture
+	 @Input			u32NewWidth
+	 @Input			u32NewHeight
+	 @Input			u32NewDepth
+	 @Input			eResizeMode
+	 @Return		bool Whether the method succeeds or not.
+	 @Description	Resizes the texture to new specified dimensions. Filtering 
+					mode is specified with "eResizeMode".
+	*************************************************************************/
+	bool PVR_DLL Resize(CPVRTexture& sTexture, const uint32& u32NewWidth, const uint32& u32NewHeight, const uint32& u32NewDepth, const EResizeMode eResizeMode);
+
+	/*!***********************************************************************
+	 @Function		Rotate90
+	 @Input			sTexture
+	 @Input			eRotationAxis
+	 @Input			bForward
+	 @Return		bool Whether the method succeeds or not.
+	 @Description	Rotates a texture by 90 degrees around the given axis. bForward controls direction of rotation.
+	*************************************************************************/
+	bool PVR_DLL Rotate90(CPVRTexture& sTexture, const EPVRTAxis eRotationAxis, const bool bForward);
+
+	/*!***********************************************************************
+	 @Function		Flip
+	 @Input			sTexture
+	 @Input			eFlipDirection
+	 @Return		bool Whether the method succeeds or not.
+	 @Description	Flips a texture in a given direction.
+	*************************************************************************/
+	bool PVR_DLL Flip(CPVRTexture& sTexture, const EPVRTAxis eFlipDirection);
+
+	/*!***********************************************************************
+	 @Function		Border
+	 @Input			sTexture
+	 @Input			uiBorderX
+	 @Input			uiBorderY
+	 @Input			uiBorderZ
+	 @Return		bool Whether the method succeeds or not.
+	 @Description	Adds a user specified border to the texture.
+	*************************************************************************/
+	bool PVR_DLL Border(CPVRTexture& sTexture, uint32 uiBorderX, uint32 uiBorderY, uint32 uiBorderZ);
+
+	/*!***********************************************************************
+	 @Function		PreMultiplyAlpha
+	 @Input			sTexture
+	 @Return		bool Whether the method succeeds or not.
+	 @Description	Pre-multiplies a texture's colours by its alpha values.
+	*************************************************************************/
+	bool PVR_DLL PreMultiplyAlpha(CPVRTexture& sTexture);
+
+	/*!***********************************************************************
+	 @Function		Bleed
+	 @Input			sTexture
+	 @Return		bool Whether the method succeeds or not.
+	 @Description	Allows a texture's colours to run into any fully transparent areas.
+	*************************************************************************/
+	bool PVR_DLL Bleed(CPVRTexture& sTexture);
+
+	/*!***********************************************************************
+	 @Function		SetChannels
+	 @Input			sTexture
+	 @Input			uiNumChannelSets
+	 @Input			eChannels
+	 @Input			pValues
+	 @Return		bool Whether the method succeeds or not.
+	 @Description	Sets the specified number of channels to values specified in pValues.
+	 *************************************************************************/
+	bool PVR_DLL SetChannels(CPVRTexture& sTexture, uint32 uiNumChannelSets, EChannelName *eChannels, uint32 *pValues);
+	bool PVR_DLL SetChannelsFloat(CPVRTexture& sTexture, uint32 uiNumChannelSets, EChannelName *eChannels, float *pValues);
+
+	/*!***********************************************************************
+	 @Function		CopyChannels
+	 @Input			sTexture
+	 @Input			sTextureSource
+	 @Input			uiNumChannelCopies
+	 @Input			eChannels
+	 @Input			eChannelsSource
+	 @Return		bool Whether the method succeeds or not.
+	 @Description	Copies the specified channels from sTextureSource into sTexture. 
+					sTextureSource is not modified so it is possible to use the
+					same texture as both input and output. When using the same 
+					texture as source and destination, channels are preserved
+					between swaps (e.g. copying Red to Green and then Green to Red
+					will result in the two channels trading places correctly).
+					Channels in eChannels are set to the value of the channels 
+					in eChannelsSource.
+	*************************************************************************/
+	bool PVR_DLL CopyChannels(CPVRTexture& sTexture, const CPVRTexture& sTextureSource, uint32 uiNumChannelCopies, EChannelName *eChannels, EChannelName *eChannelsSource);
+
+	/*!***********************************************************************
+	 @Function		GenerateNormalMap
+	 @Input			sTexture
+	 @Input			fScale
+	 @Input			sChannelOrder
+	 @Return		bool Whether the method succeeds or not.
+	 @Description	Generates a Normal Map from a given height map.
+					Assumes the red channel has the height values.
+					By default outputs to red/green/blue = x/y/z,
+					this can be overridden by specifying a channel
+					order in sChannelOrder. The channels specified
+					will output to red/green/blue/alpha in that order.
+					So "xyzh" maps x to red, y to green, z to blue
+					and h to alpha. 'h' is used to specify that the
+					original height map data should be preserved in
+					the given channel.
+	*************************************************************************/
+	bool PVR_DLL GenerateNormalMap(CPVRTexture& sTexture, const float fScale, CPVRTString sChannelOrder);
+
+	/*!***********************************************************************
+	 @Function		GenerateMIPMaps
+	 @Input			sTexture
+	 @Input			eFilterMode
+	 @Input			uiMIPMapsToDo
+	 @Return		bool Whether the method succeeds or not.
+	 @Description	Generates MIPMaps for a source texture. Default is to
+					create a complete MIPMap chain, however this can be
+					overridden with uiMIPMapsToDo.
+	*************************************************************************/
+	bool PVR_DLL GenerateMIPMaps(CPVRTexture& sTexture, const EResizeMode eFilterMode, const uint32 uiMIPMapsToDo=PVRTEX_ALLMIPLEVELS);
+	
+	/*!***********************************************************************
+	 @Function		ColourMIPMaps
+	 @Input			sTexture
+	 @Return		bool Whether the method succeeds or not.
+	 @Description	Colours a texture's MIPMap levels with artificial colours 
+					for debugging. MIP levels are coloured in the order:
+					Red, Green, Blue, Cyan, Magenta and Yellow
+					in a repeating pattern.
+	*************************************************************************/
+	bool PVR_DLL ColourMIPMaps(CPVRTexture& sTexture);
+	
+	/*!***********************************************************************
+	 @Function		Transcode
+	 @Input			sTexture
+	 @Input			ptFormat
+	 @Input			eChannelType
+	 @Input			eColourspace
+	 @Input			eQuality
+	 @Input			bDoDither
+	 @Return		bool Whether the method succeeds or not.
+	 @Description	Transcodes a texture from its original format into a newly specified format.
+					Will either quantise or dither to lower precisions based on bDoDither.
+					eQuality specifies the quality for PVRTC and ETC compression.
+	*************************************************************************/
+	bool PVR_DLL Transcode(CPVRTexture& sTexture, const PixelType ptFormat, const EPVRTVariableType eChannelType, const EPVRTColourSpace eColourspace, const ECompressorQuality eQuality=ePVRTCNormal, const bool bDoDither=false);
+};
+#endif //_PVRTEXTURE_UTILITIES_H
diff --git a/extern/pvrtextool/Include/PVRTextureVersion.h b/extern/pvrtextool/Include/PVRTextureVersion.h
new file mode 100644
index 0000000..6c98c84
--- /dev/null
+++ b/extern/pvrtextool/Include/PVRTextureVersion.h
@@ -0,0 +1,23 @@
+/******************************************************************************
+
+ @File         PVRTextureVersion.h
+
+ @Title        
+
+ @Version      
+
+ @Copyright    Copyright (c) Imagination Technologies Limited. All Rights Reserved. Strictly Confidential.
+
+ @Platform     ANSI
+
+ @Description  Texture processing routines.
+
+******************************************************************************/
+#ifndef PVRTEXLIBVERSION_H
+#define PVRTEXLIBVERSION_H
+#define PVRTLMAJORVERSION 4
+#define PVRTLMINORVERSION 2
+#define PVRTLSTRINGVERSION "4.2"
+#define PVRTLVERSIONDESCRIPTOR "" //"BETA" //"ALPHA" //"ENGINEERING DROP"
+#endif
+
diff --git a/extern/pvrtextool/OSX_x86/Static/libPVRTexLib.a b/extern/pvrtextool/OSX_x86/Static/libPVRTexLib.a
new file mode 100644
index 0000000..e8c3acd
Binary files /dev/null and b/extern/pvrtextool/OSX_x86/Static/libPVRTexLib.a differ
diff --git a/extern/pvrtextool/Windows_x86_32/Static/PVRTexLib.lib b/extern/pvrtextool/Windows_x86_32/Static/PVRTexLib.lib
new file mode 100644
index 0000000..2665d34
Binary files /dev/null and b/extern/pvrtextool/Windows_x86_32/Static/PVRTexLib.lib differ
diff --git a/extern/pvrtextool/Windows_x86_64/Static/PVRTexLib.lib b/extern/pvrtextool/Windows_x86_64/Static/PVRTexLib.lib
new file mode 100644
index 0000000..1329c21
Binary files /dev/null and b/extern/pvrtextool/Windows_x86_64/Static/PVRTexLib.lib differ
diff --git a/extern/rg_etc1_v104/CMakeLists.txt b/extern/rg_etc1_v104/CMakeLists.txt
new file mode 100644
index 0000000..cb371f7
--- /dev/null
+++ b/extern/rg_etc1_v104/CMakeLists.txt
@@ -0,0 +1,7 @@
+
+SET(RGETC_SRCS
+	rg_etc1.cpp
+	rg_etc1.h)
+
+ADD_LIBRARY(rg_etc1 STATIC ${RGETC_SRCS})
+
diff --git a/extern/rg_etc1_v104/rg_etc1.cpp b/extern/rg_etc1_v104/rg_etc1.cpp
new file mode 100644
index 0000000..d9b9331
--- /dev/null
+++ b/extern/rg_etc1_v104/rg_etc1.cpp
@@ -0,0 +1,2452 @@
+// File: rg_etc1.cpp - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com>
+// Please see ZLIB license at the end of rg_etc1.h.
+//
+// For more information on Ericsson Texture Compression (ETC/ETC1), see:
+// http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt
+//
+// v1.04 - 5/15/14 - Fix signed vs. unsigned subtraction problem (noticed when compiled with gcc) in pack_etc1_block_init().
+//         This issue would cause an assert when this func. was called in debug. (Note this module was developed/tested with MSVC;
+//         I still need to test it thoroughly when compiled with gcc.)
+//
+// v1.03 - 5/12/13 - Initial public release
+#include "rg_etc1.h"
+
+#include <stdlib.h>
+#include <memory.h>
+#include <assert.h>
+//#include <stdio.h>
+#include <math.h>
+
+#pragma warning (disable: 4201) //  nonstandard extension used : nameless struct/union
+
+#if defined(_DEBUG) || defined(DEBUG) // either macro being defined marks a debug build
+#define RG_ETC1_BUILD_DEBUG
+#endif // _DEBUG || DEBUG
+
+#define RG_ETC1_ASSERT assert // every internal check goes through assert() from <assert.h>
+
+namespace rg_etc1
+{
+   typedef unsigned char uint8; // used as the 8-bit unsigned type
+   typedef unsigned short uint16; // used as the 16-bit unsigned type
+   typedef unsigned int uint; // general-purpose unsigned integer
+   typedef unsigned int uint32; // used as the 32-bit unsigned type
+#if defined ( __LP64__ ) // LP64 targets: plain long is already 64 bits wide
+typedef long int64;
+typedef unsigned long uint64;
+#else // everything else falls back to long long
+   typedef long long int64;
+   typedef unsigned long long uint64;
+#endif
+
+   const uint32 cUINT32_MAX = 0xFFFFFFFFU; // all 32 bits set
+   const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64;
+   
+   template<typename T> inline T minimum(T a, T b) { if (a < b) return a; return b; } // smaller of two values (b when neither compares less)
+   template<typename T> inline T minimum(T a, T b, T c) { const T ab = minimum(a, b); return minimum(ab, c); } // smallest of three
+   template<typename T> inline T maximum(T a, T b) { if (a > b) return a; return b; } // larger of two values (b when neither compares greater)
+   template<typename T> inline T maximum(T a, T b, T c) { const T ab = maximum(a, b); return maximum(ab, c); } // largest of three
+   template<typename T> inline T clamp(T value, T low, T high) { if (value < low) return low; if (value > high) return high; return value; } // limit value to [low, high]
+   template<typename T> inline T square(T value) { const T squared = value * value; return squared; } // value times itself, returned as T
+   template<typename T> inline void zero_object(T& obj) { void* const pBytes = (void*)&obj; memset(pBytes, 0, sizeof(T)); } // overwrite obj with zero bytes
+   template<typename T> inline void zero_this(T* pObj) { void* const pBytes = (void*)pObj; memset(pBytes, 0, sizeof(T)); } // overwrite *pObj with zero bytes
+
+   template<class T, size_t N> T decay_array_to_subtype(T (&a)[N]); // declaration only: used inside sizeof() to name an array's element type
+   // Element count of a true array X; a pointer argument fails to compile because decay_array_to_subtype() only accepts array references.
+#define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X)))
+   // Tag type: passing cNoClamp selects the color_quad_u8 constructors that skip clamping.
+   enum eNoClamp { cNoClamp };
+
+   struct color_quad_u8 // RGBA color with four 8-bit channels, also viewable as c[4] or as one packed uint32
+   {
+      static inline int clamp(int v) { if ((v & 0xFFFFFF00U) == 0) return v; return (~(v >> 31)) & 0xFF; } // saturate to [0,255]; the >> 31 sign trick assumes an arithmetic shift
+
+      struct component_traits { enum { cSigned = false, cFloat = false, cMin = 0U, cMax = 255U }; };
+
+   public:
+      typedef unsigned char component_t;
+      typedef int parameter_t;
+
+      enum { cNumComps = 4 };
+
+      union // three overlapping views of the same storage
+      {
+         struct // named channels (the nameless struct is the extension warning 4201 refers to)
+         {
+            component_t r;
+            component_t g;
+            component_t b;
+            component_t a;
+         };
+
+         component_t c[cNumComps]; // indexed view of the channels
+
+         uint32 m_u32; // every channel packed into one word
+      };
+
+      inline color_quad_u8() // leaves every channel uninitialized
+      {
+      }
+
+      inline color_quad_u8(const color_quad_u8& other) : m_u32(other.m_u32) // copies all channels through the packed word
+      {
+      }
+
+      explicit inline color_quad_u8(parameter_t y, parameter_t alpha = component_traits::cMax)
+      {
+         set(y, alpha); // gray color, inputs clamped
+      }
+
+      inline color_quad_u8(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
+      {
+         set(red, green, blue, alpha); // inputs clamped
+      }
+
+      explicit inline color_quad_u8(eNoClamp, parameter_t y, parameter_t alpha = component_traits::cMax)
+      {
+         set_noclamp_y_alpha(y, alpha); // gray color, inputs only asserted to be in range
+      }
+
+      inline color_quad_u8(eNoClamp, parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
+      {
+         set_noclamp_rgba(red, green, blue, alpha); // inputs only asserted to be in range
+      }
+
+      inline void clear()
+      {
+         m_u32 = 0U; // zeroes the packed word that overlays the channels
+      }
+
+      inline color_quad_u8& operator= (const color_quad_u8& other)
+      {
+         m_u32 = other.m_u32; // one word copy carries every channel
+         return *this;
+      }
+
+      inline color_quad_u8& set_rgb(const color_quad_u8& other)
+      {
+         // Copy the three color channels only; alpha keeps its current value.
+         for (uint i = 0; i < 3; i++)
+            c[i] = other.c[i];
+         return *this;
+      }
+
+      inline color_quad_u8& operator= (parameter_t y)
+      {
+         // Gray assignment: r, g and b become clamp(y) and alpha becomes cMax (255).
+         return set(y, component_traits::cMax);
+      }
+
+      inline color_quad_u8& set(parameter_t y, parameter_t alpha = component_traits::cMax)
+      {
+         // Saturate both inputs to [0,255], then broadcast the gray level to r, g and b.
+         const component_t gray = static_cast<component_t>(clamp(y));
+         r = gray;
+         g = gray;
+         b = gray;
+         a = static_cast<component_t>(clamp(alpha));
+         return *this;
+      }
+
+      inline color_quad_u8& set_noclamp_y_alpha(parameter_t y, parameter_t alpha = component_traits::cMax)
+      {
+         // No clamping: the inputs are only asserted to lie in [cMin, cMax] before being narrowed.
+         RG_ETC1_ASSERT( (y >= component_traits::cMin) && (y <= component_traits::cMax) );
+         RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) );
+
+         const component_t gray = static_cast<component_t>(y);
+         r = g = b = gray;
+         a = static_cast<component_t>(alpha);
+         return *this;
+      }
+
+      inline color_quad_u8& set(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
+      {
+         // Saturate each of the four inputs to [0,255] and store them in r, g, b, a order.
+         const parameter_t rgba[cNumComps] = { red, green, blue, alpha };
+         for (uint i = 0; i < cNumComps; i++)
+            c[i] = static_cast<component_t>(clamp(rgba[i]));
+         return *this;
+      }
+
+      inline color_quad_u8& set_noclamp_rgba(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha)
+      {
+         RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) );
+         RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) );
+         RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) );
+         RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) );
+         // No clamping: the asserts above are the only range check, the casts below just narrow.
+         a = static_cast<component_t>(alpha);
+         b = static_cast<component_t>(blue);
+         g = static_cast<component_t>(green);
+         r = static_cast<component_t>(red);
+         return *this;
+      }
+
+      inline color_quad_u8& set_noclamp_rgb(parameter_t red, parameter_t green, parameter_t blue)
+      {
+         RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) );
+         RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) );
+         RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) );
+         // No clamping and alpha is left untouched; the asserts above are the only range check.
+         b = static_cast<component_t>(blue);
+         g = static_cast<component_t>(green);
+         r = static_cast<component_t>(red);
+         return *this;
+      }
+
+      static inline parameter_t get_min_comp() { return component_traits::cMin; } // 0
+      static inline parameter_t get_max_comp() { return component_traits::cMax; } // 255
+      static inline bool get_comps_are_signed() { return component_traits::cSigned != 0; } // false
+
+      inline component_t operator[] (uint i) const { RG_ETC1_ASSERT(i < cNumComps); return c[i]; } // channel i by value
+      inline component_t& operator[] (uint i) { RG_ETC1_ASSERT(i < cNumComps); return c[i]; } // writable reference to channel i
+
+      inline color_quad_u8& set_component(uint i, parameter_t f)
+      {
+         RG_ETC1_ASSERT(i < cNumComps);
+         // Saturate f to [0,255] before storing it in channel i.
+         const int saturated = clamp(f);
+         c[i] = static_cast<component_t>(saturated);
+         return *this;
+      }
+
+      inline color_quad_u8& set_grayscale(parameter_t l)
+      {
+         // Writes clamp(l) to the first three channels; the fourth (alpha) is not touched.
+         const component_t gray = static_cast<component_t>(clamp(l));
+         for (uint i = 0; i < 3; i++)
+            c[i] = gray;
+         return *this;
+      }
+
+      inline color_quad_u8& clamp(const color_quad_u8& l, const color_quad_u8& h)
+      {
+         for (uint comp = 0; comp < cNumComps; comp++) // each channel is limited by the matching channels of l and h
+            c[comp] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[comp], l.c[comp], h.c[comp]));
+         return *this;
+      }
+
+      inline color_quad_u8& clamp(parameter_t l, parameter_t h)
+      {
+         for (uint comp = 0; comp < cNumComps; comp++) // every channel is limited to the same [l, h] range
+            c[comp] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[comp], l, h));
+         return *this;
+      }
+
+      inline parameter_t get_luma() const // CCIR 601 luma (consistent with color_utils::RGB_To_Y); the weights sum to 65536
+      {
+         const uint weighted = 19595U * r + 38470U * g + 7471U * b;
+         return static_cast<parameter_t>((weighted + 32768U) >> 16U); // +32768 rounds to nearest before the shift
+      }
+
+      inline parameter_t get_luma_rec709() const // REC 709 luma; the weights sum to 65536
+      {
+         const uint weighted = 13938U * r + 46869U * g + 4729U * b;
+         return static_cast<parameter_t>((weighted + 32768U) >> 16U); // +32768 rounds to nearest before the shift
+      }
+
+      inline uint squared_distance_rgb(const color_quad_u8& c) const // sum of squared r, g, b differences (alpha ignored)
+      {
+         return static_cast<uint>(rg_etc1::square<int>(r - c.r) + rg_etc1::square<int>(g - c.g) + rg_etc1::square<int>(b - c.b));
+      }
+
+      inline uint squared_distance_rgba(const color_quad_u8& c) const // sum of squared differences over all four channels
+      {
+         return static_cast<uint>(rg_etc1::square<int>(r - c.r) + rg_etc1::square<int>(g - c.g) + rg_etc1::square<int>(b - c.b) + rg_etc1::square<int>(a - c.a));
+      }
+
+      inline bool rgb_equals(const color_quad_u8& rhs) const // compares r, g and b only
+      {
+         return !((r != rhs.r) || (g != rhs.g) || (b != rhs.b));
+      }
+
+      inline bool operator== (const color_quad_u8& rhs) const // compares every channel at once via the packed word
+      {
+         return !(m_u32 != rhs.m_u32);
+      }
+
+      color_quad_u8& operator+= (const color_quad_u8& other)
+      {
+         for (uint comp = 0; comp < cNumComps; comp++) // per-channel add, saturated to [0,255], alpha included
+            c[comp] = static_cast<component_t>(clamp(c[comp] + other.c[comp]));
+         return *this;
+      }
+
+      color_quad_u8& operator-= (const color_quad_u8& other)
+      {
+         for (uint comp = 0; comp < cNumComps; comp++) // per-channel subtract, saturated to [0,255], alpha included
+            c[comp] = static_cast<component_t>(clamp(c[comp] - other.c[comp]));
+         return *this;
+      }
+
+      friend color_quad_u8 operator+ (const color_quad_u8& lhs, const color_quad_u8& rhs)
+      {
+         color_quad_u8 sum(lhs);
+         sum += rhs; // saturating per-channel add
+         return sum;
+      }
+
+      friend color_quad_u8 operator- (const color_quad_u8& lhs, const color_quad_u8& rhs)
+      {
+         color_quad_u8 difference(lhs);
+         difference -= rhs; // saturating per-channel subtract
+         return difference;
+      }
+   }; // class color_quad_u8
+
+   struct vec3F // three-component float vector
+   {
+      float m_s[3]; // the three components, in constructor argument order (x, y, z)
+      
+      inline vec3F() { } // components are left uninitialized
+      inline vec3F(float s) { m_s[0] = m_s[1] = m_s[2] = s; } // all three components set to s
+      inline vec3F(float x, float y, float z) { m_s[2] = z; m_s[1] = y; m_s[0] = x; }
+      
+      inline float operator[] (uint i) const { RG_ETC1_ASSERT(i < 3); return m_s[i]; } // read-only component access
+
+      inline vec3F& operator += (const vec3F& other) { m_s[0] += other.m_s[0]; m_s[1] += other.m_s[1]; m_s[2] += other.m_s[2]; return *this; } // component-wise add
+
+      inline vec3F& operator *= (float s) { m_s[0] *= s; m_s[1] *= s; m_s[2] *= s; return *this; } // scale every component by s
+   };
+     
+   enum etc_constants
+   {
+      cETC1BytesPerBlock = 8U, // one packed block occupies 8 bytes (a 64-bit word)
+
+      cETC1SelectorBits = 2U,
+      cETC1SelectorValues = 1U << cETC1SelectorBits, // 4
+      cETC1SelectorMask = cETC1SelectorValues - 1U, // 3
+
+      cETC1BlockShift = 2U,
+      cETC1BlockSize = 1U << cETC1BlockShift, // 4
+
+      // Bit offsets below count from bit 0 of the 64-bit block, which lives in the last byte (m_bytes[7]) of etc1_block.
+      cETC1LSBSelectorIndicesBitOffset = 0,
+      cETC1MSBSelectorIndicesBitOffset = 16,
+
+      cETC1FlipBitOffset = 32, // bit 0 of m_bytes[3]
+      cETC1DiffBitOffset = 33, // bit 1 of m_bytes[3]
+
+      cETC1IntenModifierNumBits = 3,
+      cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, // 8
+      cETC1RightIntenModifierTableBitOffset = 34, // bits 2-4 of m_bytes[3]
+      cETC1LeftIntenModifierTableBitOffset = 37, // bits 5-7 of m_bytes[3]
+
+      // Base+Delta encoding (5 bit bases, 3 bit delta)
+      cETC1BaseColorCompNumBits = 5,
+      cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, // 32: a count of values, one past the largest 5-bit value
+
+      cETC1DeltaColorCompNumBits = 3,
+      cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, // 8
+      cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, // 8 (same value as cETC1DeltaColorComp)
+
+      cETC1BaseColor5RBitOffset = 59,
+      cETC1BaseColor5GBitOffset = 51,
+      cETC1BaseColor5BBitOffset = 43,
+
+      cETC1DeltaColor3RBitOffset = 56,
+      cETC1DeltaColor3GBitOffset = 48,
+      cETC1DeltaColor3BBitOffset = 40,
+
+      // Absolute (non-delta) encoding (two 4-bit per component bases)
+      cETC1AbsColorCompNumBits = 4,
+      cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, // 16: a count of values, one past the largest 4-bit value
+
+      cETC1AbsColor4R1BitOffset = 60,
+      cETC1AbsColor4G1BitOffset = 52,
+      cETC1AbsColor4B1BitOffset = 44,
+
+      cETC1AbsColor4R2BitOffset = 56,
+      cETC1AbsColor4G2BitOffset = 48,
+      cETC1AbsColor4B2BitOffset = 40,
+
+      cETC1ColorDeltaMin = -4, // smallest value of a 3-bit two's complement delta
+      cETC1ColorDeltaMax = 3, // largest value of a 3-bit two's complement delta
+
+      // Delta3:
+      // 0   1   2   3   4   5   6   7
+      // 000 001 010 011 100 101 110 111
+      // 0   1   2   3   -4  -3  -2  -1
+   };
+   
+   static uint8 g_quant5_tab[256+16]; // not const and no initializer here; NOTE(review): the name suggests an 8-bit -> 5-bit quantization table with 16 entries of slack - confirm against the code that fills it
+
+   static const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = // [intensity table 0-7][selector 0-3]; each row is sorted ascending and symmetric around zero
+   { 
+      { -8,  -2,   2,   8 }, { -17,  -5,  5,  17 }, { -29,  -9,   9,  29 }, {  -42, -13, 13,  42 }, 
+      { -60, -18, 18,  60 }, { -80, -24, 24,  80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } 
+   };
+
+   static const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; // raw 2-bit ETC1 selector code -> column index into a g_etc1_inten_tables row
+   static const uint8 g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; // the reverse mapping: g_etc1_inten_tables column index -> raw 2-bit ETC1 selector code
+      
+   // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte.
+   static uint16 g_etc1_inverse_lookup[2*8*4][256];      // [diff/inten_table/selector][desired_color]
+
+   // g_color8_to_etc_block_config[color][table_index]: for each 8-bit color value, a 0xFFFF-terminated list of packed ETC1 diff/intensity table/selector/packed_color combinations that map to that color.
+   // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8)
+   static const uint16 g_color8_to_etc_block_config_0_255[2][33] =
+   {
+      { 0x0000,  0x0010,  0x0002,  0x0012,  0x0004,  0x0014,  0x0006,  0x0016,  0x0008,  0x0018,  0x000A,  0x001A,  0x000C,  0x001C,  0x000E,  0x001E,
+        0x0001,  0x0011,  0x0003,  0x0013,  0x0005,  0x0015,  0x0007,  0x0017,  0x0009,  0x0019,  0x000B,  0x001B,  0x000D,  0x001D,  0x000F,  0x001F, 0xFFFF },
+      { 0x0F20,  0x0F30,  0x0E32,  0x0F22,  0x0E34,  0x0F24,  0x0D36,  0x0F26,  0x0C38,  0x0E28,  0x0B3A,  0x0E2A,  0x093C,  0x0E2C,  0x053E,  0x0D2E,
+        0x1E31,  0x1F21,  0x1D33,  0x1F23,  0x1C35,  0x1E25,  0x1A37,  0x1E27,  0x1839,  0x1D29,  0x163B,  0x1C2B,  0x133D,  0x1B2D,  0x093F,  0x1A2F, 0xFFFF },
+   };
+
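+   // Really only [254][11]: no row needs more than 11 slots (its entries plus the 0xFFFF terminator), so the 12th column is spare.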
+   static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] = 
+   {
+      { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E,
+      0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, {
+      0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306,
+      0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112,
+      0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707,
+      0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B,
+      0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605,
+      0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF
+      }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214,
+      0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A,
+      0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, {
+      0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B,
+      0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D,
+      0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805,
+      0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F,
+      0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, {
+      0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521,
+      0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523,
+      0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F,
+      0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B,
+      0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, {
+      0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F,
+      0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D,
+      0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529,
+      0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917,
+      0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E,
+      0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, { 0x0410, 0x0901, 0x0633, 0x0725,
+      0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139,
+      0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, {
+      0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A,
+      0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437,
+      0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500,
+      0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, {
+      0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19,
+      0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D,
+      0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, {
+      0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F,
+      0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D,
+      0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, {
+      0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05,
+      0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434,
+      0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01,
+      0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21,
+      0x0539, 0x160B, 0x121F, 0xFFFF }, { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27,
+      0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E,
+      0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D,
+      0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, {
+      0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, {
+      0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307,
+      0x1019, 0x0B2B, 0x013D, 0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33,
+      0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B,
+      0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, {
+      0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103,
+      0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B,
+      0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536,
+      0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A,
+      0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115,
+      0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, {
+      0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF
+      }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, { 0x0820,
+      0x1111, 0x1319, 0x1809, 0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031,
+      0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, {
+      0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35,
+      0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F,
+      0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D,
+      0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, { 0x1221, 0x0B39, 0x1029,
+      0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832,
+      0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D,
+      0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133,
+      0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF
+      }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, {
+      0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331,
+      0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D,
+      0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513,
+      0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF
+      }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, {
+      0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, {
+      0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905,
+      0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09,
+      0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D,
+      0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621,
+      0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18,
+      0x092C, 0x1623, 0x1527, 0xFFFF }, { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919,
+      0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625,
+      0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F,
+      0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936,
+      0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A,
+      0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, {
+      0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913,
+      0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, {
+      0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20,
+      0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C,
+      0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 0x1D05, 0xFFFF }, {
+      0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06,
+      0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, {
+      0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26,
+      0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18,
+      0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03,
+      0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, { 0x1B21, 0x1929,
+      0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23,
+      0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF
+      }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B,
+      0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E,
+      0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18,
+      0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01,
+      0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16,
+      0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B,
+      0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01,
+      0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34,
+      0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11,
+      0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 0xFFFF },
+   };
+
+   struct etc1_block
+   {
+      // big endian uint64:
+      // bit ofs:  56  48  40  32  24  16   8   0
+      // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 
+      union 
+      {
+         uint64 m_uint64;
+         uint8 m_bytes[8];
+      };
+
+      uint8 m_low_color[2];
+      uint8 m_high_color[2];
+
+      enum { cNumSelectorBytes = 4 };
+      uint8 m_selectors[cNumSelectorBytes];
+
+      inline void clear()
+      {
+         memset((void*)this, 0, sizeof(*this)); // zero the whole object: the packed block bytes and the extra color/selector members
+      }
+
+      inline uint get_byte_bits(uint ofs, uint num) const
+      {
+         // Reads the num-bit field at block bit offset ofs; the field must sit inside a single byte.
+         RG_ETC1_ASSERT((ofs + num) <= 64U);
+         RG_ETC1_ASSERT(num && (num <= 8U));
+         RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
+         const uint8 packed = m_bytes[7 - (ofs >> 3)]; // bit offset 0 is in the last byte, so the byte index is reversed
+         return (packed >> (ofs & 7)) & ((1 << num) - 1);
+      }
+
+      inline void set_byte_bits(uint ofs, uint num, uint bits) // overwrite the num-bit field at block bit offset ofs with bits
+      {
+         RG_ETC1_ASSERT((ofs + num) <= 64U);
+         RG_ETC1_ASSERT(num && (num < 32U));
+         RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); // the field may not straddle a byte boundary
+         RG_ETC1_ASSERT(bits < (1U << num));
+         const uint shift = ofs & 7;
+         const uint field_mask = static_cast<uint>((1 << num) - 1) << shift;
+         uint8& target = m_bytes[7 - (ofs >> 3)]; // bit offset 0 is in the last byte, so the byte index is reversed
+         target &= ~field_mask;
+         target |= (bits << shift);
+      }
+
+      // Flip bit (bit 0 of m_bytes[3]): false = left/right subblocks,
+      // true = upper/lower subblocks.
+      inline bool get_flip_bit() const
+      {
+         return (m_bytes[3] & 1) == 1;
+      }
+
+      inline void set_flip_bit(bool flip)
+      {
+         // Clear bit 0 of m_bytes[3], then OR in the new flag (true converts to 1).
+         m_bytes[3] = static_cast<uint8>((m_bytes[3] & ~1) | static_cast<uint8>(flip));
+      }
+
+      inline bool get_diff_bit() const // diff flag: bit 1 of m_bytes[3]
+      {
+         return ((m_bytes[3] >> 1) & 1) != 0;
+      }
+
+      inline void set_diff_bit(bool diff)
+      {
+         // Replace bit 1 of m_bytes[3] with the new flag, leaving the other seven bits alone.
+         m_bytes[3] = static_cast<uint8>((m_bytes[3] & ~2) | (diff ? 2 : 0));
+      }
+
+      // Returns the 3-bit intensity modifier table index (0-7) of one subblock, read from m_bytes[3].
+      // subblock_id=0 is left/top (CW 1, bits 5-7); subblock_id=1 is right/bottom (CW 2, bits 2-4).
+      inline uint get_inten_table(uint subblock_id) const
+      {
+         RG_ETC1_ASSERT(subblock_id < 2);
+         const uint shift = (subblock_id == 0) ? 5 : 2;
+         return (m_bytes[3] >> shift) & 7;
+      }
+
+      // Stores intensity modifier table index t (0-7) for subblock subblock_id (0 or 1) in its 3-bit field of m_bytes[3].
+      inline void set_inten_table(uint subblock_id, uint t)
+      {
+         RG_ETC1_ASSERT(subblock_id < 2);
+         RG_ETC1_ASSERT(t < 8);
+         const uint shift = (subblock_id == 0) ? 5 : 2;
+         const uint cleared = m_bytes[3] & ~(7 << shift);
+         m_bytes[3] = static_cast<uint8>(cleared | (t << shift));
+      }
+
+      // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
+      inline uint get_selector(uint x, uint y) const
+      {
+         RG_ETC1_ASSERT((x | y) < 4);
+
+         // Pixel (x, y) owns bit x*4+y of the LSB plane (bytes 6-7) and the same bit of the MSB plane two bytes earlier.
+         const uint bit_index = x * 4 + y;
+         const uint lsb_byte = 7 - (bit_index >> 3);
+         const uint shift = bit_index & 7;
+         const uint lsb = (m_bytes[lsb_byte] >> shift) & 1;
+         const uint msb = (m_bytes[lsb_byte - 2] >> shift) & 1;
+
+         return g_etc1_to_selector_index[lsb | (msb << 1)];
+      }
+
+      // Sets the selector of pixel (x, y). "val" ranges from 0-3 and is a direct index into
+      // g_etc1_inten_tables; it is translated to the raw ETC1 code before being stored.
+      // The low code bit goes to m_bytes[6..7], the high bit two bytes earlier (m_bytes[4..5]).
+      inline void set_selector(uint x, uint y, uint val)
+      {
+         RG_ETC1_ASSERT((x | y | val) < 4);
+
+         const uint pixel = x * 4 + y;
+         const uint shift = pixel & 7;
+         const uint bit_mask = 1 << shift;
+         const uint lsb_index = 7 - (pixel >> 3);
+
+         const uint etc1_val = g_selector_index_to_etc1[val];
+         const uint low = etc1_val & 1;
+         const uint high = etc1_val >> 1;
+
+         uint8& lsb_byte = m_bytes[lsb_index];
+         lsb_byte &= ~bit_mask;
+         lsb_byte |= (low << shift);
+
+         uint8& msb_byte = m_bytes[lsb_index - 2];
+         msb_byte &= ~bit_mask;
+         msb_byte |= (high << shift);
+      }
+
+      // Stores a packed 4:4:4 color (R in bits 8-11, G in bits 4-7, B in bits 0-3) as the
+      // absolute base color of subblock 0 (idx == 0) or subblock 1 (idx != 0).
+      inline void set_base4_color(uint idx, uint16 c)
+      {
+         const uint r = (c >> 8) & 15;
+         const uint g = (c >> 4) & 15;
+         const uint b = c & 15;
+
+         const uint r_ofs = idx ? cETC1AbsColor4R2BitOffset : cETC1AbsColor4R1BitOffset;
+         const uint g_ofs = idx ? cETC1AbsColor4G2BitOffset : cETC1AbsColor4G1BitOffset;
+         const uint b_ofs = idx ? cETC1AbsColor4B2BitOffset : cETC1AbsColor4B1BitOffset;
+
+         set_byte_bits(r_ofs, 4, r);
+         set_byte_bits(g_ofs, 4, g);
+         set_byte_bits(b_ofs, 4, b);
+      }
+
+      // Returns the absolute base color of subblock 0 (idx == 0) or subblock 1 (idx != 0),
+      // packed as 4:4:4 (R in bits 8-11, G in bits 4-7, B in bits 0-3).
+      inline uint16 get_base4_color(uint idx) const
+      {
+         const uint r_ofs = idx ? cETC1AbsColor4R2BitOffset : cETC1AbsColor4R1BitOffset;
+         const uint g_ofs = idx ? cETC1AbsColor4G2BitOffset : cETC1AbsColor4G1BitOffset;
+         const uint b_ofs = idx ? cETC1AbsColor4B2BitOffset : cETC1AbsColor4B1BitOffset;
+
+         const uint r = get_byte_bits(r_ofs, 4);
+         const uint g = get_byte_bits(g_ofs, 4);
+         const uint b = get_byte_bits(b_ofs, 4);
+
+         return static_cast<uint16>((r << 8U) | (g << 4U) | b);
+      }
+
+      // Stores a packed 5:5:5 base color (R in bits 10-14, G in bits 5-9, B in bits 0-4).
+      inline void set_base5_color(uint16 c)
+      {
+         const uint r = (c >> 10) & 31;
+         const uint g = (c >> 5) & 31;
+         const uint b = c & 31;
+
+         set_byte_bits(cETC1BaseColor5RBitOffset, 5, r);
+         set_byte_bits(cETC1BaseColor5GBitOffset, 5, g);
+         set_byte_bits(cETC1BaseColor5BBitOffset, 5, b);
+      }
+
+      // Returns the 5:5:5 base color packed as R in bits 10-14, G in bits 5-9, B in bits 0-4.
+      inline uint16 get_base5_color() const
+      {
+         uint packed = get_byte_bits(cETC1BaseColor5RBitOffset, 5) << 10U;
+         packed |= get_byte_bits(cETC1BaseColor5GBitOffset, 5) << 5U;
+         packed |= get_byte_bits(cETC1BaseColor5BBitOffset, 5);
+         return static_cast<uint16>(packed);
+      }
+
+      // Stores a packed 3:3:3 delta color (R in bits 6-8, G in bits 3-5, B in bits 0-2).
+      void set_delta3_color(uint16 c)
+      {
+         const uint r = (c >> 6) & 7;
+         const uint g = (c >> 3) & 7;
+         const uint b = c & 7;
+
+         set_byte_bits(cETC1DeltaColor3RBitOffset, 3, r);
+         set_byte_bits(cETC1DeltaColor3GBitOffset, 3, g);
+         set_byte_bits(cETC1DeltaColor3BBitOffset, 3, b);
+      }
+
+      // Returns the 3:3:3 delta color packed as R in bits 6-8, G in bits 3-5, B in bits 0-2.
+      inline uint16 get_delta3_color() const
+      {
+         uint packed = get_byte_bits(cETC1DeltaColor3RBitOffset, 3) << 6U;
+         packed |= get_byte_bits(cETC1DeltaColor3GBitOffset, 3) << 3U;
+         packed |= get_byte_bits(cETC1DeltaColor3BBitOffset, 3);
+         return static_cast<uint16>(packed);
+      }
+
+      // Base color 5
+      // Packs r/g/b into 5:5:5 form (R in bits 10-14, G in bits 5-9, B in bits 0-4).
+      // When scaled is true each input is quantized as (c * 31 + bias) / 255; otherwise the
+      // inputs are used as-is. In both cases each component is clamped to 31.
+      static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U);
+      static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U);
+
+      // Unpacks a 5:5:5 color. When scaled is true each component is expanded to 8 bits
+      // as (c << 3) | (c >> 2). alpha is clamped to 255.
+      static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U);
+      static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled);
+
+      // Unpacks base color + signed 3:3:3 delta. Returns false if any component of the sum
+      // falls outside 0-31; the components are then clamped to that range before scaling.
+      static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
+      static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
+
+      // Delta color 3
+      // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
+      // Negative values are stored as value + 8 (R in bits 6-8, G in bits 3-5, B in bits 0-2).
+      static uint16 pack_delta3(int r, int g, int b);
+
+      // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
+      static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3);
+
+      // Abs color 4
+      // Packs r/g/b into 4:4:4 form (R in bits 8-11, G in bits 4-7, B in bits 0-3).
+      // When scaled is true each input is quantized as (c * 15 + bias) / 255; otherwise the
+      // inputs are used as-is. In both cases each component is clamped to 15.
+      static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U);
+      static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U);
+
+      // Unpacks a 4:4:4 color. When scaled is true each component is expanded to 8 bits
+      // as (c << 4) | c. alpha is clamped to 255.
+      static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U);
+      static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled);
+
+      // subblock colors
+      // Each function writes four colors to pDst[0..3]: the scaled base color plus each of
+      // the four modifiers of intensity table table_idx. The delta variant returns the
+      // success flag of the base+delta unpack.
+      static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx);
+      static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx);
+      static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx);
+
+      // Expands an unscaled color to 8 bits per component by bit replication:
+      // 4-bit components (color4 == true) become c | (c << 4), 5-bit components become
+      // (c >> 2) | (c << 3). Alpha is copied unchanged. Each component of dst is derived
+      // only from the matching component of src.
+      static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4)
+      {
+         if (!color4)
+         {
+            dst.r = (src.r >> 2) | (src.r << 3);
+            dst.g = (src.g >> 2) | (src.g << 3);
+            dst.b = (src.b >> 2) | (src.b << 3);
+         }
+         else
+         {
+            dst.r = src.r | (src.r << 4);
+            dst.g = src.g | (src.g << 4);
+            dst.b = src.b | (src.b << 4);
+         }
+
+         dst.a = src.a;
+      }
+   };
+
+   // Byte-wise (LSD) radix sort of an index array by the keys the indices refer to.
+   //
+   //  num_indices  - number of indices/keys to sort.
+   //  pIndices0    - input index buffer (filled with 0..num_indices-1 when init_indices is true).
+   //  pIndices1    - scratch index buffer of the same size; the buffers are swapped after each pass.
+   //  pKeys        - key array, addressed as pKeys[index].
+   //  key_ofs      - byte offset of the sort key inside each Q element.
+   //  key_size     - number of key bytes to sort on (1-4), one pass per byte.
+   //  init_indices - if true, pIndices0 is initialized to the identity permutation first.
+   //
+   // Returns pointer to sorted array: pIndices0 when key_size is even, pIndices1 when it is
+   // odd. Returns NULL if key_size is not in the range 1-4.
+   //
+   // NOTE(review): the key macros below always load a full uint from the key address, whatever
+   // key_size is. With keys narrower than a uint this reads past the key (and past the end of
+   // the array for the last element), and pass N taking bits 8*N..8*N+7 of that load only
+   // matches key byte N on little-endian targets. Confirm against callers.
+   template<typename T, typename Q>
+   T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices)
+   {  
+      // key_ofs is a byte offset into a key (Q), not into an index (T); it is unsigned, so no lower-bound check is needed.
+      RG_ETC1_ASSERT(key_ofs < sizeof(Q));
+      RG_ETC1_ASSERT((key_size >= 1) && (key_size <= 4));
+
+      if (init_indices)
+      {
+         // Fill with the identity permutation, two entries per iteration.
+         T* p = pIndices0;
+         T* q = pIndices0 + (num_indices >> 1) * 2;
+         uint i;
+         for (i = 0; p != q; p += 2, i += 2)
+         {
+            p[0] = static_cast<T>(i);
+            p[1] = static_cast<T>(i + 1); 
+         }
+
+         if (num_indices & 1)
+            *p = static_cast<T>(i);
+      }
+
+      // One 256-entry histogram per key byte; only the first key_size histograms are used.
+      uint hist[256 * 4];
+
+      memset(hist, 0, sizeof(hist[0]) * 256 * key_size);
+
+#define RG_ETC1_GET_KEY(p) (*(const uint*)((const uint8*)(pKeys + *(p)) + key_ofs))
+#define RG_ETC1_GET_KEY_FROM_INDEX(i) (*(const uint*)((const uint8*)(pKeys + (i)) + key_ofs))
+
+      // Build all histograms in a single sweep over the indices.
+      if (key_size == 4)
+      {
+         T* p = pIndices0;
+         T* q = pIndices0 + num_indices;
+         for ( ; p != q; p++)
+         {
+            const uint key = RG_ETC1_GET_KEY(p);
+
+            hist[        key        & 0xFF]++;
+            hist[256 + ((key >>  8) & 0xFF)]++;
+            hist[512 + ((key >> 16) & 0xFF)]++;
+            hist[768 + ((key >> 24) & 0xFF)]++;
+         }
+      }
+      else if (key_size == 3)
+      {
+         T* p = pIndices0;
+         T* q = pIndices0 + num_indices;
+         for ( ; p != q; p++)
+         {
+            const uint key = RG_ETC1_GET_KEY(p);
+
+            hist[        key        & 0xFF]++;
+            hist[256 + ((key >>  8) & 0xFF)]++;
+            hist[512 + ((key >> 16) & 0xFF)]++;
+         }
+      }   
+      else if (key_size == 2)
+      {
+         T* p = pIndices0;
+         T* q = pIndices0 + (num_indices >> 1) * 2;
+
+         for ( ; p != q; p += 2)
+         {
+            const uint key0 = RG_ETC1_GET_KEY(p);
+            const uint key1 = RG_ETC1_GET_KEY(p+1);
+
+            hist[        key0         & 0xFF]++;
+            hist[256 + ((key0 >>  8) & 0xFF)]++;
+
+            hist[        key1        & 0xFF]++;
+            hist[256 + ((key1 >>  8) & 0xFF)]++;
+         }
+
+         if (num_indices & 1)
+         {
+            const uint key = RG_ETC1_GET_KEY(p);
+
+            hist[        key        & 0xFF]++;
+            hist[256 + ((key >>  8) & 0xFF)]++;
+         }
+      }      
+      else
+      {
+         RG_ETC1_ASSERT(key_size == 1);
+         if (key_size != 1)
+            return NULL;
+
+         T* p = pIndices0;
+         T* q = pIndices0 + (num_indices >> 1) * 2;
+
+         for ( ; p != q; p += 2)
+         {
+            const uint key0 = RG_ETC1_GET_KEY(p);
+            const uint key1 = RG_ETC1_GET_KEY(p+1);
+
+            hist[key0 & 0xFF]++;
+            hist[key1 & 0xFF]++;
+         }
+
+         if (num_indices & 1)
+         {
+            const uint key = RG_ETC1_GET_KEY(p);
+
+            hist[key & 0xFF]++;
+         }
+      }      
+
+      T* pCur = pIndices0;
+      T* pNew = pIndices1;
+
+      // One scatter pass per key byte, lowest byte first; the buffers swap roles after each pass.
+      for (uint pass = 0; pass < key_size; pass++)
+      {
+         const uint* pHist = &hist[pass << 8];
+
+         // Exclusive prefix sum of the histogram gives each bucket's first output slot.
+         uint offsets[256];
+
+         uint cur_ofs = 0;
+         for (uint i = 0; i < 256; i += 2)
+         {
+            offsets[i] = cur_ofs;
+            cur_ofs += pHist[i];
+
+            offsets[i+1] = cur_ofs;
+            cur_ofs += pHist[i+1];
+         }
+
+         const uint pass_shift = pass << 3;
+
+         T* p = pCur;
+         T* q = pCur + (num_indices >> 1) * 2;
+
+         for ( ; p != q; p += 2)
+         {
+            uint index0 = p[0];
+            uint index1 = p[1];
+
+            uint c0 = (RG_ETC1_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF;
+            uint c1 = (RG_ETC1_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF;
+
+            if (c0 == c1)
+            {
+               // Same bucket: emit both in input order with a single offset update.
+               uint dst_offset0 = offsets[c0];
+
+               offsets[c0] = dst_offset0 + 2;
+
+               pNew[dst_offset0] = static_cast<T>(index0);
+               pNew[dst_offset0 + 1] = static_cast<T>(index1);
+            }
+            else
+            {
+               uint dst_offset0 = offsets[c0]++;
+               uint dst_offset1 = offsets[c1]++;
+
+               pNew[dst_offset0] = static_cast<T>(index0);
+               pNew[dst_offset1] = static_cast<T>(index1);
+            }
+         }
+
+         if (num_indices & 1)
+         {
+            uint index = *p;
+            uint c = (RG_ETC1_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF;
+
+            uint dst_offset = offsets[c];
+            offsets[c] = dst_offset + 1;
+
+            pNew[dst_offset] = static_cast<T>(index);
+         }
+
+         T* t = pCur;
+         pCur = pNew;
+         pNew = t;
+      }            
+
+      return pCur;
+   }
+
+#undef RG_ETC1_GET_KEY
+#undef RG_ETC1_GET_KEY_FROM_INDEX
+
+   // Convenience overload: packs the r, g and b members of color (its alpha is not used).
+   uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias)
+   {
+      const uint16 packed = pack_color5(color.r, color.g, color.b, scaled, bias);
+      return packed;
+   }
+   
+   // Packs r/g/b into 5:5:5 form (R in bits 10-14, G in bits 5-9, B in bits 0-4).
+   // When scaled is true each component is first quantized as (c * 31 + bias) / 255.
+   // Components are clamped to 31 before packing.
+   uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias)
+   {
+      const uint cMaxComponent = 31U;
+
+      if (scaled)
+      {
+         r = (r * 31U + bias) / 255U;
+         g = (g * 31U + bias) / 255U;
+         b = (b * 31U + bias) / 255U;
+      }
+
+      if (r > cMaxComponent) r = cMaxComponent;
+      if (g > cMaxComponent) g = cMaxComponent;
+      if (b > cMaxComponent) b = cMaxComponent;
+
+      return static_cast<uint16>((r << 10U) | (g << 5U) | b);
+   }
+
+   // Unpacks a 5:5:5 color (R in bits 10-14, G in bits 5-9, B in bits 0-4).
+   // When scaled is true each component is expanded to 8 bits as (c << 3) | (c >> 2).
+   // alpha is clamped to 255.
+   color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha)
+   {
+      uint r = (packed_color5 >> 10U) & 31U;
+      uint g = (packed_color5 >> 5U) & 31U;
+      uint b = packed_color5 & 31U;
+
+      if (scaled)
+      {
+         r = (r << 3U) | (r >> 2U);
+         g = (g << 3U) | (g >> 2U);
+         b = (b << 3U) | (b >> 2U);
+      }
+
+      const uint a = rg_etc1::minimum(alpha, 255U);
+      return color_quad_u8(cNoClamp, r, g, b, a);
+   }
+
+   // Unpacks a 5:5:5 color into separate r/g/b outputs (alpha is not returned).
+   void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled)
+   {
+      const color_quad_u8 unpacked(unpack_color5(packed_color5, scaled, 0));
+
+      r = unpacked.r;
+      g = unpacked.g;
+      b = unpacked.b;
+   }
+
+   // Unpacks a 5:5:5 base color plus a signed 3:3:3 delta into result.
+   // Returns false when any component of base + delta falls outside 0-31; in that case
+   // the components are clamped to 0-31 before the optional 8-bit expansion.
+   // alpha is clamped to 255.
+   bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha)
+   {
+      int delta_r, delta_g, delta_b;
+      unpack_delta3(delta_r, delta_g, delta_b, packed_delta3);
+
+      int r = static_cast<int>((packed_color5 >> 10U) & 31U) + delta_r;
+      int g = static_cast<int>((packed_color5 >> 5U) & 31U) + delta_g;
+      int b = static_cast<int>(packed_color5 & 31U) + delta_b;
+
+      // A negative or >31 component sets bits above bit 4 in the OR of all three.
+      const bool in_range = (static_cast<uint>(r | g | b) <= 31U);
+      if (!in_range)
+      {
+         r = rg_etc1::clamp<int>(r, 0, 31);
+         g = rg_etc1::clamp<int>(g, 0, 31);
+         b = rg_etc1::clamp<int>(b, 0, 31);
+      }
+
+      if (scaled)
+      {
+         r = (r << 3U) | (r >> 2U);
+         g = (g << 3U) | (g >> 2U);
+         b = (b << 3U) | (b >> 2U);
+      }
+
+      result.set_noclamp_rgba(r, g, b, rg_etc1::minimum(alpha, 255U));
+      return in_range;
+   }
+
+   // Unpacks base color + delta into separate r/g/b outputs and forwards the success flag
+   // of the color_quad_u8 overload. alpha is passed through but not returned.
+   bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha)
+   {
+      color_quad_u8 unpacked;
+      const bool in_range = unpack_color5(unpacked, packed_color5, packed_delta3, scaled, alpha);
+
+      r = unpacked.r;
+      g = unpacked.g;
+      b = unpacked.b;
+
+      return in_range;
+   }
+     
+   // Packs three signed deltas (each asserted to lie in cETC1ColorDeltaMin..cETC1ColorDeltaMax)
+   // into 3:3:3 form: R in bits 6-8, G in bits 3-5, B in bits 0-2. Negative values are
+   // stored as value + 8.
+   uint16 etc1_block::pack_delta3(int r, int g, int b)
+   {
+      RG_ETC1_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax));
+      RG_ETC1_ASSERT((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax));
+      RG_ETC1_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax));
+
+      const int packed_r = (r < 0) ? (r + 8) : r;
+      const int packed_g = (g < 0) ? (g + 8) : g;
+      const int packed_b = (b < 0) ? (b + 8) : b;
+
+      return static_cast<uint16>(packed_b | (packed_g << 3) | (packed_r << 6));
+   }
+   
+   // Unpacks a 3:3:3 delta (R in bits 6-8, G in bits 3-5, B in bits 0-2) into signed
+   // components: stored values 4-7 map to -4..-1, values 0-3 are returned unchanged.
+   void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3)
+   {
+      int delta_r = (packed_delta3 >> 6) & 7;
+      int delta_g = (packed_delta3 >> 3) & 7;
+      int delta_b = packed_delta3 & 7;
+
+      if (delta_r >= 4) delta_r -= 8;
+      if (delta_g >= 4) delta_g -= 8;
+      if (delta_b >= 4) delta_b -= 8;
+
+      r = delta_r;
+      g = delta_g;
+      b = delta_b;
+   }
+
+   // Convenience overload: packs the r, g and b members of color (its alpha is not used).
+   uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias)
+   {
+      const uint16 packed = pack_color4(color.r, color.g, color.b, scaled, bias);
+      return packed;
+   }
+   
+   // Packs r/g/b into 4:4:4 form (R in bits 8-11, G in bits 4-7, B in bits 0-3).
+   // When scaled is true each component is first quantized as (c * 15 + bias) / 255.
+   // Components are clamped to 15 before packing.
+   uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias)
+   {
+      const uint cMaxComponent = 15U;
+
+      if (scaled)
+      {
+         r = (r * 15U + bias) / 255U;
+         g = (g * 15U + bias) / 255U;
+         b = (b * 15U + bias) / 255U;
+      }
+
+      if (r > cMaxComponent) r = cMaxComponent;
+      if (g > cMaxComponent) g = cMaxComponent;
+      if (b > cMaxComponent) b = cMaxComponent;
+
+      return static_cast<uint16>((r << 8U) | (g << 4U) | b);
+   }
+
+   // Unpacks a 4:4:4 color (R in bits 8-11, G in bits 4-7, B in bits 0-3).
+   // When scaled is true each component is expanded to 8 bits as (c << 4) | c.
+   // alpha is clamped to 255.
+   color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha)
+   {
+      uint r = (packed_color4 >> 8U) & 15U;
+      uint g = (packed_color4 >> 4U) & 15U;
+      uint b = packed_color4 & 15U;
+
+      if (scaled)
+      {
+         r = (r << 4U) | r;
+         g = (g << 4U) | g;
+         b = (b << 4U) | b;
+      }
+
+      const uint a = rg_etc1::minimum(alpha, 255U);
+      return color_quad_u8(cNoClamp, r, g, b, a);
+   }
+   
+   // Unpacks a 4:4:4 color into separate r/g/b outputs (alpha is not returned).
+   void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled)
+   {
+      const color_quad_u8 unpacked(unpack_color4(packed_color4, scaled, 0));
+
+      r = unpacked.r;
+      g = unpacked.g;
+      b = unpacked.b;
+   }
+
+   // Computes the four colors selectable in a subblock whose base color is the 5:5:5
+   // color packed_color5: the base color expanded to 8 bits plus each of the four
+   // modifiers of intensity table table_idx. Results go to pDst[0..3] via color_quad_u8::set().
+   void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx)
+   {
+      RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
+
+      uint r, g, b;
+      unpack_color5(r, g, b, packed_color5, true);
+
+      const int base_r = static_cast<int>(r);
+      const int base_g = static_cast<int>(g);
+      const int base_b = static_cast<int>(b);
+
+      const int* pModifiers = &g_etc1_inten_tables[table_idx][0];
+      for (uint i = 0; i < 4; i++)
+      {
+         const int delta = pModifiers[i];
+         pDst[i].set(base_r + delta, base_g + delta, base_b + delta);
+      }
+   }
+   
+   // Computes the four colors selectable in a subblock whose base color is
+   // packed_color5 + packed_delta3: the (possibly clamped) sum expanded to 8 bits plus each
+   // of the four modifiers of intensity table table_idx. Results go to pDst[0..3] via
+   // color_quad_u8::set(). Returns false if base + delta was out of range; the colors are
+   // still written in that case.
+   bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx)
+   {
+      RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
+
+      uint r, g, b;
+      const bool in_range = unpack_color5(r, g, b, packed_color5, packed_delta3, true);
+
+      const int base_r = static_cast<int>(r);
+      const int base_g = static_cast<int>(g);
+      const int base_b = static_cast<int>(b);
+
+      const int* pModifiers = &g_etc1_inten_tables[table_idx][0];
+      for (uint i = 0; i < 4; i++)
+      {
+         const int delta = pModifiers[i];
+         pDst[i].set(base_r + delta, base_g + delta, base_b + delta);
+      }
+
+      return in_range;
+   }
+   
+   // Computes the four colors selectable in a subblock whose base color is the 4:4:4
+   // color packed_color4: the base color expanded to 8 bits plus each of the four
+   // modifiers of intensity table table_idx. Results go to pDst[0..3] via color_quad_u8::set().
+   void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx)
+   {
+      RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
+
+      uint r, g, b;
+      unpack_color4(r, g, b, packed_color4, true);
+
+      const int base_r = static_cast<int>(r);
+      const int base_g = static_cast<int>(g);
+      const int base_b = static_cast<int>(b);
+
+      const int* pModifiers = &g_etc1_inten_tables[table_idx][0];
+      for (uint i = 0; i < 4; i++)
+      {
+         const int delta = pModifiers[i];
+         pDst[i].set(base_r + delta, base_g + delta, base_b + delta);
+      }
+   }
+      
+   // Decodes one ETC1 block into 16 pixels, written four at a time to
+   // pDst_pixels_rgba (pDst advances by 4 after each group, selector y goes 0..3).
+   //
+   // Subblock layout (0 = first subblock, 1 = second subblock):
+   //   flip bit set:     flip bit clear:
+   //     0000              0011
+   //     0000              0011
+   //     1111              0011
+   //     1111              0011
+   //
+   // When preserve_alpha is true pixels are written with set_rgb() (presumably leaving the
+   // destination alpha untouched - confirm against color_quad_u8); otherwise the whole
+   // color, including its alpha, is assigned.
+   //
+   // Returns false if the block is in differential mode and base + delta is out of range
+   // for the second subblock; the pixels are still written in that case.
+   bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha)
+   {
+      const etc1_block& block = *static_cast<const etc1_block*>(pETC1_block);
+      color_quad_u8* pDst = reinterpret_cast<color_quad_u8*>(pDst_pixels_rgba);
+
+      const bool diff_flag = block.get_diff_bit();
+      const bool flip_flag = block.get_flip_bit();
+      const uint table_index0 = block.get_inten_table(0);
+      const uint table_index1 = block.get_inten_table(1);
+
+      // subblock_colors[s][i] is the color chosen by selector i inside subblock s.
+      color_quad_u8 subblock_colors[2][4];
+      bool success = true;
+
+      if (!diff_flag)
+      {
+         // Individual mode: each subblock carries its own 4:4:4 base color.
+         etc1_block::get_abs_subblock_colors(subblock_colors[0], block.get_base4_color(0), table_index0);
+         etc1_block::get_abs_subblock_colors(subblock_colors[1], block.get_base4_color(1), table_index1);
+      }
+      else
+      {
+         // Differential mode: 5:5:5 base for subblock 0, base + 3:3:3 delta for subblock 1.
+         const uint16 base_color5 = block.get_base5_color();
+         const uint16 delta_color3 = block.get_delta3_color();
+
+         etc1_block::get_diff_subblock_colors(subblock_colors[0], base_color5, table_index0);
+         success = etc1_block::get_diff_subblock_colors(subblock_colors[1], base_color5, delta_color3, table_index1);
+      }
+
+      for (uint y = 0; y < 4; y++, pDst += 4)
+      {
+         for (uint x = 0; x < 4; x++)
+         {
+            // Flipped blocks split on y (upper/lower), unflipped blocks on x (left/right).
+            const uint split_coord = flip_flag ? y : x;
+            const uint subblock = (split_coord >= 2) ? 1 : 0;
+            const color_quad_u8& color = subblock_colors[subblock][block.get_selector(x, y)];
+
+            if (preserve_alpha)
+               pDst[x].set_rgb(color);
+            else
+               pDst[x] = color;
+         }
+      }
+
+      return success;
+   }
+
+   // One candidate subblock encoding: an unscaled base color (4 or 5 bits per component),
+   // an intensity table index, and a flag telling which of the two component widths is used.
+   struct etc1_solution_coordinates
+   {
+      inline etc1_solution_coordinates() :
+         m_unscaled_color(0, 0, 0, 0),
+         m_inten_table(0),
+         m_color4(false)
+      {
+      }
+
+      inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) : 
+         m_unscaled_color(r, g, b, 255),
+         m_inten_table(inten_table),
+         m_color4(color4)
+      {
+      }
+
+      inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) : 
+         m_unscaled_color(c),
+         m_inten_table(inten_table),
+         m_color4(color4)
+      {
+      }
+
+      // Member-wise copy through the initializer list (no default-construct-then-assign).
+      inline etc1_solution_coordinates(const etc1_solution_coordinates& other) :
+         m_unscaled_color(other.m_unscaled_color),
+         m_inten_table(other.m_inten_table),
+         m_color4(other.m_color4)
+      {
+      }
+
+      inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs)
+      {
+         m_unscaled_color = rhs.m_unscaled_color;
+         m_inten_table = rhs.m_inten_table;
+         m_color4 = rhs.m_color4;
+         return *this;
+      }
+
+      // Resets to color (via color_quad_u8::clear()), table 0, 5-bit mode.
+      inline void clear()
+      {
+         m_unscaled_color.clear();
+         m_inten_table = 0;
+         m_color4 = false;
+      }
+
+      // Returns the base color with each component expanded to 8 bits.
+      inline color_quad_u8 get_scaled_color() const
+      {
+         int br, bg, bb;
+         get_scaled_components(br, bg, bb);
+         return color_quad_u8(br, bg, bb);
+      }
+
+      // Writes the four colors selectable with this base color and intensity table to
+      // pBlock_colors[0..3] (scaled base color plus each table modifier, via color_quad_u8::set()).
+      // Does not modify this object.
+      inline void get_block_colors(color_quad_u8* pBlock_colors) const
+      {
+         int br, bg, bb;
+         get_scaled_components(br, bg, bb);
+
+         const int* pInten_table = g_etc1_inten_tables[m_inten_table];
+         pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]);
+         pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]);
+         pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]);
+         pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]);
+      }
+
+      color_quad_u8 m_unscaled_color;
+      uint m_inten_table;
+      bool m_color4;
+
+   private:
+      // Expands the unscaled components to 8 bits by bit replication:
+      // c | (c << 4) for 4-bit components, (c >> 2) | (c << 3) for 5-bit components.
+      inline void get_scaled_components(int& br, int& bg, int& bb) const
+      {
+         if (m_color4)
+         {
+            br = m_unscaled_color.r | (m_unscaled_color.r << 4);
+            bg = m_unscaled_color.g | (m_unscaled_color.g << 4);
+            bb = m_unscaled_color.b | (m_unscaled_color.b << 4);
+         }
+         else
+         {
+            br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3);
+            bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3);
+            bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3);
+         }
+      }
+   };
+
+   class etc1_optimizer // Block color search state: configured through init(), run through compute().
+   {
+      etc1_optimizer(const etc1_optimizer&); // declared private; no definition visible here - presumably non-copyable by design
+      etc1_optimizer& operator= (const etc1_optimizer&); // likewise
+
+   public:
+      etc1_optimizer()
+      {
+         clear();
+      }
+
+      void clear() // Nulls the pointer members only; the remaining members are left as they are.
+      {
+         m_pParams = NULL;
+         m_pResult = NULL;
+         m_pSorted_luma = NULL;
+         m_pSorted_luma_indices = NULL;
+      }
+
+      struct params : etc1_pack_params // Packer settings plus the optimizer-specific inputs below.
+      {
+         params()
+         {
+            clear();
+         }
+
+         params(const etc1_pack_params& base_params) : // Copies the base settings and resets the optimizer-specific fields.
+         etc1_pack_params(base_params)
+         {
+            clear_optimizer_params();
+         }
+
+         void clear()
+         {
+            etc1_pack_params::clear();
+            clear_optimizer_params();
+         }
+
+         void clear_optimizer_params() // Defaults: no source pixels, 5-bit color mode, a single scan delta of 0, no base color constraint.
+         {
+            m_num_src_pixels = 0;
+            m_pSrc_pixels = 0;
+
+            m_use_color4 = false;
+            static const int s_default_scan_delta[] = { 0 };
+            m_pScan_deltas = s_default_scan_delta;
+            m_scan_delta_size = 1;
+
+            m_base_color5.clear();
+            m_constrain_against_base_color5 = false;
+         }
+
+         uint m_num_src_pixels; // number of entries in m_pSrc_pixels
+         const color_quad_u8* m_pSrc_pixels; // not owned (never allocated or freed in this class)
+
+         bool m_use_color4; // passed as the color4 flag of each trial etc1_solution_coordinates in compute()
+         const int* m_pScan_deltas; // offsets added to m_br/m_bg/m_bb in compute(); compute() breaks out of a scan loop at the first delta exceeding m_limit
+         uint m_scan_delta_size; // number of entries in m_pScan_deltas
+
+         color_quad_u8 m_base_color5; // NOTE(review): presumably the subblock 0 base color used when m_constrain_against_base_color5 is set - usage not visible here
+         bool m_constrain_against_base_color5;
+      };
+
+      struct results // Output record; m_n and m_pSelectors are not set by this struct and must be provided by the owner.
+      {
+         uint64 m_error;
+         color_quad_u8 m_block_color_unscaled;
+         uint m_block_inten_table;
+         uint m_n; // number of selector bytes behind m_pSelectors
+         uint8* m_pSelectors; // caller-provided buffer of at least m_n bytes (operator= copies into it)
+         bool m_block_color4;
+
+         inline results& operator= (const results& rhs) // Copies values and selector contents; m_n and the m_pSelectors pointer itself are kept.
+         {
+            m_block_color_unscaled = rhs.m_block_color_unscaled;
+            m_block_color4 = rhs.m_block_color4;
+            m_block_inten_table = rhs.m_block_inten_table;
+            m_error = rhs.m_error;
+            RG_ETC1_ASSERT(m_n == rhs.m_n);
+            memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n);
+            return *this;
+         }
+      };
+
+      void init(const params& params, results& result); // defined outside this view
+      bool compute(); // scans candidate block colors around the average color (see its definition)
+
+   private:      
+      struct potential_solution // A trial encoding with its error; m_selectors is not initialized by the constructor or clear().
+      {
+         potential_solution() : m_coords(), m_error(cUINT64_MAX), m_valid(false)
+         {
+         }
+
+         etc1_solution_coordinates  m_coords;
+         uint8                      m_selectors[8];
+         uint64                     m_error;
+         bool                       m_valid;
+
+         void clear()
+         {
+            m_coords.clear();
+            m_error = cUINT64_MAX;
+            m_valid = false;
+         }
+      };
+
+      const params* m_pParams; // read by compute(); presumably set by init() - TODO confirm
+      results* m_pResult;
+
+      int m_limit; // upper bound for candidate color components in compute()
+
+      vec3F m_avg_color;
+      int m_br, m_bg, m_bb; // center of the color scan in compute()
+      uint16 m_luma[8];
+      uint32 m_sorted_luma[2][8];
+      const uint32* m_pSorted_luma_indices;
+      uint32* m_pSorted_luma;
+
+      uint8 m_selectors[8];
+      uint8 m_best_selectors[8];
+
+      potential_solution m_best_solution; // best candidate found so far; refined in compute()
+      potential_solution m_trial_solution; // scratch candidate handed to evaluate_solution*()
+      uint8 m_temp_selectors[8];
+
+      bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); // used by compute() when m_quality == cHighQuality
+      bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); // used by compute() for the other quality levels
+   };
+      
+   bool etc1_optimizer::compute()
+   {
+      const uint n = m_pParams->m_num_src_pixels;
+      const int scan_delta_size = m_pParams->m_scan_delta_size;
+      
+      // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color.
+      // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index.
+      for (int zdi = 0; zdi < scan_delta_size; zdi++)
+      {
+         const int zd = m_pParams->m_pScan_deltas[zdi];
+         const int mbb = m_bb + zd;
+         if (mbb < 0) continue; else if (mbb > m_limit) break;
+         
+         for (int ydi = 0; ydi < scan_delta_size; ydi++)
+         {
+            const int yd = m_pParams->m_pScan_deltas[ydi];
+            const int mbg = m_bg + yd;
+            if (mbg < 0) continue; else if (mbg > m_limit) break;
+
+            for (int xdi = 0; xdi < scan_delta_size; xdi++)
+            {
+               const int xd = m_pParams->m_pScan_deltas[xdi];
+               const int mbr = m_br + xd;
+               if (mbr < 0) continue; else if (mbr > m_limit) break;
+      
+               etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4);
+               if (m_pParams->m_quality == cHighQuality)
+               {
+                  if (!evaluate_solution(coords, m_trial_solution, &m_best_solution))
+                     continue;
+               }
+               else
+               {
+                  if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution))
+                     continue;
+               }
+               
+               // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index.
+               // Now, for each component, attempt to refine the current solution by solving a simple linear equation. For example, for 4 colors:
+               // The goal is:
+               // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0
+               // Rearranging this:
+               // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0
+               // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0
+               // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0
+               // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0
+               // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0
+               // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4
+               // So what this means:
+               // optimal_block_color = avg_input - avg_inten_delta
+               // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta.
+               // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula.
+               // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping.
+
+               const uint max_refinement_trials = (m_pParams->m_quality == cLowQuality) ? 2 : (((xd | yd | zd) == 0) ? 4 : 2);
+               for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++)
+               {
+                  const uint8* pSelectors = m_best_solution.m_selectors;
+                  const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table];
+
+                  int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0;
+                  const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color());
+                  for (uint r = 0; r < n; r++)
+                  {
+                     const uint s = *pSelectors++;
+                     const int yd = pInten_table[s];
+                     // Compute actual delta being applied to each pixel, taking into account clamping.
+                     delta_sum_r += rg_etc1::clamp<int>(base_color.r + yd, 0, 255) - base_color.r;
+                     delta_sum_g += rg_etc1::clamp<int>(base_color.g + yd, 0, 255) - base_color.g;
+                     delta_sum_b += rg_etc1::clamp<int>(base_color.b + yd, 0, 255) - base_color.b;
+                  }
+                  if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b))
+                     break;
+                  const float avg_delta_r_f = static_cast<float>(delta_sum_r) / n;
+                  const float avg_delta_g_f = static_cast<float>(delta_sum_g) / n;
+                  const float avg_delta_b_f = static_cast<float>(delta_sum_b) / n;
+                  const int br1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit);
+                  const int bg1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit);
+                  const int bb1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit);
+                  
+                  bool skip = false;
+                  
+                  if ((mbr == br1) && (mbg == bg1) && (mbb == bb1))
+                     skip = true;
+                  else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b))
+                     skip = true;
+                  else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1))
+                     skip = true;
+
+                  if (skip)
+                     break;
+
+                  etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4);
+                  if (m_pParams->m_quality == cHighQuality)
+                  {
+                     if (!evaluate_solution(coords1, m_trial_solution, &m_best_solution)) 
+                        break;
+                  }
+                  else
+                  {
+                     if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution))
+                        break;
+                  }
+
+               }  // refinement_trial
+
+            } // xdi
+         } // ydi
+      } // zdi
+
+      if (!m_best_solution.m_valid)
+      {
+         m_pResult->m_error = cUINT32_MAX;
+         return false;
+      }
+      
+      const uint8* pSelectors = m_best_solution.m_selectors;
+
+#ifdef RG_ETC1_BUILD_DEBUG
+      {
+         color_quad_u8 block_colors[4];
+         m_best_solution.m_coords.get_block_colors(block_colors);
+
+         const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
+         uint64 actual_error = 0;
+         for (uint i = 0; i < n; i++)
+            actual_error += pSrc_pixels[i].squared_distance_rgb(block_colors[pSelectors[i]]);
+         
+         RG_ETC1_ASSERT(actual_error == m_best_solution.m_error);
+      }
+#endif      
+      
+      m_pResult->m_error = m_best_solution.m_error;
+
+      m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color;
+      m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4;
+      
+      m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table;
+      memcpy(m_pResult->m_pSelectors, pSelectors, n);
+      m_pResult->m_n = n;
+
+      return true;
+   }
+
+   void etc1_optimizer::init(const params& p, results& r)
+   {
+      // Binds the optimizer to one subblock's inputs/outputs and precomputes what the solution evaluators read: the average
+      // source color, its quantized base color, per-pixel lumas and (for quality <= cMediumQuality) their sort order. Only 8-pixel subblocks are handled.
+      RG_ETC1_ASSERT(p.m_num_src_pixels == 8);
+      const uint n = 8;
+
+      m_pParams = &p;
+      m_pResult = &r;
+
+      // Largest quantized component value: 15 when 4-bit base colors are requested, 31 otherwise.
+      m_limit = p.m_use_color4 ? 15 : 31;
+
+      // Sum the RGB of every source pixel; also record each pixel's luma (r+g+b) and seed the index list with 0..n-1.
+      vec3F color_sum(0.0f);
+      for (uint px = 0; px < n; px++)
+      {
+         const color_quad_u8& src = p.m_pSrc_pixels[px];
+         const vec3F src_rgb(src.r, src.g, src.b);
+         color_sum += src_rgb;
+
+         m_luma[px] = static_cast<uint16>(src.r + src.g + src.b);
+         m_sorted_luma[0][px] = px;
+      }
+      color_sum *= (1.0f / static_cast<float>(n));
+      m_avg_color = color_sum;
+
+      // Starting base color: the average color rescaled from [0,255] to [0,m_limit] with rounding.
+      m_br = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit);
+      m_bg = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit);
+      m_bb = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit);
+
+      if (p.m_quality <= cMediumQuality)
+      {
+         // Sort the pixel indices by luma. The sort returns a pointer to the sorted indices; when that is scratch row 0,
+         // row 1 receives the luma values in sorted order, otherwise row 0 does.
+         m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0], m_sorted_luma[1], m_luma, 0, sizeof(m_luma[0]), false);
+         m_pSorted_luma = (m_pSorted_luma_indices == m_sorted_luma[0]) ? m_sorted_luma[1] : m_sorted_luma[0];
+         for (uint rank = 0; rank < n; rank++)
+            m_pSorted_luma[rank] = m_luma[m_pSorted_luma_indices[rank]];
+      }
+      // No solution has been found yet: mark the best solution invalid with the largest possible error.
+      m_best_solution.m_coords.clear();
+      m_best_solution.m_valid = false;
+      m_best_solution.m_error = cUINT64_MAX;
+   }
+
+   bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution)
+   {
+      // Exhaustive evaluation of one candidate base color.
+      //
+      // For the base color in coords, every intensity table is tried and each source pixel is matched against all four
+      // block colors of that table. The table with the lowest total squared RGB error is stored in trial_solution.
+      //
+      // coords          - candidate base color to evaluate.
+      // trial_solution  - scratch output; receives the best table/selectors/error found for coords.
+      // pBest_solution  - optional running best; overwritten with trial_solution when the trial has a strictly lower error.
+      //
+      // Returns true only if *pBest_solution was replaced.
+      trial_solution.m_valid = false;
+
+      if (m_pParams->m_constrain_against_base_color5)
+      {
+         // Reject candidates whose per-component difference from m_base_color5 falls outside
+         // [cETC1ColorDeltaMin, cETC1ColorDeltaMax].
+         const int delta_r = coords.m_unscaled_color.r - m_pParams->m_base_color5.r;
+         const int delta_g = coords.m_unscaled_color.g - m_pParams->m_base_color5.g;
+         const int delta_b = coords.m_unscaled_color.b - m_pParams->m_base_color5.b;
+
+         if ((rg_etc1::minimum(delta_r, delta_g, delta_b) < cETC1ColorDeltaMin) || (rg_etc1::maximum(delta_r, delta_g, delta_b) > cETC1ColorDeltaMax))
+            return false;
+      }
+
+      const uint n = 8;
+      const color_quad_u8 base_color(coords.get_scaled_color());
+
+      trial_solution.m_error = cUINT64_MAX;
+
+      for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++)
+      {
+         const int* pModifiers = g_etc1_inten_tables[inten_table];
+
+         // The four colors this table produces from the base color.
+         color_quad_u8 block_colors[4];
+         for (uint s = 0; s < 4; s++)
+         {
+            const int modifier = pModifiers[s];
+            block_colors[s].set(base_color.r + modifier, base_color.g + modifier, base_color.b + modifier, 0);
+         }
+
+         uint64 total_error = 0;
+
+         for (uint c = 0; c < n; c++)
+         {
+            const color_quad_u8& src_pixel = m_pParams->m_pSrc_pixels[c];
+
+            // Start with selector 0 and keep whichever later selector is strictly closer, so ties go to the lowest index.
+            uint best_selector_index = 0;
+            uint best_error = rg_etc1::square(src_pixel.r - block_colors[0].r) + rg_etc1::square(src_pixel.g - block_colors[0].g) + rg_etc1::square(src_pixel.b - block_colors[0].b);
+
+            for (uint s = 1; s < 4; s++)
+            {
+               const color_quad_u8& candidate = block_colors[s];
+               const uint trial_error = rg_etc1::square(src_pixel.r - candidate.r) + rg_etc1::square(src_pixel.g - candidate.g) + rg_etc1::square(src_pixel.b - candidate.b);
+               if (trial_error < best_error)
+               {
+                  best_error = trial_error;
+                  best_selector_index = s;
+               }
+            }
+
+            m_temp_selectors[c] = static_cast<uint8>(best_selector_index);
+
+            // Early out: this table can no longer beat the best table found so far.
+            total_error += best_error;
+            if (total_error >= trial_solution.m_error)
+               break;
+         }
+
+         if (total_error < trial_solution.m_error)
+         {
+            trial_solution.m_error = total_error;
+            trial_solution.m_coords.m_inten_table = inten_table;
+            memcpy(trial_solution.m_selectors, m_temp_selectors, 8);
+            trial_solution.m_valid = true;
+         }
+      }
+      trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color;
+      trial_solution.m_coords.m_color4 = m_pParams->m_use_color4;
+
+      // Promote the trial only when a best solution is being tracked and the trial strictly improves on it.
+      if ((pBest_solution) && (trial_solution.m_error < pBest_solution->m_error))
+      {
+         *pBest_solution = trial_solution;
+         return true;
+      }
+
+      return false;
+   }
+
+   bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution)
+   { // Faster counterpart of evaluate_solution(): pixels are classified by luma against block color luma midpoints instead of testing all four block colors per pixel.
+      if (m_pParams->m_constrain_against_base_color5)
+      {
+         const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r;
+         const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g;
+         const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b;
+         // Reject the candidate when any component delta falls outside [cETC1ColorDeltaMin, cETC1ColorDeltaMax].
+         if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax))
+         {
+            trial_solution.m_valid = false;
+            return false;
+         }
+      }
+      // NOTE: relies on m_pSorted_luma / m_pSorted_luma_indices, which init() only fills in when m_quality <= cMediumQuality.
+      const color_quad_u8 base_color(coords.get_scaled_color());
+
+      const uint n = 8;
+      
+      trial_solution.m_error = cUINT64_MAX;
+      // Tables are tried from the last index down; the loop stops early once a table yields zero total error.
+      for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table)
+      {
+         const int* pInten_table = g_etc1_inten_tables[inten_table];
+
+         uint block_inten[4];
+         color_quad_u8 block_colors[4];
+         for (uint s = 0; s < 4; s++)
+         {
+            const int yd = pInten_table[s];
+            color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0);
+            block_colors[s] = block_color;
+            block_inten[s] = block_color.r + block_color.g + block_color.b;
+         }
+
+         // evaluate_solution_fast() enforces/assumes a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors.
+         // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast.
+         // 0   1   2   3
+         //   01  12  23
+         const uint block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] };
+         // The "midpoints" are stored doubled (sum of the two neighboring intensities), hence the (luma * 2) comparisons below.
+         uint64 total_error = 0;
+         const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
+         if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) // last sorted luma is below the 0/1 midpoint: all pixels use selector 0
+         {
+            if (block_inten[0] > m_pSorted_luma[n - 1])
+            {
+               const uint min_error = labs(int(block_inten[0] - m_pSorted_luma[n - 1]));
+               if (min_error >= trial_solution.m_error) // skip this table: the luma gap is already >= the best error so far
+                  continue;
+            }
+
+            memset(&m_temp_selectors[0], 0, n);
+
+            for (uint c = 0; c < n; c++)
+               total_error += block_colors[0].squared_distance_rgb(pSrc_pixels[c]);
+         }
+         else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) // first sorted luma is at/above the 2/3 midpoint: all pixels use selector 3
+         {
+            if (m_pSorted_luma[0] > block_inten[3])
+            {
+               const uint min_error = labs(int(m_pSorted_luma[0] - block_inten[3]));
+               if (min_error >= trial_solution.m_error) // same early rejection as in the selector 0 case
+                  continue;
+            }
+
+            memset(&m_temp_selectors[0], 3, n);
+
+            for (uint c = 0; c < n; c++)
+               total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[c]);
+         }
+         else
+         {
+            uint cur_selector = 0, c; // cur_selector only ever increases while walking the pixels in sorted order
+            for (c = 0; c < n; c++)
+            {
+               const uint y = m_pSorted_luma[c];
+               while ((y * 2) >= block_inten_midpoints[cur_selector]) // advance until y is below the next midpoint
+                  if (++cur_selector > 2)
+                     goto done; // past the last midpoint: this and all remaining pixels get selector 3
+               const uint sorted_pixel_index = m_pSorted_luma_indices[c];
+               m_temp_selectors[sorted_pixel_index] = static_cast<uint8>(cur_selector);
+               total_error += block_colors[cur_selector].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]);
+            }
+done: // Pixels from index c onward (none if the loop above ran to completion) are assigned selector 3.
+            while (c < n)
+            {
+               const uint sorted_pixel_index = m_pSorted_luma_indices[c];
+               m_temp_selectors[sorted_pixel_index] = 3;
+               total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]);
+               ++c;
+            }
+         }
+
+         if (total_error < trial_solution.m_error)
+         {
+            trial_solution.m_error = total_error;
+            trial_solution.m_coords.m_inten_table = inten_table;
+            memcpy(trial_solution.m_selectors, m_temp_selectors, n);
+            trial_solution.m_valid = true;
+            if (!total_error)
+               break;
+         }
+      }
+      trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color;
+      trial_solution.m_coords.m_color4 = m_pParams->m_use_color4;
+      // Returns true only when pBest_solution is given and was replaced by a trial with a strictly lower error.
+      bool success = false;
+      if (pBest_solution)
+      {
+         if (trial_solution.m_error < pBest_solution->m_error)
+         {
+            *pBest_solution = trial_solution;
+            success = true;
+         }
+      }
+
+      return success;
+   }
+         
+   static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c)
+   {
+      // Decodes one 8-bit component: expand the packed 5-bit (diff) or 4-bit base value to 8 bits,
+      // add the g_etc1_inten_tables[inten][selector] modifier, then clamp the sum to [0,255].
+      const uint limit = diff ? 32 : 16;
+      RG_ETC1_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit));
+
+      // Bit replication: 5 bits -> 8 via (c << 3) | (c >> 2), 4 bits -> 8 via (c << 4) | c.
+      const uint expanded = diff ? ((packed_c << 3) | (packed_c >> 2)) : ((packed_c << 4) | packed_c);
+
+      const int modified = static_cast<int>(expanded) + g_etc1_inten_tables[inten][selector];
+      return rg_etc1::clamp<int>(modified, 0, 255);
+   }
+
+   static inline int mul_8bit(int a, int b) { const int biased = a * b + 128; return ((biased >> 8) + biased) >> 8; } // rounded a*b/255 without a divide
+
+   void pack_etc1_block_init()
+   {
+      // Fills the runtime lookup tables: g_etc1_inverse_lookup (read by the solid color packers) and g_quant5_tab (read by the dither).
+      // Row [diff + inten*2 + selector*16] of g_etc1_inverse_lookup maps each 8-bit target value to the packed base component
+      // that decodes closest to it (low byte) together with the absolute error of that choice (high byte).
+      for (uint diff = 0; diff < 2; diff++)
+      {
+         const uint num_packed_values = diff ? 32 : 16;
+
+         for (uint inten = 0; inten < 8; inten++)
+         {
+            for (uint selector = 0; selector < 4; selector++)
+            {
+               const uint row = diff + (inten << 1) + (selector << 4);
+               for (uint target = 0; target < 256; target++)
+               {
+                  uint lowest_err = cUINT32_MAX, lowest_err_packed_c = 0;
+                  for (uint packed_c = 0; packed_c < num_packed_values; packed_c++)
+                  {
+                     const int decoded = etc1_decode_value(diff, inten, selector, packed_c);
+                     const uint err = labs(decoded - static_cast<int>(target));
+                     if (err >= lowest_err)
+                        continue;
+
+                     lowest_err = err;
+                     lowest_err_packed_c = packed_c;
+                     if (!lowest_err)
+                        break;
+                  }
+                  RG_ETC1_ASSERT(lowest_err <= 255);
+                  g_etc1_inverse_lookup[row][target] = static_cast<uint16>(lowest_err_packed_c | (lowest_err << 8));
+               }
+            }
+         }
+      }
+
+      // g_quant5_tab[i] is clamp(i - 8, 0, 255) quantized to 5 bits and expanded back to 8 bits by bit replication.
+      for (int i = 0; i < 256 + 16; i++)
+      {
+         const int q5 = mul_8bit(clamp<int>(i - 8, 0, 255), 31);
+         g_quant5_tab[i] = static_cast<uint8>((q5 << 3) | (q5 >> 2));
+      }
+   }
+
+   // Packs solid color blocks efficiently using a set of small precomputed tables.
+   // For random 888 inputs, MSE results are better than Ericsson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time.
+   static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor, etc1_pack_params& pack_params)
+   {
+      // block: receives the packed bytes. pColor: the three color components (indexed 0..2). pack_params: currently unused. Returns the summed squared component error of the chosen encoding.
+      RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]);
+      // s_next_comp[i] and s_next_comp[i + 1] name the two components other than component i.
+      static const uint s_next_comp[4] = { 1, 2, 0, 1 };
+            
+      uint best_error = cUINT32_MAX, best_i = 0;
+      int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
+
+      // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
+      for (uint i = 0; i < 3; i++)
+      {
+         const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];
+
+         const int delta_range = 1;
+         for (int delta = -delta_range; delta <= delta_range; delta++)
+         {
+            const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255);
+
+            const uint16* pTable;
+            if (!c_plus_delta)
+               pTable = g_color8_to_etc_block_config_0_255[0];
+            else if (c_plus_delta == 255)
+               pTable = g_color8_to_etc_block_config_0_255[1];
+            else
+               pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];
+
+            do
+            {
+               const uint x = *pTable++;
+
+#ifdef RG_ETC1_BUILD_DEBUG
+               const uint diff = x & 1;
+               const uint inten = (x >> 1) & 7;
+               const uint selector = (x >> 4) & 3;
+               const uint p0 = (x >> 8) & 255;
+               RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
+#endif
+
+               const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF];
+               uint16 p1 = pInverse_table[c1];
+               uint16 p2 = pInverse_table[c2];
+               const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8);
+               if (trial_error < best_error)
+               {
+                  best_error = trial_error;
+                  best_x = x;
+                  best_packed_c1 = p1 & 0xFF;
+                  best_packed_c2 = p2 & 0xFF;
+                  best_i = i;
+                  if (!best_error)
+                     goto found_perfect_match;
+               }
+            } while (*pTable != 0xFFFF);
+         }
+      }
+found_perfect_match:
+
+      const uint diff = best_x & 1;
+      const uint inten = (best_x >> 1) & 7;
+
+      block.m_bytes[3] = static_cast<uint8>(((inten | (inten << 3)) << 2) | (diff << 1));
+      // One selector is used for the whole block, so bytes 4-5 and 6-7 are each all ones or all zeros; stored per byte to avoid a type-punned, possibly misaligned uint16 write.
+      const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3];
+      block.m_bytes[4] = block.m_bytes[5] = static_cast<uint8>((etc1_selector & 2) ? 0xFF : 0);
+      block.m_bytes[6] = block.m_bytes[7] = static_cast<uint8>((etc1_selector & 1) ? 0xFF : 0);
+
+      const uint best_packed_c0 = (best_x >> 8) & 255;
+      if (diff)
+      {
+         block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 << 3);
+         block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 << 3);
+         block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 << 3);
+      }
+      else
+      {
+         block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 | (best_packed_c0 << 4));
+         block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 | (best_packed_c1 << 4));
+         block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 | (best_packed_c2 << 4));
+      }
+
+      return best_error;
+   }
+      
+   static uint pack_etc1_block_solid_color_constrained(
+      etc1_optimizer::results& results, 
+      uint num_colors, const uint8* pColor, 
+      etc1_pack_params& pack_params, 
+      bool use_diff,
+      const color_quad_u8* pBase_color5_unscaled)
+   {
+      RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]);
+      // Solid color search restricted to configurations whose diff bit equals use_diff; the winner is written to results (base color, intensity table, one selector for all num_colors pixels).
+      // pack_params is currently unused. With a non-null pBase_color5_unscaled (diff configurations only), each packed component must lie within [cETC1ColorDeltaMin, cETC1ColorDeltaMax] of it. Returns the error times num_colors, or cUINT32_MAX if nothing qualified.
+      static uint s_next_comp[4] = { 1, 2, 0, 1 };
+
+      uint best_error = cUINT32_MAX, best_i = 0;
+      int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
+
+      // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
+      for (uint i = 0; i < 3; i++)
+      {
+         const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];
+
+         const int delta_range = 1;
+         for (int delta = -delta_range; delta <= delta_range; delta++)
+         {
+            const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255);
+
+            const uint16* pTable;
+            if (!c_plus_delta)
+               pTable = g_color8_to_etc_block_config_0_255[0];
+            else if (c_plus_delta == 255)
+               pTable = g_color8_to_etc_block_config_0_255[1];
+            else
+               pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];
+
+            do
+            {
+               const uint x = *pTable++;
+               const uint diff = x & 1;
+               if (static_cast<uint>(use_diff) != diff) // configuration is for the other mode; skip it
+               {
+                  if (*pTable == 0xFFFF) // 0xFFFF terminates the configuration list
+                     break;
+                  continue;
+               }
+
+               if ((diff) && (pBase_color5_unscaled)) // component i itself must be within the allowed delta of the base color
+               {
+                  const int p0 = (x >> 8) & 255;
+                  int delta = p0 - static_cast<int>(pBase_color5_unscaled->c[i]); // NOTE: shadows the outer loop variable "delta" inside this scope
+                  if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax))
+                  {
+                     if (*pTable == 0xFFFF)
+                        break;
+                     continue;
+                  }
+               }
+
+#ifdef RG_ETC1_BUILD_DEBUG
+               {
+                  const uint inten = (x >> 1) & 7;
+                  const uint selector = (x >> 4) & 3;
+                  const uint p0 = (x >> 8) & 255;
+                  RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
+               }
+#endif
+
+               const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF];
+               uint16 p1 = pInverse_table[c1];
+               uint16 p2 = pInverse_table[c2];
+               // The packed values chosen for the other two components must satisfy the same delta constraint.
+               if ((diff) && (pBase_color5_unscaled))
+               {
+                  int delta1 = (p1 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i]]);
+                  int delta2 = (p2 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i + 1]]);
+                  if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax))
+                  {
+                     if (*pTable == 0xFFFF)
+                        break;
+                     continue;
+                  }
+               }
+
+               const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8);
+               if (trial_error < best_error)
+               {
+                  best_error = trial_error;
+                  best_x = x;
+                  best_packed_c1 = p1 & 0xFF;
+                  best_packed_c2 = p2 & 0xFF;
+                  best_i = i;
+                  if (!best_error)
+                     goto found_perfect_match;
+               }
+            } while (*pTable != 0xFFFF);
+         }
+      }
+found_perfect_match:
+
+      if (best_error == cUINT32_MAX) // no configuration passed the mode/delta filters; results is left untouched
+         return best_error;
+
+      best_error *= num_colors;
+      // Report the solution: all num_colors pixels share a single selector.
+      results.m_n = num_colors;
+      results.m_block_color4 = !(best_x & 1);
+      results.m_block_inten_table = (best_x >> 1) & 7;
+      memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors);
+      // Place the three packed components back into their own slots (best_i and the two components after it).
+      const uint best_packed_c0 = (best_x >> 8) & 255;
+      results.m_block_color_unscaled[best_i] = static_cast<uint8>(best_packed_c0);
+      results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1);
+      results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast<uint8>(best_packed_c2);
+      results.m_error = best_error;
+      
+      return best_error;
+   }
+
+   // Function originally from RYG's public domain real-time DXT1 compressor, modified for 555. Writes a dithered copy of the 16 pixels in block to dest: the first three channels are quantized through g_quant5_tab with the quantization error diffused to later pixels.
+   static void dither_block_555(color_quad_u8* dest, const color_quad_u8* block)
+   {
+      int err[8],*ep1 = err,*ep2 = err+4; // two rows of 4 error terms: ep1 = row being written, ep2 = previous row
+      uint8 *quant = g_quant5_tab+8; // offset by 8 so that indices slightly below 0 (and above 255) stay inside the 256+16 entry table
+      // Preset every byte of dest to 0xFF; only the first three bytes of each pixel are overwritten below.
+      memset(dest, 0xFF, sizeof(color_quad_u8)*16);
+
+      // process channels separately
+      for(int ch=0;ch<3;ch++)
+      {
+         uint8* bp = (uint8*)block;
+         uint8* dp = (uint8*)dest;
+
+         bp += ch; dp += ch; // byte ch of the first pixel; the 0/4/8/12 offsets and the +16 per row below assume 4-byte pixels
+         // Error terms start from zero for every channel.
+         memset(err,0, sizeof(err));
+         for(int y = 0; y < 4; y++)
+         {
+            // pixel 0
+            dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
+            ep1[0] = bp[ 0] - dp[ 0];
+            // Weights in sixteenths: 7 from the left neighbor, 3 / 5 / 1 from the upper-right / upper / upper-left neighbors (missing neighbors are dropped at the edges).
+            // pixel 1
+            dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
+            ep1[1] = bp[ 4] - dp[ 4];
+
+            // pixel 2
+            dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
+            ep1[2] = bp[ 8] - dp[ 8];
+
+            // pixel 3
+            dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
+            ep1[3] = bp[12] - dp[12];
+
+            // advance to next line
+            int* tmp = ep1; ep1 = ep2; ep2 = tmp; // the row just written becomes the "previous row" for the next line
+            bp += 16;
+            dp += 16;
+         }
+      }
+   }
+
+   unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params)
+   {
+      const color_quad_u8* pSrc_pixels = reinterpret_cast<const color_quad_u8*>(pSrc_pixels_rgba);
+      etc1_block& dst_block = *static_cast<etc1_block*>(pETC1_block);
+
+#ifdef RG_ETC1_BUILD_DEBUG
+      // Ensure all alpha values are 0xFF.
+      for (uint i = 0; i < 16; i++)
+      {
+         //RG_ETC1_ASSERT(pSrc_pixels[i].a == 255);
+      }
+#endif
+
+      color_quad_u8 src_pixel0(pSrc_pixels[0]);
+
+      // Check for solid block.
+      const uint32 first_pixel_u32 = pSrc_pixels->m_u32;
+      int r;
+      for (r = 15; r >= 1; --r)
+         if (pSrc_pixels[r].m_u32 != first_pixel_u32)
+            break;
+      //if (!r)
+      //   return static_cast<unsigned int>(16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params));
+
+      
+      color_quad_u8 dithered_pixels[16];
+      if (pack_params.m_dithering)
+      {
+         dither_block_555(dithered_pixels, pSrc_pixels);
+         pSrc_pixels = dithered_pixels;
+      }
+
+      etc1_optimizer optimizer;
+
+      uint64 best_error = cUINT64_MAX;
+      uint best_flip = false, best_use_color4 = false;
+      
+      uint8 best_selectors[2][8];
+      etc1_optimizer::results best_results[2];
+      for (uint i = 0; i < 2; i++)
+      {
+         best_results[i].m_n = 8;
+         best_results[i].m_pSelectors = best_selectors[i];
+      }
+      
+      uint8 selectors[3][8];
+      etc1_optimizer::results results[3];
+      
+      for (uint i = 0; i < 3; i++)
+      {
+         results[i].m_n = 8;
+         results[i].m_pSelectors = selectors[i];
+      }
+            
+      color_quad_u8 subblock_pixels[8];
+
+      etc1_optimizer::params params(pack_params);
+      params.m_num_src_pixels = 8;
+      params.m_pSrc_pixels = subblock_pixels;
+
+      for (uint flip = 0; flip < 2; flip++)
+      {
+         for (uint use_color4 = 0; use_color4 < 2; use_color4++)
+         {
+            uint64 trial_error = 0;
+
+            uint subblock;
+            for (subblock = 0; subblock < 2; subblock++)
+            {
+               if (flip)
+                  memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8);
+               else
+               {
+                  const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2;
+                  subblock_pixels[0] = pSrc_col[0]; subblock_pixels[1] = pSrc_col[4]; subblock_pixels[2] = pSrc_col[8]; subblock_pixels[3] = pSrc_col[12];
+                  subblock_pixels[4] = pSrc_col[1]; subblock_pixels[5] = pSrc_col[5]; subblock_pixels[6] = pSrc_col[9]; subblock_pixels[7] = pSrc_col[13];
+               }
+
+               results[2].m_error = cUINT64_MAX;
+               if ((params.m_quality >= cMediumQuality) && ((subblock) || (use_color4)))
+               {
+                  const uint32 subblock_pixel0_u32 = subblock_pixels[0].m_u32;
+                  for (r = 7; r >= 1; --r)
+                     if (subblock_pixels[r].m_u32 != subblock_pixel0_u32)
+                        break;
+                  if (!r)
+                  {
+                  //   pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixels[0].r, pack_params, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : NULL);
+                  }
+               }
+
+               params.m_use_color4 = (use_color4 != 0);
+               params.m_constrain_against_base_color5 = false;
+
+               if ((!use_color4) && (subblock))
+               {
+                  params.m_constrain_against_base_color5 = true;
+                  params.m_base_color5 = results[0].m_block_color_unscaled;
+               }
+                              
+               if (params.m_quality == cHighQuality)
+               {
+                  static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 };
+                  params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_4);
+                  params.m_pScan_deltas = s_scan_delta_0_to_4;
+               }
+               else if (params.m_quality == cMediumQuality)
+               {
+                  static const int s_scan_delta_0_to_1[] = { -1, 0, 1 };
+                  params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_1);
+                  params.m_pScan_deltas = s_scan_delta_0_to_1;
+               }
+               else
+               {
+                  static const int s_scan_delta_0[] = { 0 };
+                  params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0);
+                  params.m_pScan_deltas = s_scan_delta_0;
+               }
+               
+               optimizer.init(params, results[subblock]);
+               if (!optimizer.compute())
+                  break;
+                              
+               if (params.m_quality >= cMediumQuality)
+               {
+                  // TODO: Fix fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions.
+                  const uint refinement_error_thresh0 = 3000;
+                  const uint refinement_error_thresh1 = 6000;
+                  if (results[subblock].m_error > refinement_error_thresh0)
+                  {
+                     if (params.m_quality == cMediumQuality)
+                     {
+                        static const int s_scan_delta_2_to_3[] = { -3, -2, 2, 3 };
+                        params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_2_to_3);
+                        params.m_pScan_deltas = s_scan_delta_2_to_3;
+                     }
+                     else
+                     {
+                        static const int s_scan_delta_5_to_5[] = { -5, 5 };
+                        static const int s_scan_delta_5_to_8[] = { -8, -7, -6, -5, 5, 6, 7, 8 };
+                        if (results[subblock].m_error > refinement_error_thresh1)
+                        {
+                           params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_8);
+                           params.m_pScan_deltas = s_scan_delta_5_to_8;
+                        }
+                        else
+                        {
+                           params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_5);
+                           params.m_pScan_deltas = s_scan_delta_5_to_5;
+                        }
+                     }
+
+                     if (!optimizer.compute())
+                        break;
+                  }
+
+                  if (results[2].m_error < results[subblock].m_error)
+                     results[subblock] = results[2];
+               }
+                            
+               trial_error += results[subblock].m_error;
+               if (trial_error >= best_error)
+                  break;
+            }
+
+            if (subblock < 2)
+               continue;
+
+            best_error = trial_error;
+            best_results[0] = results[0];
+            best_results[1] = results[1];
+            best_flip = flip;
+            best_use_color4 = use_color4;
+            
+         } // use_color4
+
+      } // flip
+
+      int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r;
+      int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g;
+      int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b;
+      RG_ETC1_ASSERT(best_use_color4 || (rg_etc1::minimum(dr, dg, db) >= cETC1ColorDeltaMin) && (rg_etc1::maximum(dr, dg, db) <= cETC1ColorDeltaMax));
+           
+      if (best_use_color4)
+      {
+         dst_block.m_bytes[0] = static_cast<uint8>(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4));
+         dst_block.m_bytes[1] = static_cast<uint8>(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4));
+         dst_block.m_bytes[2] = static_cast<uint8>(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4));
+      }
+      else
+      {
+         if (dr < 0) dr += 8; dst_block.m_bytes[0] = static_cast<uint8>((best_results[0].m_block_color_unscaled.r << 3) | dr);
+         if (dg < 0) dg += 8; dst_block.m_bytes[1] = static_cast<uint8>((best_results[0].m_block_color_unscaled.g << 3) | dg);
+         if (db < 0) db += 8; dst_block.m_bytes[2] = static_cast<uint8>((best_results[0].m_block_color_unscaled.b << 3) | db);
+      }
+      
+      dst_block.m_bytes[3] = static_cast<uint8>( (best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip );
+      
+      uint selector0 = 0, selector1 = 0;
+      if (best_flip)
+      {
+         // flipped:
+         // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },               
+         // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } 
+         //
+         // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 },
+         // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }
+         const uint8* pSelectors0 = best_results[0].m_pSelectors;
+         const uint8* pSelectors1 = best_results[1].m_pSelectors;
+         for (int x = 3; x >= 0; --x)
+         {
+            uint b;
+            b = g_selector_index_to_etc1[pSelectors1[4 + x]];
+            selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+            b = g_selector_index_to_etc1[pSelectors1[x]];
+            selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+            b = g_selector_index_to_etc1[pSelectors0[4 + x]];
+            selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+            b = g_selector_index_to_etc1[pSelectors0[x]];
+            selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+         }
+      }
+      else
+      {
+         // non-flipped:
+         // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 },
+         // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 }
+         //
+         // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
+         // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }
+         for (int subblock = 1; subblock >= 0; --subblock)
+         {
+            const uint8* pSelectors = best_results[subblock].m_pSelectors + 4;
+            for (uint i = 0; i < 2; i++)
+            {
+               uint b;
+               b = g_selector_index_to_etc1[pSelectors[3]];
+               selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+               b = g_selector_index_to_etc1[pSelectors[2]];
+               selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+               b = g_selector_index_to_etc1[pSelectors[1]];
+               selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+               b = g_selector_index_to_etc1[pSelectors[0]];
+               selector0 = (selector0 << 1) | (b & 1);selector1 = (selector1 << 1) | (b >> 1);
+
+               pSelectors -= 4;
+            }
+         }
+      }
+                  
+      dst_block.m_bytes[4] = static_cast<uint8>(selector1 >> 8); dst_block.m_bytes[5] = static_cast<uint8>(selector1 & 0xFF);
+      dst_block.m_bytes[6] = static_cast<uint8>(selector0 >> 8); dst_block.m_bytes[7] = static_cast<uint8>(selector0 & 0xFF);
+
+      return static_cast<unsigned int>(best_error);
+   }
+
+} // namespace rg_etc1
diff --git a/extern/rg_etc1_v104/rg_etc1.h b/extern/rg_etc1_v104/rg_etc1.h
new file mode 100644
index 0000000..9a70150
--- /dev/null
+++ b/extern/rg_etc1_v104/rg_etc1.h
@@ -0,0 +1,76 @@
+// File: rg_etc1.h - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com>
+// Please see ZLIB license at the end of this file.
+#pragma once
+
+namespace rg_etc1
+{
+   // Unpacks the 8-byte ETC1 compressed block at pETC1_block into pDst_pixels_rgba, a block of 4x4 (16) 32bpp RGBA pixels.
+   // Returns false if the block is invalid. Invalid blocks will still be unpacked with clamping.
+   // This function is thread safe, and does not dynamically allocate any memory.
+   // If preserve_alpha is true, the alpha channel of the destination pixels will not be overwritten. Otherwise, alpha will be set to 255.
+   bool unpack_etc1_block(const void *pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha = false);
+
+   // Quality setting = the higher the quality, the slower. 
+   // To pack large textures, it is highly recommended to call pack_etc1_block() in parallel, on different blocks, from multiple threads (particularly when using cHighQuality).
+   enum etc1_quality  // Enumerator order matters: pack_etc1_block() tests m_quality >= cMediumQuality.
+   { 
+      cLowQuality,     // pack_etc1_block() runs the optimizer with the single scan delta { 0 } and no refinement pass.
+      cMediumQuality,  // Scan deltas -1..1; a subblock whose error exceeds a threshold is re-scanned with { -3, -2, 2, 3 }.
+      cHighQuality,    // Scan deltas -4..4; high-error subblocks are re-scanned with +/-5 or +/-5..8. Default set by etc1_pack_params::clear().
+   };
+      
+   struct etc1_pack_params  // Options passed by reference to pack_etc1_block().
+   {
+      etc1_quality m_quality;  // Speed/quality tradeoff (see etc1_quality); clear() sets cHighQuality.
+      bool m_dithering;        // Dithering toggle; clear() sets false. NOTE(review): the dithering behavior itself is not visible here - confirm in rg_etc1.cpp.
+                              
+      inline etc1_pack_params()  // Constructs with the defaults assigned by clear().
+      {
+         clear();
+      }
+
+      void clear()  // Resets both fields to their defaults: highest quality, dithering off.
+      {
+         m_quality = cHighQuality;
+         m_dithering = false;
+      }
+   };
+
+   // Important: pack_etc1_block_init() must be called before calling pack_etc1_block(). NOTE(review): presumably one-time global setup; whether repeated or concurrent calls are safe is not visible here - confirm.
+   void pack_etc1_block_init();
+
+   // Packs a 4x4 block of 32bpp RGBA pixels (pSrc_pixels_rgba, 16 entries) to an 8-byte ETC1 block written to pETC1_block; pack_params selects quality and dithering.
+   // 32-bit RGBA pixels must always be arranged as (R,G,B,A) (R first, A last) in memory, independent of platform endianness. A should always be 255.
+   // Returns squared error of result (the packer's best total error, cast to unsigned int).
+   // This function is thread safe, and does not dynamically allocate any memory.
+   // pack_etc1_block() does not currently support "perceptual" colorspace metrics - it primarily optimizes for RGB RMSE.
+   unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params);
+            
+} // namespace rg_etc1
+
+//------------------------------------------------------------------------------
+//
+// rg_etc1 uses the ZLIB license:
+// http://opensource.org/licenses/Zlib
+//
+// Copyright (c) 2012 Rich Geldreich
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+// claim that you wrote the original software. If you use this software
+// in a product, an acknowledgment in the product documentation would be
+// appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such, and must not be
+// misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source distribution.
+//
+//------------------------------------------------------------------------------