Fix errors in new cluster fit compressor.

2012-01-02 08:49:13 +00:00
parent 23bfc1b514
commit f2d90ee844
16 changed files with 677 additions and 632 deletions
--- a/src/nvtt/ClusterFit.cpp
+++ b/src/nvtt/ClusterFit.cpp
@@ -49,7 +49,7 @@ void ClusterFit::setColourSet(const ColorSet * set)
 #endif

    // cache some values
-    m_count = set->count;
+    m_count = set->colorCount;

    Vector3 values[16];
    for (uint i = 0; i < m_count; i++)
@@ -148,7 +148,7 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
    SimdVector besterror = SimdVector( FLT_MAX );

    SimdVector x0 = zero;
-	
+
    int b0 = 0, b1 = 0;

    // check all possible clusters for this total order
@@ -191,22 +191,22 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
            SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
            SimdVector e4 = multiplyAdd( two, e3, e1 );

-	    // apply the metric to the error term
-	    SimdVector e5 = e4 * m_metricSqr;
-	    SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();
+            // apply the metric to the error term
+            SimdVector e5 = e4 * m_metricSqr;
+            SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();

-	    // keep the solution if it wins
-	    if( compareAnyLessThan( error, besterror ) )
-	    {
-		besterror = error;
-		beststart = a;
-		bestend = b;
-		b0 = c0;
-		b1 = c1;
-	    }
+            // keep the solution if it wins
+            if( compareAnyLessThan( error, besterror ) )
+            {
+                besterror = error;
+                beststart = a;
+                bestend = b;
+                b0 = c0;
+                b1 = c1;
+            }

-	    x1 += m_weighted[c0+c1];
-	}
+            x1 += m_weighted[c0+c1];
+        }

        x0 += m_weighted[c0];
    }
@@ -218,8 +218,8 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
        *start = beststart.toVector3();
        *end = bestend.toVector3();

-	// save the error
-	m_besterror = besterror;
+        // save the error
+        m_besterror = besterror;

        return true;
    }
@@ -308,10 +308,10 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
                }

                x2 += m_weighted[c0+c1+c2];
-	    }
+            }

-	    x1 += m_weighted[c0+c1];
-	}
+            x1 += m_weighted[c0+c1];
+        }

        x0 += m_weighted[c0];
    }
@@ -321,9 +321,9 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
    {
        *start = beststart.toVector3();
        *end = bestend.toVector3();
-		
-	// save the error
-	m_besterror = besterror;
+
+        // save the error
+        m_besterror = besterror;

        return true;
    }
@@ -404,12 +404,12 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
    // save the block if necessary
    if( besterror < m_besterror )
    {
-		
+
        *start = beststart;
        *end = bestend;

-	// save the error
-	m_besterror = besterror;
+        // save the error
+        m_besterror = besterror;

        return true;
    }
@@ -420,8 +420,8 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
 bool ClusterFit::compress4(Vector3 * start, Vector3 * end)
 {
    const uint count = m_count;
-    Vector3 const grid( 31.0f, 63.0f, 31.0f );
-    Vector3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
+    const Vector3 grid( 31.0f, 63.0f, 31.0f );
+    const Vector3 gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );

    // declare variables
    Vector3 beststart( 0.0f );
--- a/src/nvtt/QuickCompressDXT.cpp
+++ b/src/nvtt/QuickCompressDXT.cpp
@@ -179,8 +179,13 @@ inline static uint computeIndices4(const ColorSet & set, Vector3::Arg maxColor,
 	palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
 	
 	uint indices = 0;
-	for(int i = 0; i < 16; i++)
+    for(int i = 0; i < 16; i++)
 	{
+        if (!set.isValidIndex(i)) {
+            // Skip masked pixels and out-of-bounds indices.
+            continue;
+        }
+
        Vector3 color = set.color(i).xyz();

 		float d0 = colorDistance(palette[0], color);
@@ -237,16 +242,20 @@ inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor,
 	uint indices = 0;
 	for(int i = 0; i < 16; i++)
 	{
+        if (!set.isValidIndex(i)) {
+            // Masked and out-of-bounds pixels are assigned index 3 instead of a palette match.
+            indices |= 3 << (2 * i);
+            continue;
+        }
+
        Vector3 color = set.color(i).xyz();
-		float alpha = set.color(i).w;
 		
 		float d0 = colorDistance(palette[0], color);
 		float d1 = colorDistance(palette[1], color);
 		float d2 = colorDistance(palette[2], color);
 		
 		uint index;
-		if (alpha == 0) index = 3;
-		else if (d0 < d1 && d0 < d2) index = 0;
+		if (d0 < d1 && d0 < d2) index = 0;
 		else if (d1 < d2) index = 1;
 		else index = 2;
 		
--- a/src/nvtt/tools/imgdiff.cpp
+++ b/src/nvtt/tools/imgdiff.cpp
@@ -72,11 +72,11 @@ struct Error
 		mse = 0.0f;
 	}

-	void addSample(float e)
+	void addSample(double e)
 	{
 		samples++;
-		mabse += fabsf(e);
-		maxabse = nv::max(maxabse, fabsf(e));
+		mabse += fabs(e);
+		maxabse = nv::max(maxabse, fabs(e));
 		mse += e * e;
 	}

@@ -84,8 +84,8 @@ struct Error
 	{
 		mabse /= samples;
 		mse /= samples;
-		rmse = sqrtf(mse);
-		psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse);
+		rmse = sqrt(mse);
+		psnr = (rmse == 0) ? 999.0 : 20.0 * log10(255.0 / rmse);
 	}

 	void print()
@@ -97,11 +97,11 @@ struct Error
 	}

 	int samples;
-	float mabse;
-	float maxabse;
-	float mse;
-	float rmse;
-	float psnr;
+	double mabse;
+	double maxabse;
+	double mse;
+	double rmse;
+	double psnr;
 };

 struct NormalError
@@ -230,10 +230,10 @@ int main(int argc, char *argv[])
 			const nv::Color32 c0(image0.pixel(e, i));
 			const nv::Color32 c1(image1.pixel(e, i));

-			float r = float(c0.r - c1.r);
-			float g = float(c0.g - c1.g);
-			float b = float(c0.b - c1.b);
-			float a = float(c0.a - c1.a);
+			double r = float(c0.r - c1.r);
+			double g = float(c0.g - c1.g);
+			double b = float(c0.b - c1.b);
+			double a = float(c0.a - c1.a);

 			error_r.addSample(r);
 			error_g.addSample(g);
@@ -247,9 +247,9 @@ int main(int argc, char *argv[])

 			if (compareAlpha)
 			{
-				error_total.addSample(r * c0.a / 255.0f);
-				error_total.addSample(g * c0.a / 255.0f);
-				error_total.addSample(b * c0.a / 255.0f);
+				error_total.addSample(r * c0.a / 255.0);
+				error_total.addSample(g * c0.a / 255.0);
+				error_total.addSample(b * c0.a / 255.0);
 			}
 			else
 			{