From 265dbf33c3743d8928f0243846e65584209646b6 Mon Sep 17 00:00:00 2001
From: castano <castano@95f4ed2b-212e-0410-8b90-d31948207fce>
Date: Mon, 19 Apr 2010 19:40:38 +0000
Subject: [PATCH] Better estimate of principal component in CUDA. Fixes issue
 120.

---
 src/nvtt/cuda/CudaMath.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/nvtt/cuda/CudaMath.h b/src/nvtt/cuda/CudaMath.h
index 50af320..c6c0d06 100644
--- a/src/nvtt/cuda/CudaMath.h
+++ b/src/nvtt/cuda/CudaMath.h
@@ -127,8 +127,21 @@ inline __device__ __host__ float3 normalize(float3 v)
 inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
 {
 	// 8 iterations seems to be more than enough.
+
+	float3 row0 = make_float3(matrix[0], matrix[1], matrix[2]);
+	float3 row1 = make_float3(matrix[1], matrix[3], matrix[4]);
+	float3 row2 = make_float3(matrix[2], matrix[4], matrix[5]);
 
-	float3 v = make_float3(1.0f, 1.0f, 1.0f);
+	float r0 = dot(row0, row0);
+	float r1 = dot(row1, row1);
+	float r2 = dot(row2, row2);
+
+	float3 v;
+	if (r0 > r1 && r0 > r2) v = row0;
+	else if (r1 > r2) v = row1;
+	else v = row2;
+
+	//float3 v = make_float3(1.0f, 1.0f, 1.0f);
 	for(int i = 0; i < 8; i++) {
 		float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
 		float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];