From f2d90ee844e89e6f5e6b4c75a6ea18c65f8c60e4 Mon Sep 17 00:00:00 2001 From: castano Date: Mon, 2 Jan 2012 08:49:13 +0000 Subject: [PATCH] Fix errors in new cluster fit compressor. --- project/vc9/nvcompress/nvcompress.vcproj | 16 +- project/vc9/nvtt.sln | 414 ++++++++++++----------- project/vc9/nvtt/nvtt.vcproj | 88 ++--- src/nvimage/ColorBlock.cpp | 140 +++++--- src/nvimage/ColorBlock.h | 22 +- src/nvmath/Box.h | 2 +- src/nvmath/Box.inl | 2 +- src/nvmath/Matrix.h | 30 +- src/nvmath/Matrix.inl | 190 +++++------ src/nvmath/Plane.h | 4 +- src/nvmath/Plane.inl | 4 +- src/nvmath/Vector.h | 54 ++- src/nvmath/Vector.inl | 94 ++--- src/nvtt/ClusterFit.cpp | 58 ++-- src/nvtt/QuickCompressDXT.cpp | 17 +- src/nvtt/tools/imgdiff.cpp | 34 +- 16 files changed, 607 insertions(+), 562 deletions(-) diff --git a/project/vc9/nvcompress/nvcompress.vcproj b/project/vc9/nvcompress/nvcompress.vcproj index db204ba..44c07b2 100644 --- a/project/vc9/nvcompress/nvcompress.vcproj +++ b/project/vc9/nvcompress/nvcompress.vcproj @@ -20,7 +20,7 @@ diff --git a/src/nvimage/ColorBlock.cpp b/src/nvimage/ColorBlock.cpp index 2087e85..ddf02cb 100644 --- a/src/nvimage/ColorBlock.cpp +++ b/src/nvimage/ColorBlock.cpp @@ -461,15 +461,30 @@ float ColorBlock::volume() const }*/ +void ColorSet::allocate(uint w, uint h) +{ + nvDebugCheck(w <= 4 && h <= 4); + + this->colorCount = w * h; + this->indexCount = 16; + this->w = 4; + this->h = 4; + + //colors = new Vector4[colorCount]; + //weights = new float[colorCount]; + //indices = new int[indexCount]; +} + +// Allocate 4x4 block and fill with void ColorSet::setColors(const float * data, uint img_w, uint img_h, uint img_x, uint img_y) { nvDebugCheck(img_x < img_w && img_y < img_h); - w = min(4U, img_w - img_x); - h = min(4U, img_h - img_y); - nvDebugCheck(w != 0 && h != 0); + const uint block_w = min(4U, img_w - img_x); + const uint block_h = min(4U, img_h - img_y); + nvDebugCheck(block_w != 0 && block_h != 0); - count = w * h; + allocate(block_w, block_h); const float * r = data + img_w * img_h * 0; const float * g = data + img_w * img_h * 1; @@ -477,9 +492,9 @@ void ColorSet::setColors(const float * data, uint img_w, uint img_h, uint img_x, const float * a = data + img_w * img_h * 3; // Set colors. - for (uint y = 0, i = 0; y < h; y++) + for (uint y = 0, i = 0; y < block_h; y++) { - for (uint x = 0; x < w; x++, i++) + for (uint x = 0; x < block_w; x++, i++) { uint idx = x + img_x + (y + img_y) * img_w; colors[i].x = r[idx]; @@ -488,11 +503,25 @@ void ColorSet::setColors(const float * data, uint img_w, uint img_h, uint img_x, colors[i].w = a[idx]; } } + + // Set default indices. + for (uint y = 0, i = 0; y < 4; y++) + { + for (uint x = 0; x < 4; x++) + { + if (x < block_w && y < block_h) { + indices[y*4+x] = i++; + } + else { + indices[y*4+x] = -1; + } + } + } } void ColorSet::setAlphaWeights() { - for (uint i = 0; i < count; i++) + for (uint i = 0; i < colorCount; i++) { weights[i] = max(colors[i].w, 0.001f); // Avoid division by zero. } @@ -500,72 +529,71 @@ void ColorSet::setAlphaWeights() void ColorSet::setUniformWeights() { - for (uint i = 0; i < count; i++) + for (uint i = 0; i < colorCount; i++) { weights[i] = 1.0f; } } +// @@ Handle complex blocks (not 4x4). void ColorSet::createMinimalSet(bool ignoreTransparent) { - nvDebugCheck(count == w*h); // Do not call this method multiple times. + nvDebugCheck(colorCount <= 16); Vector4 C[16]; float W[16]; - memcpy(C, colors, sizeof(Vector4)*count); - memcpy(W, weights, sizeof(float)*count); + memcpy(C, colors, sizeof(Vector4)*colorCount); + memcpy(W, weights, sizeof(float)*colorCount); uint n = 0; - for (uint y = 0, i = 0; y < h; y++) + for (uint i = 0; i < indexCount; i++) { - for (uint x = 0; x < w; x++, i++) - { - if (ignoreTransparent && C[i].w == 0) { - continue; - } + if (indices[i] < 0) { + continue; + } + + Vector4 ci = C[indices[i]]; + float wi = W[indices[i]]; + + if (ignoreTransparent && ci.w == 0) { + indices[i] = -1; + continue; + } - uint idx = y * 4 + x; - - // loop over previous points for a match - for (int j = 0; ; j++) - { - // allocate a new point - if (j == i) - { - colors[n] = C[i]; - weights[n] = W[i]; - remap[idx] = n; - n++; - break; - } - - // check for a match - bool colorMatch = (C[i].x == C[j].x) && (C[i].w == C[j].w) && (C[i].z == C[j].z); - //bool alphaMatch = (C[i].w == C[j].w); - - if (colorMatch) - { - // get the index of the match - int index = remap[j]; - - // map to this point and increase the weight - weights[index] += W[i]; - remap[idx] = index; - break; - } + // Find matching color. + uint j; + for (j = 0; j < n; j++) { + bool colorMatch = equal(colors[j].x, ci.x) && equal(colors[j].y, ci.y) && equal(colors[j].z, ci.z); + //bool alphaMatch = equal(colors[j].w, ci.w); + + if (colorMatch) { + weights[j] += wi; + indices[i] = j; + break; } } + + // No match found. Add new color. + if (j == n) { + colors[n] = ci; + weights[n] = wi; + indices[i] = n; + n++; + } + } + nvDebugCheck(n != 0); + + for (uint i = n; i < colorCount; i++) { + weights[i] = 0; } - count = n; + colorCount = n; // Avoid empty blocks. - if (count == 0) { - count = 1; - //colors[0] = C[0]; - //weights[0] = W[0]; - memset(remap, 0, sizeof(int)*16); + if (colorCount == 0) { + colorCount = 1; + indices[0] = 0; } } @@ -578,7 +606,7 @@ void ColorSet::wrapIndices() uint base = (y % h) * w; for (uint x = w; x < 4; x++) { - remap[y*4+3] = remap[base + (x % w)]; + indices[y*4+3] = indices[base + (x % w)]; } } } @@ -588,7 +616,7 @@ bool ColorSet::isSingleColor(bool ignoreAlpha) const Vector4 v = colors[0]; if (ignoreAlpha) v.w = 1.0f; - for (uint i = 1; i < count; i++) + for (uint i = 1; i < colorCount; i++) { Vector4 c = colors[i]; if (ignoreAlpha) c.w = 1.0f; @@ -615,7 +643,7 @@ static inline float component(Vector4::Arg c, uint i) void ColorSet::swizzle(uint x, uint y, uint z, uint w) { - for (uint i = 0; i < count; i++) + for (uint i = 0; i < colorCount; i++) { Vector4 c = colors[i]; colors[i].x = component(c, x); @@ -627,7 +655,7 @@ void ColorSet::swizzle(uint x, uint y, uint z, uint w) bool ColorSet::hasAlpha() const { - for (uint i = 0; i < count; i++) + for (uint i = 0; i < colorCount; i++) { if (colors[i].w != 0.0f) return true; } diff --git a/src/nvimage/ColorBlock.h b/src/nvimage/ColorBlock.h index ea0aaec..6541fa8 100644 --- a/src/nvimage/ColorBlock.h +++ b/src/nvimage/ColorBlock.h @@ -83,6 +83,11 @@ namespace nv struct ColorSet { + ColorSet() : colorCount(0), indexCount(0), w(0), h(0) {} + //~ColorSet() {} + + void allocate(uint w, uint h); + void setColors(const float * data, uint img_w, uint img_h, uint img_x, uint img_y); void setAlphaWeights(); @@ -97,19 +102,22 @@ namespace nv bool hasAlpha() const; // These methods require indices to be set: - Vector4 color(uint x, uint y) const { nvDebugCheck(x < w && y < h); return colors[remap[y * 4 + x]]; } - Vector4 & color(uint x, uint y) { nvDebugCheck(x < w && y < h); return colors[remap[y * 4 + x]]; } + Vector4 color(uint x, uint y) const { nvDebugCheck(x < w && y < h); return colors[indices[y * 4 + x]]; } + Vector4 & color(uint x, uint y) { nvDebugCheck(x < w && y < h); return colors[indices[y * 4 + x]]; } - Vector4 color(uint i) const { nvDebugCheck(i < 16); return colors[remap[i]]; } - Vector4 & color(uint i) { nvDebugCheck(i < 16); return colors[remap[i]]; } + Vector4 color(uint i) const { nvDebugCheck(i < indexCount); return colors[indices[i]]; } + Vector4 & color(uint i) { nvDebugCheck(i < indexCount); return colors[indices[i]]; } + bool isValidIndex(uint i) const { return i < indexCount && indices[i] >= 0; } - uint count; - uint w, h; + uint colorCount; + uint indexCount; // Fixed to 16 + uint w, h; // Fixed to 4x4 + // Allocate color set dynamically and add support for sets larger than 4x4. Vector4 colors[16]; float weights[16]; - int remap[16]; + int indices[16]; }; } // nv namespace diff --git a/src/nvmath/Box.h b/src/nvmath/Box.h index dcbfd39..74e4bf3 100644 --- a/src/nvmath/Box.h +++ b/src/nvmath/Box.h @@ -43,7 +43,7 @@ namespace nv Vector3 extents() const; // Return extents of the box. - scalar extents(uint axis) const; + float extents(uint axis) const; // Add a point to this box. void addPointToBounds(const Vector3 & p); diff --git a/src/nvmath/Box.inl b/src/nvmath/Box.inl index 33623e9..9b69828 100644 --- a/src/nvmath/Box.inl +++ b/src/nvmath/Box.inl @@ -56,7 +56,7 @@ namespace nv } // Return extents of the box. - scalar Box::extents(uint axis) const + float Box::extents(uint axis) const { nvDebugCheck(axis < 3); if (axis == 0) return (maxCorner.x - minCorner.x) * 0.5f; diff --git a/src/nvmath/Matrix.h b/src/nvmath/Matrix.h index 1601c3d..3edb8af 100644 --- a/src/nvmath/Matrix.h +++ b/src/nvmath/Matrix.h @@ -19,9 +19,9 @@ namespace nv Matrix3(const Matrix3 & m); Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2); - scalar get(uint row, uint col) const; - scalar operator()(uint row, uint col) const; - scalar & operator()(uint row, uint col); + float get(uint row, uint col) const; + float operator()(uint row, uint col) const; + float & operator()(uint row, uint col); Vector3 row(uint i) const; Vector3 column(uint i) const; @@ -34,7 +34,7 @@ namespace nv float determinant() const; private: - scalar m_data[9]; + float m_data[9]; }; @@ -52,28 +52,28 @@ namespace nv explicit Matrix(identity_t); Matrix(const Matrix & m); Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3); - //explicit Matrix(const scalar m[]); // m is assumed to contain 16 elements + //explicit Matrix(const float m[]); // m is assumed to contain 16 elements - scalar data(uint idx) const; - scalar & data(uint idx); - scalar get(uint row, uint col) const; - scalar operator()(uint row, uint col) const; - scalar & operator()(uint row, uint col); - const scalar * ptr() const; + float data(uint idx) const; + float & data(uint idx); + float get(uint row, uint col) const; + float operator()(uint row, uint col) const; + float & operator()(uint row, uint col); + const float * ptr() const; Vector4 row(uint i) const; Vector4 column(uint i) const; - void scale(scalar s); + void scale(float s); void scale(Vector3::Arg s); void translate(Vector3::Arg t); - void rotate(scalar theta, scalar v0, scalar v1, scalar v2); - scalar determinant() const; + void rotate(float theta, float v0, float v1, float v2); + float determinant() const; void apply(Matrix::Arg m); private: - scalar m_data[16]; + float m_data[16]; }; } // nv namespace diff --git a/src/nvmath/Matrix.inl b/src/nvmath/Matrix.inl index dd8b84d..9ef3f10 100644 --- a/src/nvmath/Matrix.inl +++ b/src/nvmath/Matrix.inl @@ -40,17 +40,17 @@ namespace nv m_data[6] = v2.x; m_data[7] = v2.y; m_data[8] = v2.z; } - inline scalar Matrix3::get(uint row, uint col) const + inline float Matrix3::get(uint row, uint col) const { nvDebugCheck(row < 3 && col < 3); return m_data[col * 3 + row]; } - inline scalar Matrix3::operator()(uint row, uint col) const + inline float Matrix3::operator()(uint row, uint col) const { nvDebugCheck(row < 3 && col < 3); return m_data[col * 3 + row]; } - inline scalar & Matrix3::operator()(uint row, uint col) + inline float & Matrix3::operator()(uint row, uint col) { nvDebugCheck(row < 3 && col < 3); return m_data[col * 3 + row]; @@ -136,7 +136,7 @@ namespace nv Matrix3 m; for(int i = 0; i < 3; i++) { - const scalar ai0 = a(i,0), ai1 = a(i,1), ai2 = a(i,2); + const float ai0 = a(i,0), ai1 = a(i,1), ai2 = a(i,2); m(i, 0) = ai0 * b(0,0) + ai1 * b(1,0) + ai2 * b(2,0); m(i, 1) = ai0 * b(0,1) + ai1 * b(1,1) + ai2 * b(2,1); m(i, 2) = ai0 * b(0,2) + ai1 * b(1,2) + ai2 * b(2,2); @@ -198,7 +198,7 @@ namespace nv m_data[12] = v3.x; m_data[13] = v3.y; m_data[14] = v3.z; m_data[15] = v3.w; } - /*inline Matrix::Matrix(const scalar m[]) + /*inline Matrix::Matrix(const float m[]) { for(int i = 0; i < 16; i++) { m_data[i] = m[i]; @@ -207,33 +207,33 @@ namespace nv // Accessors - inline scalar Matrix::data(uint idx) const + inline float Matrix::data(uint idx) const { nvDebugCheck(idx < 16); return m_data[idx]; } - inline scalar & Matrix::data(uint idx) + inline float & Matrix::data(uint idx) { nvDebugCheck(idx < 16); return m_data[idx]; } - inline scalar Matrix::get(uint row, uint col) const + inline float Matrix::get(uint row, uint col) const { nvDebugCheck(row < 4 && col < 4); return m_data[col * 4 + row]; } - inline scalar Matrix::operator()(uint row, uint col) const + inline float Matrix::operator()(uint row, uint col) const { nvDebugCheck(row < 4 && col < 4); return m_data[col * 4 + row]; } - inline scalar & Matrix::operator()(uint row, uint col) + inline float & Matrix::operator()(uint row, uint col) { nvDebugCheck(row < 4 && col < 4); return m_data[col * 4 + row]; } - inline const scalar * Matrix::ptr() const + inline const float * Matrix::ptr() const { return m_data; } @@ -251,7 +251,7 @@ namespace nv } // Apply scale. - inline void Matrix::scale(scalar s) + inline void Matrix::scale(float s) { m_data[0] *= s; m_data[1] *= s; m_data[2] *= s; m_data[3] *= s; m_data[4] *= s; m_data[5] *= s; m_data[6] *= s; m_data[7] *= s; @@ -276,10 +276,10 @@ namespace nv m_data[15] = m_data[3] * t.x + m_data[7] * t.y + m_data[11] * t.z + m_data[15]; } - Matrix rotation(scalar theta, scalar v0, scalar v1, scalar v2); + Matrix rotation(float theta, float v0, float v1, float v2); // Apply rotation. - inline void Matrix::rotate(scalar theta, scalar v0, scalar v1, scalar v2) + inline void Matrix::rotate(float theta, float v0, float v1, float v2) { Matrix R(rotation(theta, v0, v1, v2)); apply(R); @@ -291,7 +291,7 @@ namespace nv nvDebugCheck(this != &m); for(int i = 0; i < 4; i++) { - const scalar ai0 = get(i,0), ai1 = get(i,1), ai2 = get(i,2), ai3 = get(i,3); + const float ai0 = get(i,0), ai1 = get(i,1), ai2 = get(i,2), ai3 = get(i,3); m_data[0 + i] = ai0 * m(0,0) + ai1 * m(1,0) + ai2 * m(2,0) + ai3 * m(3,0); m_data[4 + i] = ai0 * m(0,1) + ai1 * m(1,1) + ai2 * m(2,1) + ai3 * m(3,1); m_data[8 + i] = ai0 * m(0,2) + ai1 * m(1,2) + ai2 * m(2,2) + ai3 * m(3,2); @@ -310,7 +310,7 @@ namespace nv } // Get scale matrix. - inline Matrix scale(scalar s) + inline Matrix scale(float s) { Matrix m(identity); m(0,0) = m(1,1) = m(2,2) = s; @@ -328,10 +328,10 @@ namespace nv } // Get rotation matrix. - inline Matrix rotation(scalar theta, scalar v0, scalar v1, scalar v2) + inline Matrix rotation(float theta, float v0, float v1, float v2) { - scalar cost = cosf(theta); - scalar sint = sinf(theta); + float cost = cosf(theta); + float sint = sinf(theta); Matrix m(identity); @@ -348,18 +348,18 @@ namespace nv m(0,1) = sint; m(1,1) = cost; } else { - scalar a2, b2, c2; + float a2, b2, c2; a2 = v0 * v0; b2 = v1 * v1; c2 = v2 * v2; - scalar iscale = 1.0f / sqrtf(a2 + b2 + c2); + float iscale = 1.0f / sqrtf(a2 + b2 + c2); v0 *= iscale; v1 *= iscale; v2 *= iscale; - scalar abm, acm, bcm; - scalar mcos, asin, bsin, csin; + float abm, acm, bcm; + float mcos, asin, bsin, csin; mcos = 1.0f - cost; abm = v0 * v1 * mcos; acm = v0 * v2 * mcos; @@ -380,18 +380,18 @@ namespace nv return m; } - //Matrix rotation(scalar yaw, scalar pitch, scalar roll); - //Matrix skew(scalar angle, Vector3::Arg v1, Vector3::Arg v2); + //Matrix rotation(float yaw, float pitch, float roll); + //Matrix skew(float angle, Vector3::Arg v1, Vector3::Arg v2); // Get frustum matrix. - inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar) + inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar) { Matrix m(0.0f); - scalar doubleznear = 2.0f * zNear; - scalar one_deltax = 1.0f / (xmax - xmin); - scalar one_deltay = 1.0f / (ymax - ymin); - scalar one_deltaz = 1.0f / (zFar - zNear); + float doubleznear = 2.0f * zNear; + float one_deltax = 1.0f / (xmax - xmin); + float one_deltay = 1.0f / (ymax - ymin); + float one_deltaz = 1.0f / (zFar - zNear); m(0,0) = doubleznear * one_deltax; m(1,1) = doubleznear * one_deltay; @@ -405,14 +405,14 @@ namespace nv } // Get infinite frustum matrix. - inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear) + inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear) { Matrix m(0.0f); - scalar doubleznear = 2.0f * zNear; - scalar one_deltax = 1.0f / (xmax - xmin); - scalar one_deltay = 1.0f / (ymax - ymin); - scalar nudge = 1.0; // 0.999; + float doubleznear = 2.0f * zNear; + float one_deltax = 1.0f / (xmax - xmin); + float one_deltay = 1.0f / (ymax - ymin); + float nudge = 1.0; // 0.999; m(0,0) = doubleznear * one_deltax; m(1,1) = doubleznear * one_deltay; @@ -426,27 +426,27 @@ namespace nv } // Get perspective matrix. - inline Matrix perspective(scalar fovy, scalar aspect, scalar zNear, scalar zFar) + inline Matrix perspective(float fovy, float aspect, float zNear, float zFar) { - scalar xmax = zNear * tan(fovy / 2); - scalar xmin = -xmax; + float xmax = zNear * tan(fovy / 2); + float xmin = -xmax; - scalar ymax = xmax / aspect; - scalar ymin = -ymax; + float ymax = xmax / aspect; + float ymin = -ymax; return frustum(xmin, xmax, ymin, ymax, zNear, zFar); } // Get infinite perspective matrix. - inline Matrix perspective(scalar fovy, scalar aspect, scalar zNear) + inline Matrix perspective(float fovy, float aspect, float zNear) { - scalar x = zNear * tan(fovy / 2); - scalar y = x / aspect; + float x = zNear * tan(fovy / 2); + float y = x / aspect; return frustum( -x, x, -y, y, zNear ); } // Get matrix determinant. - inline scalar Matrix::determinant() const + inline float Matrix::determinant() const { return m_data[3] * m_data[6] * m_data[ 9] * m_data[12] - m_data[2] * m_data[7] * m_data[ 9] * m_data[12] - m_data[3] * m_data[5] * m_data[10] * m_data[12] + m_data[1] * m_data[7] * m_data[10] * m_data[12] + @@ -563,9 +563,9 @@ void TranslationMatrix(const Vec3 & v) { } /** Rotate theta degrees around v. */ -void RotationMatrix( scalar theta, scalar v0, scalar v1, scalar v2 ) { - scalar cost = cos(theta); - scalar sint = sin(theta); +void RotationMatrix( float theta, float v0, float v1, float v2 ) { + float cost = cos(theta); + float sint = sin(theta); if( 1 == v0 && 0 == v1 && 0 == v2 ) { data[0] = 1.0f; data[1] = 0.0f; data[2] = 0.0f; data[3] = 0.0f; @@ -587,18 +587,18 @@ void RotationMatrix( scalar theta, scalar v0, scalar v1, scalar v2 ) { } else { //we need scale a,b,c to unit length. - scalar a2, b2, c2; + float a2, b2, c2; a2 = v0 * v0; b2 = v1 * v1; c2 = v2 * v2; - scalar iscale = 1.0f / sqrtf(a2 + b2 + c2); + float iscale = 1.0f / sqrtf(a2 + b2 + c2); v0 *= iscale; v1 *= iscale; v2 *= iscale; - scalar abm, acm, bcm; - scalar mcos, asin, bsin, csin; + float abm, acm, bcm; + float mcos, asin, bsin, csin; mcos = 1.0f - cost; abm = v0 * v1 * mcos; acm = v0 * v2 * mcos; @@ -626,7 +626,7 @@ void RotationMatrix( scalar theta, scalar v0, scalar v1, scalar v2 ) { } /* -void SkewMatrix(scalar angle, const Vec3 & v1, const Vec3 & v2) { +void SkewMatrix(float angle, const Vec3 & v1, const Vec3 & v2) { v1.Normalize(); v2.Normalize(); @@ -635,9 +635,9 @@ v3.Cross(v1, v2); v3.Normalize(); // Get skew factor. -scalar costheta = Vec3DotProduct(v1, v2); -scalar sintheta = Real.Sqrt(1 - costheta * costheta); -scalar skew = tan(Trig.DegreesToRadians(angle) + acos(sintheta)) * sintheta - costheta; +float costheta = Vec3DotProduct(v1, v2); +float sintheta = Real.Sqrt(1 - costheta * costheta); +float skew = tan(Trig.DegreesToRadians(angle) + acos(sintheta)) * sintheta - costheta; // Build orthonormal matrix. v1 = FXVector3.Cross(v3, v2); @@ -669,13 +669,13 @@ return R * S * R.Transpose; // Not sure this is in the correct order... * * @todo Have to recompute this code for our new convention. **/ -void RotationMatrix( scalar yaw, scalar pitch, scalar roll ) { - scalar sy = sin(yaw+ToRadian(90)); - scalar cy = cos(yaw+ToRadian(90)); - scalar sp = sin(pitch-ToRadian(90)); - scalar cp = cos(pitch-ToRadian(90)); - scalar sr = sin(roll); - scalar cr = cos(roll); +void RotationMatrix( float yaw, float pitch, float roll ) { + float sy = sin(yaw+ToRadian(90)); + float cy = cos(yaw+ToRadian(90)); + float sp = sin(pitch-ToRadian(90)); + float cp = cos(pitch-ToRadian(90)); + float sr = sin(roll); + float cr = cos(roll); data[0] = cr*cy + sr*sp*sy; data[1] = cp*sy; @@ -699,35 +699,35 @@ void RotationMatrix( scalar yaw, scalar pitch, scalar roll ) { } /** Create a frustum matrix with the far plane at the infinity. */ -void Frustum( scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar ) { - scalar one_deltax, one_deltay, one_deltaz, doubleznear; +void Frustum( float xmin, float xmax, float ymin, float ymax, float zNear, float zFar ) { + float one_deltax, one_deltay, one_deltaz, doubleznear; doubleznear = 2.0f * zNear; one_deltax = 1.0f / (xmax - xmin); one_deltay = 1.0f / (ymax - ymin); one_deltaz = 1.0f / (zFar - zNear); - data[0] = (scalar)(doubleznear * one_deltax); + data[0] = (float)(doubleznear * one_deltax); data[1] = 0.0f; data[2] = 0.0f; data[3] = 0.0f; data[4] = 0.0f; - data[5] = (scalar)(doubleznear * one_deltay); + data[5] = (float)(doubleznear * one_deltay); data[6] = 0.f; data[7] = 0.f; - data[8] = (scalar)((xmax + xmin) * one_deltax); - data[9] = (scalar)((ymax + ymin) * one_deltay); - data[10] = (scalar)(-(zFar + zNear) * one_deltaz); + data[8] = (float)((xmax + xmin) * one_deltax); + data[9] = (float)((ymax + ymin) * one_deltay); + data[10] = (float)(-(zFar + zNear) * one_deltaz); data[11] = -1.f; data[12] = 0.f; data[13] = 0.f; - data[14] = (scalar)(-(zFar * doubleznear) * one_deltaz); + data[14] = (float)(-(zFar * doubleznear) * one_deltaz); data[15] = 0.f; } /** Create a frustum matrix with the far plane at the infinity. */ -void FrustumInf( scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear ) { - scalar one_deltax, one_deltay, doubleznear, nudge; +void FrustumInf( float xmin, float xmax, float ymin, float ymax, float zNear ) { + float one_deltax, one_deltay, doubleznear, nudge; doubleznear = 2.0f * zNear; one_deltax = 1.0f / (xmax - xmin); @@ -756,8 +756,8 @@ void FrustumInf( scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNea } /** Create an inverse frustum matrix with the far plane at the infinity. */ -void FrustumInfInv( scalar left, scalar right, scalar bottom, scalar top, scalar zNear ) { - // this matrix is wrong (not tested scalarly) I think it should be transposed. +void FrustumInfInv( float left, float right, float bottom, float top, float zNear ) { + // this matrix is wrong (not tested floatly) I think it should be transposed. data[0] = (right - left) / (2 * zNear); data[1] = 0; data[2] = 0; @@ -777,8 +777,8 @@ void FrustumInfInv( scalar left, scalar right, scalar bottom, scalar top, scalar } /** Create an homogeneous projection matrix. */ -void Perspective( scalar fov, scalar aspect, scalar zNear, scalar zFar ) { - scalar xmin, xmax, ymin, ymax; +void Perspective( float fov, float aspect, float zNear, float zFar ) { + float xmin, xmax, ymin, ymax; xmax = zNear * tan( fov/2 ); xmin = -xmax; @@ -790,22 +790,22 @@ void Perspective( scalar fov, scalar aspect, scalar zNear, scalar zFar ) { } /** Create a projection matrix with the far plane at the infinity. */ -void PerspectiveInf( scalar fov, scalar aspect, scalar zNear ) { - scalar x = zNear * tan( fov/2 ); - scalar y = x / aspect; +void PerspectiveInf( float fov, float aspect, float zNear ) { + float x = zNear * tan( fov/2 ); + float y = x / aspect; FrustumInf( -x, x, -y, y, zNear ); } /** Create an inverse projection matrix with far plane at the infinity. */ -void PerspectiveInfInv( scalar fov, scalar aspect, scalar zNear ) { - scalar x = zNear * tan( fov/2 ); - scalar y = x / aspect; +void PerspectiveInfInv( float fov, float aspect, float zNear ) { + float x = zNear * tan( fov/2 ); + float y = x / aspect; FrustumInfInv( -x, x, -y, y, zNear ); } /** Build bone matrix from quatertion and offset. */ void BoneMatrix(const Quat & q, const Vec3 & offset) { - scalar x2, y2, z2, xx, xy, xz, yy, yz, zz, wx, wy, wz; + float x2, y2, z2, xx, xy, xz, yy, yz, zz, wx, wy, wz; // calculate coefficients x2 = q.x + q.x; @@ -844,7 +844,7 @@ void BoneMatrix(const Quat & q, const Vec3 & offset) { //@{ /** Apply a general scale. */ -void Scale( scalar x, scalar y, scalar z ) { +void Scale( float x, float y, float z ) { data[0] *= x; data[4] *= y; data[8] *= z; data[1] *= x; data[5] *= y; data[9] *= z; data[2] *= x; data[6] *= y; data[10] *= z; @@ -852,14 +852,14 @@ void Scale( scalar x, scalar y, scalar z ) { } /** Apply a rotation of theta degrees around the axis v*/ -void Rotate( scalar theta, const Vec3 & v ) { +void Rotate( float theta, const Vec3 & v ) { Matrix b; b.RotationMatrix( theta, v[0], v[1], v[2] ); Multiply4x3( b ); } /** Apply a rotation of theta degrees around the axis v*/ -void Rotate( scalar theta, scalar v0, scalar v1, scalar v2 ) { +void Rotate( float theta, float v0, float v1, float v2 ) { Matrix b; b.RotationMatrix( theta, v0, v1, v2 ); Multiply4x3( b ); @@ -881,7 +881,7 @@ void Translate( const Vec3 &t ) { * Translate the matrix by x, y, z. This is the same as multiplying by a * translation matrix with the given offsets. */ -void Translate( scalar x, scalar y, scalar z ) { +void Translate( float x, float y, float z ) { data[12] = data[0] * x + data[4] * y + data[8] * z + data[12]; data[13] = data[1] * x + data[5] * y + data[9] * z + data[13]; data[14] = data[2] * x + data[6] * y + data[10] * z + data[14]; @@ -922,7 +922,7 @@ void AffineInverse() { //@{ /** Return the determinant of this matrix. */ -scalar Determinant() const { +float Determinant() const { return data[0] * data[5] * data[10] * data[15] + data[1] * data[6] * data[11] * data[12] + data[2] * data[7] * data[ 8] * data[13] + @@ -944,7 +944,7 @@ void Multiply4x4( const Matrix & A, const Matrix & restrict B ) { piDebugCheck(this != &B); for(int i = 0; i < 4; i++) { - const scalar ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); + const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0); GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1); GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2); @@ -981,7 +981,7 @@ void Multiply4x3( const Matrix & A, const Matrix & restrict B ) { piDebugCheck(this != &B); for(int i = 0; i < 3; i++) { - const scalar ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); + const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0); GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1); GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2); @@ -1038,9 +1038,9 @@ void TransformPoint(const Vec3 & restrict orig, Vec3 * restrict dest) const { } /** Transform a point, normalize it, and return w. */ -scalar TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const { +float TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const { piDebugCheck(&orig != dest); - scalar w; + float w; dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; @@ -1050,7 +1050,7 @@ scalar TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict de } /** Transform a point and return w. */ -scalar TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const { +float TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const { piDebugCheck(&orig != dest); dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; @@ -1071,7 +1071,7 @@ void TransformVec4(const Vec3 & orig, Vec4 * dest) const { //@{ /** Get the ZYZ euler angles from the matrix. Assumes the matrix is orthonormal. */ -void GetEulerAnglesZYZ(scalar * s, scalar * t, scalar * r) const { +void GetEulerAnglesZYZ(float * s, float * t, float * r) const { if( GetElem(2,2) < 1.0f ) { if( GetElem(2,2) > -1.0f ) { // cs*ct*cr-ss*sr -ss*ct*cr-cs*sr st*cr @@ -1115,7 +1115,7 @@ void Print() const { public: - scalar data[16]; + float data[16]; }; #endif diff --git a/src/nvmath/Plane.h b/src/nvmath/Plane.h index 0552801..b81fb2b 100644 --- a/src/nvmath/Plane.h +++ b/src/nvmath/Plane.h @@ -25,12 +25,12 @@ namespace nv const Plane & operator=(Plane::Arg v); Vector3 vector() const; - scalar offset() const; + float offset() const; const Vector4 & asVector() const; Vector4 & asVector(); - void operator*=(scalar s); + void operator*=(float s); private: Vector4 p; diff --git a/src/nvmath/Plane.inl b/src/nvmath/Plane.inl index c8bd3ea..4bc87f1 100644 --- a/src/nvmath/Plane.inl +++ b/src/nvmath/Plane.inl @@ -18,7 +18,7 @@ namespace nv inline const Plane & Plane::operator=(Plane::Arg v) { p = v.p; return *this; } inline Vector3 Plane::vector() const { return p.xyz(); } - inline scalar Plane::offset() const { return p.w; } + inline float Plane::offset() const { return p.w; } inline const Vector4 & Plane::asVector() const { return p; } inline Vector4 & Plane::asVector() { return p; } @@ -38,7 +38,7 @@ namespace nv return dot(plane.vector(), point) - plane.offset(); } - inline void Plane::operator*=(scalar s) + inline void Plane::operator*=(float s) { scale(p, s); } diff --git a/src/nvmath/Vector.h b/src/nvmath/Vector.h index a54ccce..a0cc539 100644 --- a/src/nvmath/Vector.h +++ b/src/nvmath/Vector.h @@ -8,30 +8,26 @@ namespace nv { - - // I should probably use templates. - typedef float scalar; - class NVMATH_CLASS Vector2 { public: typedef Vector2 const & Arg; Vector2(); - explicit Vector2(scalar f); - Vector2(scalar x, scalar y); + explicit Vector2(float f); + Vector2(float x, float y); Vector2(Vector2::Arg v); const Vector2 & operator=(Vector2::Arg v); - const scalar * ptr() const; + const float * ptr() const; - void set(scalar x, scalar y); + void set(float x, float y); Vector2 operator-() const; void operator+=(Vector2::Arg v); void operator-=(Vector2::Arg v); - void operator*=(scalar s); + void operator*=(float s); void operator*=(Vector2::Arg v); friend bool operator==(Vector2::Arg a, Vector2::Arg b); @@ -39,9 +35,9 @@ namespace nv union { struct { - scalar x, y; + float x, y; }; - scalar component[2]; + float component[2]; }; }; @@ -55,24 +51,24 @@ namespace nv typedef Vector3 const & Arg; Vector3(); - explicit Vector3(scalar x); - Vector3(scalar x, scalar y, scalar z); - Vector3(Vector2::Arg v, scalar z); + explicit Vector3(float x); + Vector3(float x, float y, float z); + Vector3(Vector2::Arg v, float z); Vector3(Vector3::Arg v); const Vector3 & operator=(Vector3::Arg v); Vector2 xy() const; - const scalar * ptr() const; + const float * ptr() const; - void set(scalar x, scalar y, scalar z); + void set(float x, float y, float z); Vector3 operator-() const; void operator+=(Vector3::Arg v); void operator-=(Vector3::Arg v); - void operator*=(scalar s); - void operator/=(scalar s); + void operator*=(float s); + void operator/=(float s); void operator*=(Vector3::Arg v); friend bool operator==(Vector3::Arg a, Vector3::Arg b); @@ -80,9 +76,9 @@ namespace nv union { struct { - scalar x, y, z; + float x, y, z; }; - scalar component[3]; + float component[3]; }; }; @@ -96,11 +92,11 @@ namespace nv typedef Vector4 const & Arg; Vector4(); - explicit Vector4(scalar x); - Vector4(scalar x, scalar y, scalar z, scalar w); - Vector4(Vector2::Arg v, scalar z, scalar w); + explicit Vector4(float x); + Vector4(float x, float y, float z, float w); + Vector4(Vector2::Arg v, float z, float w); Vector4(Vector2::Arg v, Vector2::Arg u); - Vector4(Vector3::Arg v, scalar w); + Vector4(Vector3::Arg v, float w); Vector4(Vector4::Arg v); // Vector4(const Quaternion & v); @@ -110,14 +106,14 @@ namespace nv Vector2 zw() const; Vector3 xyz() const; - const scalar * ptr() const; + const float * ptr() const; - void set(scalar x, scalar y, scalar z, scalar w); + void set(float x, float y, float z, float w); Vector4 operator-() const; void operator+=(Vector4::Arg v); void operator-=(Vector4::Arg v); - void operator*=(scalar s); + void operator*=(float s); void operator*=(Vector4::Arg v); friend bool operator==(Vector4::Arg a, Vector4::Arg b); @@ -125,9 +121,9 @@ namespace nv union { struct { - scalar x, y, z, w; + float x, y, z, w; }; - scalar component[4]; + float component[4]; }; }; diff --git a/src/nvmath/Vector.inl b/src/nvmath/Vector.inl index 9b0ec0a..d2d3341 100644 --- a/src/nvmath/Vector.inl +++ b/src/nvmath/Vector.inl @@ -22,8 +22,8 @@ namespace nv // Vector2 inline Vector2::Vector2() {} - inline Vector2::Vector2(scalar f) : x(f), y(f) {} - inline Vector2::Vector2(scalar x, scalar y) : x(x), y(y) {} + inline Vector2::Vector2(float f) : x(f), y(f) {} + inline Vector2::Vector2(float x, float y) : x(x), y(y) {} inline Vector2::Vector2(Vector2::Arg v) : x(v.x), y(v.y) {} inline const Vector2 & Vector2::operator=(Vector2::Arg v) @@ -33,12 +33,12 @@ namespace nv return *this; } - inline const scalar * Vector2::ptr() const + inline const float * Vector2::ptr() const { return &x; } - inline void Vector2::set(scalar x, scalar y) + inline void Vector2::set(float x, float y) { this->x = x; this->y = y; @@ -61,7 +61,7 @@ namespace nv y -= v.y; } - inline void Vector2::operator*=(scalar s) + inline void Vector2::operator*=(float s) { x *= s; y *= s; @@ -85,9 +85,9 @@ namespace nv // Vector3 inline Vector3::Vector3() {} - inline Vector3::Vector3(scalar f) : x(f), y(f), z(f) {} - inline Vector3::Vector3(scalar x, scalar y, scalar z) : x(x), y(y), z(z) {} - inline Vector3::Vector3(Vector2::Arg v, scalar z) : x(v.x), y(v.y), z(z) {} + inline Vector3::Vector3(float f) : x(f), y(f), z(f) {} + inline Vector3::Vector3(float x, float y, float z) : x(x), y(y), z(z) {} + inline Vector3::Vector3(Vector2::Arg v, float z) : x(v.x), y(v.y), z(z) {} inline Vector3::Vector3(Vector3::Arg v) : x(v.x), y(v.y), z(v.z) {} inline const Vector3 & Vector3::operator=(Vector3::Arg v) @@ -104,12 +104,12 @@ namespace nv return Vector2(x, y); } - inline const scalar * Vector3::ptr() const + inline const float * Vector3::ptr() const { return &x; } - inline void Vector3::set(scalar x, scalar y, scalar z) + inline void Vector3::set(float x, float y, float z) { this->x = x; this->y = y; @@ -135,14 +135,14 @@ namespace nv z -= v.z; } - inline void Vector3::operator*=(scalar s) + inline void Vector3::operator*=(float s) { x *= s; y *= s; z *= s; } - inline void Vector3::operator/=(scalar s) + inline void Vector3::operator/=(float s) { float is = 1.0f / s; x *= is; @@ -169,11 +169,11 @@ namespace nv // Vector4 inline Vector4::Vector4() {} - inline Vector4::Vector4(scalar f) : x(f), y(f), z(f), w(f) {} - inline Vector4::Vector4(scalar x, scalar y, scalar z, scalar w) : x(x), y(y), z(z), w(w) {} - inline Vector4::Vector4(Vector2::Arg v, scalar z, scalar w) : x(v.x), y(v.y), z(z), w(w) {} + inline Vector4::Vector4(float f) : x(f), y(f), z(f), w(f) {} + inline Vector4::Vector4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {} + inline Vector4::Vector4(Vector2::Arg v, float z, float w) : x(v.x), y(v.y), z(z), w(w) {} inline Vector4::Vector4(Vector2::Arg v, Vector2::Arg u) : x(v.x), y(v.y), z(u.x), w(u.y) {} - inline Vector4::Vector4(Vector3::Arg v, scalar w) : x(v.x), y(v.y), z(v.z), w(w) {} + inline Vector4::Vector4(Vector3::Arg v, float w) : x(v.x), y(v.y), z(v.z), w(w) {} inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {} inline const Vector4 & Vector4::operator=(const Vector4 & v) @@ -200,12 +200,12 @@ namespace nv return Vector3(x, y, z); } - inline const scalar * Vector4::ptr() const + inline const float * Vector4::ptr() const { return &x; } - inline void Vector4::set(scalar x, scalar y, scalar z, scalar w) + inline void Vector4::set(float x, float y, float z, float w) { this->x = x; this->y = y; @@ -234,7 +234,7 @@ namespace nv w -= v.w; } - inline void Vector4::operator*=(scalar s) + inline void Vector4::operator*=(float s) { x *= s; y *= s; @@ -284,7 +284,7 @@ namespace nv return sub(a, b); } - inline Vector2 scale(Vector2::Arg v, scalar s) + inline Vector2 scale(Vector2::Arg v, float s) { return Vector2(v.x * s, v.y * s); } @@ -294,7 +294,7 @@ namespace nv return Vector2(v.x * s.x, v.y * s.y); } - inline Vector2 operator*(Vector2::Arg v, scalar s) + inline Vector2 operator*(Vector2::Arg v, float s) { return scale(v, s); } @@ -304,32 +304,32 @@ namespace nv return Vector2(v1.x*v2.x, v1.y*v2.y); } - inline Vector2 operator*(scalar s, Vector2::Arg v) + inline Vector2 operator*(float s, Vector2::Arg v) { return scale(v, s); } - inline Vector2 operator/(Vector2::Arg v, scalar s) + inline Vector2 operator/(Vector2::Arg v, float s) { return scale(v, 1.0f/s); } - inline scalar dot(Vector2::Arg a, Vector2::Arg b) + inline float dot(Vector2::Arg a, Vector2::Arg b) { return a.x * b.x + a.y * b.y; } - inline scalar lengthSquared(Vector2::Arg v) + inline float lengthSquared(Vector2::Arg v) { return v.x * v.x + v.y * v.y; } - inline scalar length(Vector2::Arg v) + inline float length(Vector2::Arg v) { return sqrtf(lengthSquared(v)); } - inline scalar inverseLength(Vector2::Arg v) + inline float inverseLength(Vector2::Arg v) { return 1.0f / sqrtf(lengthSquared(v)); } @@ -444,7 +444,7 @@ namespace nv return Vector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); } - inline Vector3 scale(Vector3::Arg v, scalar s) + inline Vector3 scale(Vector3::Arg v, float s) { return Vector3(v.x * s, v.y * s, v.z * s); } @@ -454,12 +454,12 @@ namespace nv return Vector3(v.x * s.x, v.y * s.y, v.z * s.z); } - inline Vector3 operator*(Vector3::Arg v, scalar s) + inline Vector3 operator*(Vector3::Arg v, float s) { return scale(v, s); } - inline Vector3 operator*(scalar s, Vector3::Arg v) + inline Vector3 operator*(float s, Vector3::Arg v) { return scale(v, s); } @@ -469,38 +469,38 @@ namespace nv return scale(v, s); } - inline Vector3 operator/(Vector3::Arg v, scalar s) + inline Vector3 operator/(Vector3::Arg v, float s) { return scale(v, 1.0f/s); } - /*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, scalar s) + /*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, float s) { return Vector3(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s); }*/ - inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, scalar t) + inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, float t) { - const scalar s = 1.0f - t; + const float s = 1.0f - t; return Vector3(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z); } - inline scalar dot(Vector3::Arg a, Vector3::Arg b) + inline float dot(Vector3::Arg a, Vector3::Arg b) { return a.x * b.x + a.y * b.y + a.z * b.z; } - inline scalar lengthSquared(Vector3::Arg v) + inline float lengthSquared(Vector3::Arg v) { return v.x * v.x + v.y * v.y + v.z * v.z; } - inline scalar length(Vector3::Arg v) + inline float length(Vector3::Arg v) { return sqrtf(lengthSquared(v)); } - inline scalar inverseLength(Vector3::Arg v) + inline float inverseLength(Vector3::Arg v) { return 1.0f / sqrtf(lengthSquared(v)); } @@ -602,7 +602,7 @@ namespace nv return sub(a, b); } - inline Vector4 scale(Vector4::Arg v, scalar s) + inline Vector4 scale(Vector4::Arg v, float s) { return Vector4(v.x * s, v.y * s, v.z * s, v.w * s); } @@ -612,42 +612,42 @@ namespace nv return Vector4(v.x * s.x, v.y * s.y, v.z * s.z, v.w * s.w); } - inline Vector4 operator*(Vector4::Arg v, scalar s) + inline Vector4 operator*(Vector4::Arg v, float s) { return scale(v, s); } - inline Vector4 operator*(scalar s, Vector4::Arg v) + inline Vector4 operator*(float s, Vector4::Arg v) { return scale(v, s); } - inline Vector4 operator/(Vector4::Arg v, scalar s) + inline Vector4 operator/(Vector4::Arg v, float s) { return scale(v, 1.0f/s); } - inline Vector4 add_scaled(Vector4::Arg a, Vector4::Arg b, scalar s) + inline Vector4 add_scaled(Vector4::Arg a, Vector4::Arg b, float s) { return Vector4(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s, a.w + b.w * s); } - inline scalar dot(Vector4::Arg a, Vector4::Arg b) + inline float dot(Vector4::Arg a, Vector4::Arg b) { return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; } - inline scalar lengthSquared(Vector4::Arg v) + inline float lengthSquared(Vector4::Arg v) { return v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; } - inline scalar length(Vector4::Arg v) + inline float length(Vector4::Arg v) { return sqrtf(lengthSquared(v)); } - inline scalar inverseLength(Vector4::Arg v) + inline float inverseLength(Vector4::Arg v) { return 1.0f / sqrtf(lengthSquared(v)); } diff --git a/src/nvtt/ClusterFit.cpp b/src/nvtt/ClusterFit.cpp index c191c35..6717a84 100644 --- a/src/nvtt/ClusterFit.cpp +++ b/src/nvtt/ClusterFit.cpp @@ -49,7 +49,7 @@ void ClusterFit::setColourSet(const ColorSet * set) #endif // cache some values - m_count = set->count; + m_count = set->colorCount; Vector3 values[16]; for (uint i = 0; i < m_count; i++) @@ -148,7 +148,7 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end ) SimdVector besterror = SimdVector( FLT_MAX ); SimdVector x0 = zero; - + int b0 = 0, b1 = 0; // check all possible clusters for this total order @@ -191,22 +191,22 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end ) SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 ); SimdVector e4 = multiplyAdd( two, e3, e1 ); - // apply the metric to the error term - SimdVector e5 = e4 * m_metricSqr; - SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ(); + // apply the metric to the error term + SimdVector e5 = e4 * m_metricSqr; + SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ(); - // keep the solution if it wins - if( compareAnyLessThan( error, besterror ) ) - { - besterror = error; - beststart = a; - bestend = b; - b0 = c0; - b1 = c1; - } + // keep the solution if it wins + if( compareAnyLessThan( error, besterror ) ) + { + besterror = error; + beststart = a; + bestend = b; + b0 = c0; + b1 = c1; + } - x1 += m_weighted[c0+c1]; - } + x1 += m_weighted[c0+c1]; + } x0 += m_weighted[c0]; } @@ -218,8 +218,8 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end ) *start = beststart.toVector3(); *end = bestend.toVector3(); - // save the error - m_besterror = besterror; + // save the error + m_besterror = besterror; return true; } @@ -308,10 +308,10 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end ) } x2 += m_weighted[c0+c1+c2]; - } + } - x1 += m_weighted[c0+c1]; - } + x1 += m_weighted[c0+c1]; + } x0 += m_weighted[c0]; } @@ -321,9 +321,9 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end ) { *start = beststart.toVector3(); *end = bestend.toVector3(); - - // save the error - m_besterror = besterror; + + // save the error + m_besterror = besterror; return true; } @@ -404,12 +404,12 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end) // save the block if necessary if( besterror < m_besterror ) { - + *start = beststart; *end = bestend; - // save the error - m_besterror = besterror; + // save the error + m_besterror = besterror; return true; } @@ -420,8 +420,8 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end) bool ClusterFit::compress4(Vector3 * start, Vector3 * end) { const uint count = m_count; - Vector3 const grid( 31.0f, 63.0f, 31.0f ); - Vector3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); + const Vector3 grid( 31.0f, 63.0f, 31.0f ); + const Vector3 gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); // declare variables Vector3 beststart( 0.0f ); diff --git a/src/nvtt/QuickCompressDXT.cpp b/src/nvtt/QuickCompressDXT.cpp index b6e788a..214f66d 100644 --- a/src/nvtt/QuickCompressDXT.cpp +++ b/src/nvtt/QuickCompressDXT.cpp @@ -179,8 +179,13 @@ inline static uint computeIndices4(const ColorSet & set, Vector3::Arg maxColor, palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f); uint indices = 0; - for(int i = 0; i < 16; i++) + for(int i = 0; i < 16; i++) { + if (!set.isValidIndex(i)) { + // Skip masked pixels and out of bounds. + continue; + } + Vector3 color = set.color(i).xyz(); float d0 = colorDistance(palette[0], color); @@ -237,16 +242,20 @@ inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor, uint indices = 0; for(int i = 0; i < 16; i++) { + if (!set.isValidIndex(i)) { + // Skip masked pixels and out of bounds. + indices |= 3 << (2 * i); + continue; + } + Vector3 color = set.color(i).xyz(); - float alpha = set.color(i).w; float d0 = colorDistance(palette[0], color); float d1 = colorDistance(palette[1], color); float d2 = colorDistance(palette[2], color); uint index; - if (alpha == 0) index = 3; - else if (d0 < d1 && d0 < d2) index = 0; + if (d0 < d1 && d0 < d2) index = 0; else if (d1 < d2) index = 1; else index = 2; diff --git a/src/nvtt/tools/imgdiff.cpp b/src/nvtt/tools/imgdiff.cpp index bc00fb5..3e8ea1e 100644 --- a/src/nvtt/tools/imgdiff.cpp +++ b/src/nvtt/tools/imgdiff.cpp @@ -72,11 +72,11 @@ struct Error mse = 0.0f; } - void addSample(float e) + void addSample(double e) { samples++; - mabse += fabsf(e); - maxabse = nv::max(maxabse, fabsf(e)); + mabse += fabs(e); + maxabse = nv::max(maxabse, fabs(e)); mse += e * e; } @@ -84,8 +84,8 @@ struct Error { mabse /= samples; mse /= samples; - rmse = sqrtf(mse); - psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse); + rmse = sqrt(mse); + psnr = (rmse == 0) ? 999.0 : 20.0 * log10(255.0 / rmse); } void print() @@ -97,11 +97,11 @@ struct Error } int samples; - float mabse; - float maxabse; - float mse; - float rmse; - float psnr; + double mabse; + double maxabse; + double mse; + double rmse; + double psnr; }; struct NormalError @@ -230,10 +230,10 @@ int main(int argc, char *argv[]) const nv::Color32 c0(image0.pixel(e, i)); const nv::Color32 c1(image1.pixel(e, i)); - float r = float(c0.r - c1.r); - float g = float(c0.g - c1.g); - float b = float(c0.b - c1.b); - float a = float(c0.a - c1.a); + double r = float(c0.r - c1.r); + double g = float(c0.g - c1.g); + double b = float(c0.b - c1.b); + double a = float(c0.a - c1.a); error_r.addSample(r); error_g.addSample(g); @@ -247,9 +247,9 @@ int main(int argc, char *argv[]) if (compareAlpha) { - error_total.addSample(r * c0.a / 255.0f); - error_total.addSample(g * c0.a / 255.0f); - error_total.addSample(b * c0.a / 255.0f); + error_total.addSample(r * c0.a / 255.0); + error_total.addSample(g * c0.a / 255.0); + error_total.addSample(b * c0.a / 255.0); } else {