// This code is in the public domain -- Ignacio Castaño

#include "Sparse.h"
#include "KahanSum.h"

using namespace nv;


/// Ctor.
FullVector::FullVector(uint dim)
{
    m_array.resize(dim);
}

/// Copy ctor.
FullVector::FullVector(const FullVector & v) : m_array(v.m_array)
{
}

/// Copy operator
const FullVector & FullVector::operator=(const FullVector & v)
{
    nvCheck(dimension() == v.dimension());

    m_array = v.m_array;

    return *this;
}


void FullVector::fill(float f)
{
    const uint dim = dimension();
    for (uint i = 0; i < dim; i++)
    {
        m_array[i] = f;
    }
}

void FullVector::operator+= (const FullVector & v)
{
    nvDebugCheck(dimension() == v.dimension());

    const uint dim = dimension();
    for (uint i = 0; i < dim; i++)
    {
        m_array[i] += v.m_array[i];
    }
}

void FullVector::operator-= (const FullVector & v)
{
    nvDebugCheck(dimension() == v.dimension());

    const uint dim = dimension();
    for (uint i = 0; i < dim; i++)
    {
        m_array[i] -= v.m_array[i];
    }
}

void FullVector::operator*= (const FullVector & v)
{
    nvDebugCheck(dimension() == v.dimension());

    const uint dim = dimension();
    for (uint i = 0; i < dim; i++)
    {
        m_array[i] *= v.m_array[i];
    }
}

void FullVector::operator+= (float f)
{
    const uint dim = dimension();
    for (uint i = 0; i < dim; i++)
    {
        m_array[i] += f;
    }
}

void FullVector::operator-= (float f)
{
    const uint dim = dimension();
    for (uint i = 0; i < dim; i++)
    {
        m_array[i] -= f;
    }
}

void FullVector::operator*= (float f)
{
    const uint dim = dimension();
    for (uint i = 0; i < dim; i++)
    {
        m_array[i] *= f;
    }
}


void nv::saxpy(float a, const FullVector & x, FullVector & y)
{
    nvDebugCheck(x.dimension() == y.dimension());

    const uint dim = x.dimension();
    for (uint i = 0; i < dim; i++)
    {
        y[i] += a * x[i];
    }
}

void nv::copy(const FullVector & x, FullVector & y)
{
    nvDebugCheck(x.dimension() == y.dimension());

    const uint dim = x.dimension();
    for (uint i = 0; i < dim; i++)
    {
        y[i] = x[i];
    }
}

void nv::scal(float a, FullVector & x)
{
    const uint dim = x.dimension();
    for (uint i = 0; i < dim; i++)
    {
        x[i] *= a;
    }
}

float nv::dot(const FullVector & x, const FullVector & y)
{
    nvDebugCheck(x.dimension() == y.dimension());

    const uint dim = x.dimension();

    /*float sum = 0;
    for (uint i = 0; i < dim; i++)
    {
        sum += x[i] * y[i];
    }
    return sum;*/

    KahanSum kahan;
    for (uint i = 0; i < dim; i++)
    {
        kahan.add(x[i] * y[i]);
    }
    return kahan.sum();
}
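
// Usage sketch (illustrative only, compiled out; the function name and the
// values are made up for this example): the helpers above follow BLAS naming,
// so saxpy accumulates y += a*x, scal scales in place, and dot returns a
// Kahan-compensated dot product.
#if 0
static void exampleFullVectorUsage()
{
    FullVector x(3), y(3);
    x.fill(1.0f);                   // x = (1, 1, 1)
    y.fill(2.0f);                   // y = (2, 2, 2)

    saxpy(0.5f, x, y);              // y = 0.5*x + y = (2.5, 2.5, 2.5)
    scal(2.0f, y);                  // y = (5, 5, 5)

    const float d = dot(x, y);      // 1*5 + 1*5 + 1*5 = 15
}
#endif
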
FullMatrix::FullMatrix(uint d) : m_width(d), m_height(d)
{
    m_array.resize(d*d, 0.0f);
}

FullMatrix::FullMatrix(uint w, uint h) : m_width(w), m_height(h)
{
    m_array.resize(w*h, 0.0f);
}

FullMatrix::FullMatrix(const FullMatrix & m) : m_width(m.m_width), m_height(m.m_height)
{
    m_array = m.m_array;
}

const FullMatrix & FullMatrix::operator=(const FullMatrix & m)
{
    nvCheck(width() == m.width());
    nvCheck(height() == m.height());

    m_array = m.m_array;

    return *this;
}


float FullMatrix::getCoefficient(uint x, uint y) const
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    return m_array[y * width() + x];
}

void FullMatrix::setCoefficient(uint x, uint y, float f)
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    m_array[y * width() + x] = f;
}

void FullMatrix::addCoefficient(uint x, uint y, float f)
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    m_array[y * width() + x] += f;
}

void FullMatrix::mulCoefficient(uint x, uint y, float f)
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    m_array[y * width() + x] *= f;
}

float FullMatrix::dotRow(uint y, const FullVector & v) const
{
    nvDebugCheck( v.dimension() == width() );
    nvDebugCheck( y < height() );

    float sum = 0;

    const uint count = v.dimension();
    for (uint i = 0; i < count; i++)
    {
        sum += m_array[y * count + i] * v[i];
    }

    return sum;
}

void FullMatrix::madRow(uint y, float alpha, FullVector & v) const
{
    nvDebugCheck( v.dimension() == width() );
    nvDebugCheck( y < height() );

    const uint count = v.dimension();
    for (uint i = 0; i < count; i++)
    {
        v[i] += alpha * m_array[y * count + i];
    }
}


// y = M * x
void nv::mult(const FullMatrix & M, const FullVector & x, FullVector & y)
{
    mult(NoTransposed, M, x, y);
}

void nv::mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y)
{
    const uint w = M.width();
    const uint h = M.height();

    if (TM == Transposed)
    {
        nvDebugCheck( h == x.dimension() );
        nvDebugCheck( w == y.dimension() );

        y.fill(0.0f);

        for (uint i = 0; i < h; i++)
        {
            M.madRow(i, x[i], y);
        }
    }
    else
    {
        nvDebugCheck( w == x.dimension() );
        nvDebugCheck( h == y.dimension() );

        for (uint i = 0; i < h; i++)
        {
            y[i] = M.dotRow(i, x);
        }
    }
}

// y = alpha*A*x + beta*y
void nv::sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
{
    sgemv(alpha, NoTransposed, A, x, beta, y);
}

void nv::sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
{
    const uint w = A.width();
    const uint h = A.height();

    if (TA == Transposed)
    {
        nvDebugCheck( h == x.dimension() );
        nvDebugCheck( w == y.dimension() );

        // Scale y by beta first; the transposed product is accumulated row by row.
        scal(beta, y);

        for (uint i = 0; i < h; i++)
        {
            A.madRow(i, alpha * x[i], y);
        }
    }
    else
    {
        nvDebugCheck( w == x.dimension() );
        nvDebugCheck( h == y.dimension() );

        for (uint i = 0; i < h; i++)
        {
            y[i] = alpha * A.dotRow(i, x) + beta * y[i];
        }
    }
}


// Multiply a row of A by a column of B.
static float dot(uint j, Transpose TA, const FullMatrix & A, uint i, Transpose TB, const FullMatrix & B)
{
    const uint w = (TA == NoTransposed) ? A.width() : A.height();
    nvDebugCheck(w == ((TB == NoTransposed) ? B.height() : B.width()));

    float sum = 0.0f;

    for (uint k = 0; k < w; k++)
    {
        const float a = (TA == NoTransposed) ? A.getCoefficient(k, j) : A.getCoefficient(j, k); // @@ Move branches out of the loop?
        const float b = (TB == NoTransposed) ? B.getCoefficient(i, k) : B.getCoefficient(k, i);
        sum += a * b;
    }

    return sum;
}

// C = A * B
void nv::mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C)
{
    mult(NoTransposed, A, NoTransposed, B, C);
}

void nv::mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C)
{
    sgemm(1.0f, TA, A, TB, B, 0.0f, C);
}

// C = alpha*A*B + beta*C
void nv::sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C)
{
    sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
}

void nv::sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C)
{
    const uint w = C.width();
    const uint h = C.height();

    uint aw = (TA == NoTransposed) ? A.width() : A.height();
    uint ah = (TA == NoTransposed) ? A.height() : A.width();
    uint bw = (TB == NoTransposed) ? B.width() : B.height();
    uint bh = (TB == NoTransposed) ? B.height() : B.width();

    nvDebugCheck(aw == bh);
    nvDebugCheck(bw == ah);
    nvDebugCheck(w == bw);
    nvDebugCheck(h == ah);

    for (uint y = 0; y < h; y++)
    {
        for (uint x = 0; x < w; x++)
        {
            // Row y of op(A) times column x of op(B).
            float c = alpha * ::dot(y, TA, A, x, TB, B) + beta * C.getCoefficient(x, y);
            C.setCoefficient(x, y, c);
        }
    }
}
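
// Usage sketch (illustrative only, compiled out; names and values are made up
// for this example): sgemv evaluates y = alpha*A*x + beta*y and sgemm
// evaluates C = alpha*A*B + beta*C, with optional transposition of either
// operand. Coefficients are addressed as (column x, row y).
#if 0
static void exampleFullMatrixUsage()
{
    FullMatrix A(2);                // 2x2, zero-initialized
    A.setCoefficient(0, 0, 2.0f);   // A(row 0, col 0) = 2
    A.setCoefficient(1, 1, 3.0f);   // A(row 1, col 1) = 3

    FullVector x(2), y(2);
    x.fill(1.0f);                   // x = (1, 1)
    y.fill(1.0f);                   // y = (1, 1)

    sgemv(1.0f, A, x, 1.0f, y);     // y = A*x + y = (3, 4)

    FullMatrix B(2), C(2);
    B.setCoefficient(0, 0, 1.0f);
    B.setCoefficient(1, 1, 1.0f);   // B = identity
    mult(A, B, C);                  // C = A*B = A
}
#endif
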
/// Ctor. Init the size of the sparse matrix.
SparseMatrix::SparseMatrix(uint d) : m_width(d)
{
    m_array.resize(d);
}

/// Ctor. Init the size of the sparse matrix.
SparseMatrix::SparseMatrix(uint w, uint h) : m_width(w)
{
    m_array.resize(h);
}

SparseMatrix::SparseMatrix(const SparseMatrix & m) : m_width(m.m_width)
{
    m_array = m.m_array;
}

const SparseMatrix & SparseMatrix::operator=(const SparseMatrix & m)
{
    nvCheck(width() == m.width());
    nvCheck(height() == m.height());

    m_array = m.m_array;

    return *this;
}


// x is column, y is row
float SparseMatrix::getCoefficient(uint x, uint y) const
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    const uint count = m_array[y].count();
    for (uint i = 0; i < count; i++)
    {
        if (m_array[y][i].x == x) return m_array[y][i].v;
    }

    return 0.0f;
}

void SparseMatrix::setCoefficient(uint x, uint y, float f)
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    const uint count = m_array[y].count();
    for (uint i = 0; i < count; i++)
    {
        if (m_array[y][i].x == x)
        {
            m_array[y][i].v = f;
            return;
        }
    }

    Coefficient c = { x, f };
    m_array[y].append( c );
}

void SparseMatrix::addCoefficient(uint x, uint y, float f)
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    const uint count = m_array[y].count();
    for (uint i = 0; i < count; i++)
    {
        if (m_array[y][i].x == x)
        {
            m_array[y][i].v += f;
            return;
        }
    }

    Coefficient c = { x, f };
    m_array[y].append( c );
}

void SparseMatrix::mulCoefficient(uint x, uint y, float f)
{
    nvDebugCheck( x < width() );
    nvDebugCheck( y < height() );

    const uint count = m_array[y].count();
    for (uint i = 0; i < count; i++)
    {
        if (m_array[y][i].x == x)
        {
            m_array[y][i].v *= f;
            return;
        }
    }

    Coefficient c = { x, f };
    m_array[y].append( c );
}


float SparseMatrix::sumRow(uint y) const
{
    nvDebugCheck( y < height() );

    const uint count = m_array[y].count();

    /*float sum = 0;
    for (uint i = 0; i < count; i++)
    {
        sum += m_array[y][i].v;
    }
    return sum;*/

    KahanSum kahan;
    for (uint i = 0; i < count; i++)
    {
        kahan.add(m_array[y][i].v);
    }
    return kahan.sum();
}

float SparseMatrix::dotRow(uint y, const FullVector & v) const
{
    nvDebugCheck( y < height() );

    const uint count = m_array[y].count();

    /*float sum = 0;
    for (uint i = 0; i < count; i++)
    {
        sum += m_array[y][i].v * v[m_array[y][i].x];
    }
    return sum;*/

    KahanSum kahan;
    for (uint i = 0; i < count; i++)
    {
        kahan.add(m_array[y][i].v * v[m_array[y][i].x]);
    }
    return kahan.sum();
}

void SparseMatrix::madRow(uint y, float alpha, FullVector & v) const
{
    nvDebugCheck(y < height());

    const uint count = m_array[y].count();
    for (uint i = 0; i < count; i++)
    {
        v[m_array[y][i].x] += alpha * m_array[y][i].v;
    }
}


void SparseMatrix::clearRow(uint y)
{
    nvDebugCheck( y < height() );

    m_array[y].clear();
}

void SparseMatrix::scaleRow(uint y, float f)
{
    nvDebugCheck( y < height() );

    const uint count = m_array[y].count();
    for (uint i = 0; i < count; i++)
    {
        m_array[y][i].v *= f;
    }
}

void SparseMatrix::normalizeRow(uint y)
{
    nvDebugCheck( y < height() );

    float norm = 0.0f;

    const uint count = m_array[y].count();
    for (uint i = 0; i < count; i++)
    {
        float f = m_array[y][i].v;
        norm += f * f;
    }

    scaleRow(y, 1.0f / sqrtf(norm));
}


void SparseMatrix::clearColumn(uint x)
{
    nvDebugCheck(x < width());

    for (uint y = 0; y < height(); y++)
    {
        const uint count = m_array[y].count();
        for (uint e = 0; e < count; e++)
        {
            if (m_array[y][e].x == x)
            {
                m_array[y][e].v = 0.0f;
                break;
            }
        }
    }
}

void SparseMatrix::scaleColumn(uint x, float f)
{
    nvDebugCheck(x < width());

    for (uint y = 0; y < height(); y++)
    {
        const uint count = m_array[y].count();
        for (uint e = 0; e < count; e++)
        {
            if (m_array[y][e].x == x)
            {
                m_array[y][e].v *= f;
                break;
            }
        }
    }
}

const Array<SparseMatrix::Coefficient> & SparseMatrix::getRow(uint y) const
{
    return m_array[y];
}
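
// Usage sketch (illustrative only, compiled out; names and values are made up
// for this example): each row stores only its non-zero coefficients, so
// entries that were never set read back as 0, and setCoefficient/addCoefficient
// append a new coefficient to the row on first use.
#if 0
static void exampleSparseMatrixUsage()
{
    SparseMatrix M(3);                          // 3x3, all rows empty (zero)
    M.setCoefficient(0, 0, 4.0f);               // M(row 0, col 0) = 4
    M.setCoefficient(2, 0, 1.0f);               // M(row 0, col 2) = 1
    M.addCoefficient(2, 0, 1.0f);               // M(row 0, col 2) = 2

    const float a = M.getCoefficient(1, 0);     // 0, column 1 was never set
    const float s = M.sumRow(0);                // 4 + 2 = 6
}
#endif
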
// y = M * x
void nv::mult(const SparseMatrix & M, const FullVector & x, FullVector & y)
{
    mult(NoTransposed, M, x, y);
}

void nv::mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y)
{
    const uint w = M.width();
    const uint h = M.height();

    if (TM == Transposed)
    {
        nvDebugCheck( h == x.dimension() );
        nvDebugCheck( w == y.dimension() );

        y.fill(0.0f);

        for (uint i = 0; i < h; i++)
        {
            M.madRow(i, x[i], y);
        }
    }
    else
    {
        nvDebugCheck( w == x.dimension() );
        nvDebugCheck( h == y.dimension() );

        for (uint i = 0; i < h; i++)
        {
            y[i] = M.dotRow(i, x);
        }
    }
}

// y = alpha*A*x + beta*y
void nv::sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
{
    sgemv(alpha, NoTransposed, A, x, beta, y);
}

void nv::sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
{
    const uint w = A.width();
    const uint h = A.height();

    if (TA == Transposed)
    {
        nvDebugCheck( h == x.dimension() );
        nvDebugCheck( w == y.dimension() );

        // Scale y by beta first; the transposed product is accumulated row by row.
        scal(beta, y);

        for (uint i = 0; i < h; i++)
        {
            A.madRow(i, alpha * x[i], y);
        }
    }
    else
    {
        nvDebugCheck( w == x.dimension() );
        nvDebugCheck( h == y.dimension() );

        for (uint i = 0; i < h; i++)
        {
            y[i] = alpha * A.dotRow(i, x) + beta * y[i];
        }
    }
}
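
// Usage sketch (illustrative only, compiled out; names and values are made up
// for this example): the sparse matrix-vector products mirror the dense ones.
// The transposed variant scatters each row, scaled by the corresponding x
// component, into y.
#if 0
static void exampleSparseMatVec()
{
    SparseMatrix M(2);              // 2x2
    M.setCoefficient(0, 0, 1.0f);   // row 0 = (1, 2)
    M.setCoefficient(1, 0, 2.0f);
    M.setCoefficient(1, 1, 3.0f);   // row 1 = (0, 3)

    FullVector x(2), y(2);
    x.fill(1.0f);                   // x = (1, 1)

    mult(M, x, y);                  // y = M*x  = (3, 3)
    mult(Transposed, M, x, y);      // y = Mt*x = (1, 5)
}
#endif
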
// dot y-row of A by x-column of B
static float dotRowColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
    const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
    const uint count = row.count();

    /*float sum = 0.0f;
    for (uint i = 0; i < count; i++)
    {
        const SparseMatrix::Coefficient & c = row[i];
        sum += c.v * B.getCoefficient(x, c.x);
    }
    return sum;*/

    KahanSum kahan;
    for (uint i = 0; i < count; i++)
    {
        const SparseMatrix::Coefficient & c = row[i];
        kahan.add(c.v * B.getCoefficient(x, c.x));
    }
    return kahan.sum();
}

// dot y-row of A by x-row of B
static float dotRowRow(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
    const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
    const uint count = row.count();

    /*float sum = 0.0f;
    for (uint i = 0; i < count; i++)
    {
        const SparseMatrix::Coefficient & c = row[i];
        sum += c.v * B.getCoefficient(c.x, x);
    }
    return sum;*/

    KahanSum kahan;
    for (uint i = 0; i < count; i++)
    {
        const SparseMatrix::Coefficient & c = row[i];
        kahan.add(c.v * B.getCoefficient(c.x, x));
    }
    return kahan.sum();
}

// dot y-column of A by x-column of B
static float dotColumnColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
    nvDebugCheck(A.height() == B.height());

    const uint h = A.height();

    /*float sum = 0.0f;
    for (uint i = 0; i < h; i++)
    {
        sum += A.getCoefficient(y, i) * B.getCoefficient(x, i);
    }
    return sum;*/

    KahanSum kahan;
    for (uint i = 0; i < h; i++)
    {
        kahan.add(A.getCoefficient(y, i) * B.getCoefficient(x, i));
    }
    return kahan.sum();
}


// C = A * B
void nv::mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C)
{
    mult(NoTransposed, A, NoTransposed, B, C);
}

void nv::mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C)
{
    sgemm(1.0f, TA, A, TB, B, 0.0f, C);
}

// C = alpha*A*B + beta*C
void nv::sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C)
{
    sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
}

void nv::sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C)
{
    const uint w = C.width();
    const uint h = C.height();

    uint aw = (TA == NoTransposed) ? A.width() : A.height();
    uint ah = (TA == NoTransposed) ? A.height() : A.width();
    uint bw = (TB == NoTransposed) ? B.width() : B.height();
    uint bh = (TB == NoTransposed) ? B.height() : B.width();

    nvDebugCheck(aw == bh);
    nvDebugCheck(bw == ah);
    nvDebugCheck(w == bw);
    nvDebugCheck(h == ah);

    for (uint y = 0; y < h; y++)
    {
        for (uint x = 0; x < w; x++)
        {
            float c = beta * C.getCoefficient(x, y);

            if (TA == NoTransposed && TB == NoTransposed)
            {
                // dot y-row of A by x-column of B.
                c += alpha * dotRowColumn(y, A, x, B);
            }
            else if (TA == Transposed && TB == Transposed)
            {
                // dot y-column of A by x-row of B.
                c += alpha * dotRowColumn(x, B, y, A);
            }
            else if (TA == Transposed && TB == NoTransposed)
            {
                // dot y-column of A by x-column of B.
                c += alpha * dotColumnColumn(y, A, x, B);
            }
            else if (TA == NoTransposed && TB == Transposed)
            {
                // dot y-row of A by x-row of B.
                c += alpha * dotRowRow(y, A, x, B);
            }

            if (c != 0.0f)
            {
                C.setCoefficient(x, y, c);
            }
        }
    }
}

// C = At * A
void nv::sqm(const SparseMatrix & A, SparseMatrix & C)
{
    // This is quite expensive...
    mult(Transposed, A, NoTransposed, A, C);
}
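
// Usage sketch (illustrative only, compiled out; names and values are made up
// for this example): sqm forms the normal-equations matrix C = At*A, the
// typical input to a least-squares / conjugate-gradient style solve.
#if 0
static void exampleSqm()
{
    SparseMatrix A(2, 3);           // 3 rows, 2 columns
    A.setCoefficient(0, 0, 1.0f);   // row 0 = (1, 0)
    A.setCoefficient(1, 1, 1.0f);   // row 1 = (0, 1)
    A.setCoefficient(0, 2, 1.0f);   // row 2 = (1, 1)
    A.setCoefficient(1, 2, 1.0f);

    SparseMatrix C(2);              // 2x2 result
    sqm(A, C);                      // C = At*A = [[2, 1], [1, 2]]
}
#endif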