/* * Copyright (c) 2002-2006 Samit Basu * Copyright (c) 2006 Thomas Beutlich * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include "Sparse.hpp" #include "Malloc.hpp" #include #include "IEEEFP.hpp" #if HAVE_UMFPACK extern "C" { #include "umfpack.h" } #endif #include "LAPACK.hpp" #include "MemPtr.hpp" #include "Math.hpp" #include // Routines that need to be Colon-compliant // SetSparseNDimSubsets // // Routines that could stand to optimized // getSparseNDimSubsets and getSparseRowSubset // // Need to move resize for sparse matrices back into Array class so that // no unnecessary copies get made during the set operations. #if HAVE_ARPACK extern "C" { int znaupd_(int *ido, char *bmat, int *n, const char* which, int *nev, double *tol, double *resid, int *ncv, double *v, int *ldv, int *iparam, int *ipntr, double *workd, double *workl, int *lworkl, double *rwork, int *info); int zneupd_(int *rvec, char *howmny, int *select, double *d, double *z, int *ldz, double *sigma, double *workev, char *bmat, int *n, const char *which, int *nev, double *tol, double *resid, int *ncv, double *v, int *ldv, int *iparam, int *ipntr, double *workd, double *workl, int *lworkl, double *rwork, int *info); int dnaupd_(int *ido, char *bmat, int *n, const char* which, int *nev, double *tol, double *resid, int *ncv, double *v, int *ldv, int *iparam, int *ipntr, double *workd, double *workl, int *lworkl, int *info, int len1, int len2); int dneupd_(int *rvec, char *howmny, int *select, double *dr, double *di, double *z__, int *ldz, double *sigmar, double *sigmai, double *workev, char * bmat, int *n, const char *which, int *nev, double *tol, double *resid, int *ncv, double *v, int *ldv, int *iparam, int *ipntr, double *workd, double *workl, int *lworkl, int *info); int dsaupd_(int *ido, char *bmat, int *n, const char* which, int *nev, double *tol, double *resid, int *ncv, double *v, int *ldv, int *iparam, int *ipntr, double *workd, double *workl, int *lworkl, int *info); int dseupd_(int *rvec, char *howmny, int *select, double *d, double *z__, int *ldz, double *sigma, char *bmat, int *n, const char *which, int *nev, double *tol, double *resid, int *ncv, double *v, int *ldv, int *iparam, int *ipntr, double *workd, double *workl, int *lworkl, int *info); } #endif template T* RLEDuplicate(const T*src) { int blen = (int) (src[0]+1); T* ret = new T[blen]; memcpy(ret,src,sizeof(T)*blen); return ret; } template class RLEEncoder { T* buffer; int m; int n; int len; int zlen; int state; public: RLEEncoder(T* buf, int alen) { m = 0; n = 0; buffer = buf; len = alen; state = 0; zlen = 0; } int row() {return m;} void set(int p) { if (p <= m) return; p -= m; if (state == 0) { zlen = p; state = 1; } else zlen += p; m += p; } void push(T val) { if (state == 0) { if (val != 0) { buffer[n] = val; n++; } else { state = 1; zlen = 1; } } else { if (val == 0) { zlen++; } else { if (zlen) { buffer[n++] = 0; buffer[n++] = zlen; } buffer[n++] = val; state = 0; zlen = 0; } } m++; } void end() { set(len); if (zlen) { buffer[n++] = 0; buffer[n++] = zlen; } state = 0; } T* copyout() { T* ret; ret = new T[n+1]; ret[0] = n; memcpy(ret+1,buffer,n*sizeof(T)); return ret; } }; template class RLEDecoder { const T* data; int m; int n; int len; public: RLEDecoder(const T* str, int alen) { data = str; m = 0; n = 1; len = alen; } int row() { return m; } void update() { while ((m < len) && (data[n] == 0)) { m += (int) data[n+1]; n += 2; if ((m < len) && (n>data[0])) { throw Exception("Invalid data string!\n"); } } } void advance() { if (m < len) { m++; n++; update(); } } T value() { if (m >= len) throw Exception("RLE Decoder overflow - corrupted sparse matrix string encountered"); return data[n]; } bool more() { return (m < len); } int nnzs() { int nnz = 0; int t = 0; int p = 1; while (t < len) { if (data[p] != 0) { nnz++; p++; t++; } else { t += (int) data[p+1]; p += 2; } } return nnz; } }; template class RLEEncoderComplex { T* buffer; int m; int n; int len; int zlen; int state; public: RLEEncoderComplex(T* buf, int alen) { m = 0; n = 0; buffer = buf; len = alen; state = 0; zlen = 0; } int row() { return m; } void set(int p) { if (p <= m) return; p -= m; if (state == 0) { zlen = p; state = 1; } else zlen += p; m += p; } void push(T valr, T vali) { if (state == 0) { if ((valr != 0) || (vali != 0)) { buffer[n++] = valr; buffer[n++] = vali; } else { state = 1; zlen = 1; } } else { if ((valr == 0) && (vali == 0)) { zlen++; } else { if (zlen) { buffer[n++] = 0; buffer[n++] = 0; buffer[n++] = zlen; } buffer[n++] = valr; buffer[n++] = vali; state = 0; zlen = 0; } } m++; } void end() { set(len); if (zlen>0) { buffer[n++] = 0; buffer[n++] = 0; buffer[n++] = zlen; } state = 0; } T* copyout() { T* ret; ret = new T[n+1]; ret[0] = n; memcpy(ret+1,buffer,n*sizeof(T)); return ret; } }; template class RLEDecoderComplex { const T* data; int m; int n; int len; public: RLEDecoderComplex(const T* str, int alen) { data = str; m = 0; n = 1; len = alen; } int row() { return m; } void update() { while ((m < len) && (data[n] == 0) && (data[n+1] == 0)) { m += (int) data[n+2]; n += 3; } } void advance() { if (m < len) { m++; n+=2; update(); } } T value_real() { if (m >= len) throw Exception("RLE DecoderComplex overflow - corrupted sparse matrix string encountered"); return data[n]; } T value_imag() { if (m >= len) throw Exception("RLE DecoderComplex overflow - corrupted sparse matrix string encountered"); return data[n+1]; } bool more() { return (m < len); } int nnzs() { int nnz = 0; int t = 0; int p = 1; while (t < len) { if ((data[p] != 0) || (data[p+1] != 0)) { nnz++; p+=2; t++; } else { t += (int) data[p+2]; p += 3; } } return nnz; } }; // The following ops are O(N^2) instead of O(nnz^2): // // GetSparseNDimSubsets - If the rowindex is sorted into an IJV type list, it can be done without the Decompress step. Although it is really not too bad, since the vector being recompressed is of the subset size. // SetSparseNDimSubsets - same story, only this time the vector being recompressed is of size (N) instead of size (M). So the cost is higher. The right way to do this is with a list merge. // DeleteSparseMatrixRowsReal template void DeleteSparse(T** src, int cols) { int i; for (i=0;i void DecompressComplexString(const T* src, T* dst, int count) { RLEDecoderComplex A(src,count); A.update(); while (A.more()) { dst[2*A.row()] = A.value_real(); dst[2*A.row()+1] = A.value_imag(); A.advance(); } } template T* CompressComplexVector(T* buffer, const T* src, int count) { RLEEncoderComplex A(buffer,count); for (int i=0;i void DecompressRealString(const T* src, T* dst, int count) { RLEDecoder A(src,count); A.update(); while (A.more()) { dst[A.row()] = A.value(); A.advance(); } } template T* CompressRealVector(T* buffer, const T* src, int count) { RLEEncoder A(buffer,count); for (int i=0;i T** ConvertDenseToSparseComplex(const T* src, int rows, int cols) { T** dp = new T*[cols]; MemBlock bufferBlock(rows*4); T* buffer = bufferBlock.Pointer(); for (int i=0;i(buffer,src+i*rows*2, rows); return dp; } template T** ConvertDenseToSparseReal(const T* src, int rows, int cols) { T** dp = new T*[cols]; MemBlock bufferBlock(rows*2); T* buffer = bufferBlock.Pointer(); for (int i=0;i(buffer,src+i*rows, rows); return dp; } template T* ConvertSparseToDenseComplex(const T** src, int rows, int cols) { T* dst; dst = (T*) Malloc(sizeof(T)*rows*cols*2); for (int i=0;i(src[i],dst+i*2*rows,rows); return dst; } template T* ConvertSparseToDenseReal(const T** src, int rows, int cols) { T* dst; dst = (T*) Malloc(sizeof(T)*rows*cols); for (int i=0;i(src[i],dst+i*rows,rows); return dst; } logical* ConvertSparseToDenseLogical(const uint32** src, int rows, int cols) { logical* dst; uint32* tbuf; dst = (logical*) Malloc(sizeof(logical)*rows*cols); tbuf = (uint32*) Malloc(sizeof(uint32)*rows); for (int i=0;i(src[i],tbuf,rows); for (int j=0;j((const int32 **)cp,rows,cols); case FM_FLOAT: return ConvertSparseToDenseReal((const float **)cp,rows,cols); case FM_DOUBLE: return ConvertSparseToDenseReal((const double **)cp,rows,cols); case FM_COMPLEX: return ConvertSparseToDenseComplex((const float **)cp,rows,cols); case FM_DCOMPLEX: return ConvertSparseToDenseComplex((const double **)cp,rows,cols); default: throw Exception("Unsupported type in makeDenseArray"); } } void* makeSparseArray(Class dclass, int rows, int cols, const void *cp) { switch(dclass) { case FM_LOGICAL: return ConvertDenseToSparseReal((const logical *)cp,rows,cols); case FM_INT32: return ConvertDenseToSparseReal((const int32 *)cp,rows,cols); case FM_FLOAT: return ConvertDenseToSparseReal((const float *)cp,rows,cols); case FM_DOUBLE: return ConvertDenseToSparseReal((const double *)cp,rows,cols); case FM_COMPLEX: return ConvertDenseToSparseComplex((const float *)cp,rows,cols); case FM_DCOMPLEX: return ConvertDenseToSparseComplex((const double *)cp,rows,cols); default: throw Exception("Unsupported type in makeSparseArray"); } } template class IJVEntry { public: int32 I; int32 J; T Vreal; T Vimag; }; template bool operator<(const IJVEntry& a, const IJVEntry& b) { return ((a.J < b.J) || ((a.J == b.J) && (a.I <= b.I))); } template bool operator==(const IJVEntry& a, const IJVEntry& b) { return ((a.I == b.I) && (a.J == b.J) && (a.Vreal == b.Vreal) && (a.Vimag == b.Vimag)); } template T* CompressRealIJV(T* buffer, IJVEntry *mlist, int len, int &ptr, int col, int row) { // Special case an all zeros column if ((ptr >= len) || (mlist[ptr].J > col)) { T* buf = new T[3]; buf[0] = 2; buf[1] = 0; buf[2] = row; return buf; } RLEEncoder A(buffer, row); while (ptr < len && mlist[ptr].J == col) { int n; n = mlist[ptr].I; T accum = 0; while (ptr < len && (mlist[ptr].I == n) && (mlist[ptr].J == col)) { accum += mlist[ptr].Vreal; ptr++; } A.set(n); A.push(accum); } A.end(); return A.copyout(); } template T* CompressComplexIJV(T* buffer, IJVEntry *mlist, int len, int &ptr, int col, int row) { if ((ptr >= len) || (mlist[ptr].J > col)) { T* buf = new T[4]; buf[0] = 3; buf[1] = 0; buf[2] = 0; buf[3] = row; return buf; } RLEEncoderComplex A(buffer, row); while (ptr < len && mlist[ptr].J == col) { int n; n = mlist[ptr].I; T accum_real = 0; T accum_imag = 0; while (ptr < len && (mlist[ptr].I == n) && (mlist[ptr].J == col)) { accum_real += mlist[ptr].Vreal; accum_imag += mlist[ptr].Vimag; ptr++; } A.set(n); A.push(accum_real,accum_imag); } A.end(); return A.copyout(); } template void* makeSparseFromIJVReal(int rows, int cols, int nnz, const uint32* I, int istride, const uint32 *J, int jstride, const T* cp, int cpstride) { // build an IJV master list IJVEntry *mlist = new IJVEntry[nnz]; int i; for (i=0;i= rows)) throw Exception("an element of the I vector in the I,J,V exceeds the size of the output matrix"); mlist[i].J = J[jstride*i]-1; if ((mlist[i].J < 0) || (mlist[i].J >= cols)) throw Exception("an element of the J vector in the I,J,V exceeds the size of the output matrix"); mlist[i].Vreal = cp[cpstride*i]; } std::sort(mlist,mlist+nnz); T** op; // Allocate the output (sparse) array op = new T*[cols]; // Get an integer pointer into the IJV list int ptr = 0; // Stringify... MemBlock bufferBlock(rows*2); T* buffer = bufferBlock.Pointer(); for (int col=0;col(buffer,mlist,nnz,ptr,col,rows); // Free the space delete[] mlist; // Return the array return op; } void* makeSparseFromIJVLogical(int rows, int cols, int nnz, const uint32* I, int istride, const uint32 *J, int jstride, const logical* cp, int cpstride) { // build an IJV master list IJVEntry *mlist = new IJVEntry[nnz]; int i; for (i=0;i= rows)) throw Exception("an element of the I vector in the I,J,V exceeds the size of the output matrix"); mlist[i].J = J[jstride*i]-1; if ((mlist[i].J < 0) || (mlist[i].J >= cols)) throw Exception("an element of the J vector in the I,J,V exceeds the size of the output matrix"); mlist[i].Vreal = cp[cpstride*i]; } std::sort(mlist,mlist+nnz); uint32** op; // Allocate the output (sparse) array op = new uint32*[cols]; // Get an integer pointer into the IJV list int ptr = 0; // Stringify... MemBlock bufferBlock(rows*2); uint32* buffer = bufferBlock.Pointer(); for (int col=0;col(buffer,mlist,nnz,ptr,col,rows); // Free the space delete[] mlist; // Return the array return op; } template void *SparseZerosReal(int rows, int cols) { T **src; src = new T*[cols]; int i; for (i=0;i(rows,cols); } template void *SparseZerosComplex(int rows, int cols) { T **src; src = new T*[cols]; int i; for (i=0;i void* MakeSparseFromDiagReal(T* cp, int len) { T** B; B = new T*[len]; // Each column is in 5,0,N,k,0,M for (int i=0;i void* MakeSparseFromDiagComplex(T* cp, int len) { T** B; B = new T*[len]; // Each column is in 8,0,0,N,kr,ki,0,0,M for (int i=0;i void* MakeSparseScaledIdentityReal(T cp, int len) { T** B; if (cp == 0) return SparseZerosReal(len, len); B = new T*[len]; // Each column is in 5,0,N,k,0,M for (int i=0;i void* MakeSparseScaledIdentityComplex(T cp_r, T cp_i, int len) { T** B; B = new T*[len]; if ((cp_r == 0) && (cp_i == 0)) return SparseZerosComplex(len, len); // Each column is in 8,0,0,N,kr,ki,0,0,M for (int i=0;i void* makeSparseFromIJVComplex(int rows, int cols, int nnz, const uint32* I, int istride, const uint32 *J, int jstride, const T* cp, int cpstride) { // build an IJV master list IJVEntry *mlist = new IJVEntry[nnz]; int i; for (i=0;i= rows)) throw Exception("an element of the I vector in the I,J,V exceeds the size of the output matrix"); mlist[i].J = J[jstride*i]-1; if ((mlist[i].J < 0) || (mlist[i].J >= cols)) throw Exception("an element of the J vector in the I,J,V exceeds the size of the output matrix"); mlist[i].Vreal = cp[2*cpstride*i]; mlist[i].Vimag = cp[2*cpstride*i+1]; } std::sort(mlist,mlist+nnz); T** op; // Allocate the output (sparse) array op = new T*[cols]; // Get an integer pointer into the IJV list int ptr = 0; // Stringify... MemBlock bufferBlock(rows*4); T* buffer = bufferBlock.Pointer(); for (int col=0;col(buffer,mlist,nnz,ptr,col,rows); // Free the space delete[] mlist; // Return the array return op; } void* makeSparseFromIJV(Class dclass, int rows, int cols, int nnz, const uint32* I, int istride, const uint32 *J, int jstride, const void* cp, int cpstride) { switch(dclass) { case FM_LOGICAL: return makeSparseFromIJVLogical(rows,cols,nnz,I,istride, J,jstride,(const logical*)cp, cpstride); case FM_INT32: return makeSparseFromIJVReal(rows,cols,nnz,I,istride, J,jstride,(const int32*)cp, cpstride); case FM_FLOAT: return makeSparseFromIJVReal(rows,cols,nnz,I,istride, J,jstride,(const float*)cp, cpstride); case FM_DOUBLE: return makeSparseFromIJVReal(rows,cols,nnz,I,istride, J,jstride,(const double*)cp, cpstride); case FM_COMPLEX: return makeSparseFromIJVComplex(rows,cols,nnz,I,istride, J,jstride,(const float*)cp, cpstride); case FM_DCOMPLEX: return makeSparseFromIJVComplex(rows,cols,nnz,I,istride, J,jstride,(const double*)cp, cpstride); default: throw Exception("unsupported type for makeSparseFromIJV"); } } void DeleteSparseMatrix(Class dclass, int cols, void *cp) { switch(dclass) { case FM_LOGICAL: DeleteSparse((uint32**)cp,cols); return; case FM_INT32: DeleteSparse((int32**)cp,cols); return; case FM_FLOAT: DeleteSparse((float**)cp,cols); return; case FM_DOUBLE: DeleteSparse((double**)cp,cols); return; case FM_COMPLEX: DeleteSparse((float**)cp,cols); return; case FM_DCOMPLEX: DeleteSparse((double**)cp,cols); return; default: throw Exception("unsupported type for DeleteSparseMatrix"); } } template S** TypeConvertSparseRealReal(T** src, int rows, int cols) { S** dp; dp = new S*[cols]; MemBlock bufferBlock(rows*2); S* buffer = bufferBlock.Pointer(); for (int p=0;p B(buffer,rows); RLEDecoder A(src[p],rows); A.update(); while (A.more()) { B.set(A.row()); B.push((S) A.value()); A.advance(); } B.end(); dp[p] = B.copyout(); } return dp; } template S** TypeConvertSparseComplexComplex(T** src, int rows, int cols) { S** dp; dp = new S*[cols]; MemBlock bufferBlock(rows*4); S* buffer = bufferBlock.Pointer(); for (int p=0;p B(buffer,rows); RLEDecoderComplex A(src[p],rows); A.update(); while (A.more()) { B.set(A.row()); B.push((S) A.value_real(),(S) A.value_imag()); A.advance(); } B.end(); dp[p] = B.copyout(); } return dp; } template S** TypeConvertSparseRealComplex(T** src, int rows, int cols) { S** dp; dp = new S*[cols]; MemBlock bufferBlock(rows*4); S* buffer = bufferBlock.Pointer(); for (int p=0;p B(buffer,rows); RLEDecoder A(src[p],rows); A.update(); while (A.more()) { B.set(A.row()); B.push(A.value(),0); A.advance(); } B.end(); dp[p] = B.copyout(); } return dp; } template S** TypeConvertSparseComplexReal(T** src, int rows, int cols) { S** dp; dp = new S*[cols]; MemBlock bufferBlock(rows*2); S* buffer = bufferBlock.Pointer(); for (int p=0;p B(buffer,rows); RLEDecoderComplex A(src[p],rows); A.update(); while (A.more()) { B.set(A.row()); B.push((S) A.value_real()); A.advance(); } B.end(); dp[p] = B.copyout(); } return dp; } template uint32** TypeConvertSparseRealLogical(T** src, int rows, int cols) { uint32** dp; dp = new uint32*[cols]; MemBlock bufferBlock(rows*2); uint32* buffer = bufferBlock.Pointer(); for (int p=0;p B(buffer,rows); RLEDecoder A(src[p],rows); A.update(); while (A.more()) { B.set(A.row()); if (A.value() == 0) B.push(0); else B.push(1); A.advance(); } B.end(); dp[p] = B.copyout(); } return dp; } template uint32** TypeConvertSparseComplexLogical(T** src, int rows, int cols) { uint32** dp; dp = new uint32*[cols]; MemBlock bufferBlock(rows*2); uint32* buffer = bufferBlock.Pointer(); for (int p=0;p B(buffer,rows); RLEDecoderComplex A(src[p],rows); A.update(); while (A.more()) { B.set(A.row()); if ((A.value_real() == 0) && (A.value_imag() == 0)) B.push(0); else B.push(1); A.advance(); } B.end(); dp[p] = B.copyout(); } return dp; } template void* CopySparseMatrix(const T** src, int cols) { T** dp; dp = new T*[cols]; int i; for (i=0;i int CountNonzerosComplex(const T** src, int rows, int cols) { int nnz = 0; for (int i=0;i A(src[i],rows); nnz += A.nnzs(); } return nnz; } template int CountNonzerosReal(const T** src, int rows, int cols) { int nnz = 0; for (int i=0;i A(src[i],rows); nnz += A.nnzs(); } return nnz; } template T* SparseToIJVComplex(const T**src, int rows, int cols, uint32* &I, uint32* &J, int &nnz) { nnz = CountNonzerosComplex(src,rows,cols); I = (uint32*) Malloc(sizeof(uint32)*nnz); J = (uint32*) Malloc(sizeof(uint32)*nnz); T* V = (T*) Malloc(sizeof(T)*nnz*2); int ptr = 0; for (int i=0;i A(src[i],rows); A.update(); while (A.more()) { I[ptr] = A.row()+1; J[ptr] = i+1; V[2*ptr] = A.value_real(); V[2*ptr+1] = A.value_imag(); ptr++; A.advance(); } } return V; } template T* SparseToIJVComplex2(const T**src, int rows, int cols, uint32* &I, uint32* &J, int &nnz) { nnz = CountNonzerosComplex(src,rows,cols); I = (uint32*) Malloc(sizeof(uint32)*nnz); J = (uint32*) Malloc(sizeof(uint32)*(cols + 1)); T* V = (T*) Malloc(sizeof(T)*nnz*2); int ptr = 0; J[0] = 0; for (int i=0;i A(src[i],rows); A.update(); while (A.more()) { I[ptr] = A.row(); V[ptr] = A.value_real(); V[ptr+nnz] = A.value_imag(); ptr++; A.advance(); } J[i+1] = ptr; } return V; } template T* SparseToIJVReal(const T**src, int rows, int cols, uint32* &I, uint32* &J, int &nnz) { nnz = CountNonzerosReal(src,rows,cols); I = (uint32*) Malloc(sizeof(uint32)*nnz); J = (uint32*) Malloc(sizeof(uint32)*nnz); T* V = (T*) Malloc(sizeof(T)*nnz); int ptr = 0; for (int i=0;i A(src[i],rows); A.update(); while (A.more()) { I[ptr] = A.row()+1; J[ptr] = i+1; V[ptr] = A.value(); ptr++; A.advance(); } } return V; } template T* SparseToIJVReal2(const T**src, int rows, int cols, uint32* &I, uint32* &J, int &nnz) { nnz = CountNonzerosReal(src,rows,cols); I = (uint32*) Malloc(sizeof(uint32)*nnz); J = (uint32*) Malloc(sizeof(uint32)*(cols + 1)); T* V = (T*) Malloc(sizeof(T)*nnz); int ptr = 0; J[0] = 0; for (int i=0;i A(src[i],rows); A.update(); while (A.more()) { I[ptr] = A.row(); V[ptr] = A.value(); ptr++; A.advance(); } J[i+1] = ptr; } return V; } // Convert a sparse matrix to IJV void* SparseToIJV(Class dclass, int rows, int cols, const void* cp, uint32* &I, uint32* &J, int &nnz) { switch (dclass) { case FM_LOGICAL: return SparseToIJVReal((const uint32**)cp,rows,cols,I,J,nnz); case FM_INT32: return SparseToIJVReal((const int32**)cp,rows,cols,I,J,nnz); case FM_FLOAT: return SparseToIJVReal((const float**)cp,rows,cols,I,J,nnz); case FM_DOUBLE: return SparseToIJVReal((const double**)cp,rows,cols,I,J,nnz); case FM_COMPLEX: return SparseToIJVComplex((const float**)cp,rows,cols,I,J,nnz); case FM_DCOMPLEX: return SparseToIJVComplex((const double**)cp,rows,cols,I,J,nnz); default: throw Exception("unsupported type for SparseToIJV"); } } // Convert a sparse matrix to IJV in MATLAB format void* SparseToIJV2(Class dclass, int rows, int cols, const void* cp, uint32* &I, uint32* &J, int &nnz) { switch (dclass) { case FM_LOGICAL: return SparseToIJVReal2((const uint32**)cp,rows,cols,I,J,nnz); case FM_INT32: return SparseToIJVReal2((const int32**)cp,rows,cols,I,J,nnz); case FM_FLOAT: return SparseToIJVReal2((const float**)cp,rows,cols,I,J,nnz); case FM_DOUBLE: return SparseToIJVReal2((const double**)cp,rows,cols,I,J,nnz); case FM_COMPLEX: return SparseToIJVComplex2((const float**)cp,rows,cols,I,J,nnz); case FM_DCOMPLEX: return SparseToIJVComplex2((const double**)cp,rows,cols,I,J,nnz); default: throw Exception("unsupported type for SparseToIJV2"); } } void* CopySparseMatrix(Class dclass, int cols, const void* cp) { switch (dclass) { case FM_LOGICAL: return CopySparseMatrix((const uint32**)cp,cols); case FM_INT32: return CopySparseMatrix((const int32**)cp,cols); case FM_FLOAT: return CopySparseMatrix((const float**)cp,cols); case FM_DOUBLE: return CopySparseMatrix((const double**)cp,cols); case FM_COMPLEX: return CopySparseMatrix((const float**)cp,cols); case FM_DCOMPLEX: return CopySparseMatrix((const double**)cp,cols); default: throw Exception("unsupported type for CopySparseMatrix"); } } int CountNonzeros(Class dclass, int rows, int cols, const void *cp) { switch (dclass) { case FM_LOGICAL: return CountNonzerosReal((const uint32**)cp,rows,cols); case FM_INT32: return CountNonzerosReal((const int32**)cp,rows,cols); case FM_FLOAT: return CountNonzerosReal((const float**)cp,rows,cols); case FM_DOUBLE: return CountNonzerosReal((const double**)cp,rows,cols); case FM_COMPLEX: return CountNonzerosComplex((const float**)cp,rows,cols); case FM_DCOMPLEX: return CountNonzerosComplex((const double**)cp,rows,cols); default: throw Exception("unsupported type for CountNonzeros"); } } template bool AllZerosReal(T* A) { return ((A[0] == 2) && (A[1] == 0)); } template bool AllZerosComplex(T* A) { return ((A[0] == 3) && (A[1] == 0) && (A[2] == 0)); } template bool MatchRLESpotReal(T* A, int pos, int nrows) { if (pos == 1) return ((A[0] == 3) && (A[1] != 0) && (A[2] == 0) && (A[3] == nrows-1)); if (pos == nrows) return ((A[0] == 3) && (A[1] == 0) && (A[2] == nrows-1) && (A[3] != 0)); return ((A[0] == 5) && (A[1] == 0) && (A[2] == pos-1) && (A[3] != 0) && (A[4] == 0) && (A[5] == nrows-pos)); } template bool MatchRLESpotComplex(T* A, int pos, int nrows) { if (pos == 1) return ((A[0] == 5) && ((A[1] != 0) || (A[2] != 0)) && (A[3] == 0) && (A[4] == 0) && (A[5] == (nrows-1))); if (pos == nrows) return ((A[0] == 5) && (A[1] == 0) && (A[2] == 0) && (A[3] == nrows-1) && (A[4] != 0) || (A[5] != 0)); // The mth column should [3 0 0 nrows] or [9 0 0 m-1 nz_r nz_i 0 0 nrows-m-1] return ((A[0] == 8) && (A[1] == 0) && (A[2] == 0) && (A[3] == pos-1) && ((A[4] != 0) || (A[5] != 0)) && (A[6] == 0) && (A[7] == 0) && (A[8] == nrows-pos)); } template bool IsSparseMatrixDiagonalReal(T** A, int rows, int cols) { if (rows != cols) return false; // First column should be [3 nz 0 nrows-1] or [2 0 nrows] // The mth column should [2 0 nrows] or [5 0 m-1 nz 0 nrows-m-1] // Last column should be [3 0 nrows-1 nz] or [2 0 nrows] for (int i=1;i<=cols;i++) if (!AllZerosReal(A[i-1]) && !MatchRLESpotReal(A[i-1],i,rows)) return false; return true; } template bool IsSparseMatrixDiagonalComplex(T** A, int rows, int cols) { if (rows != cols) return false; // First column should be [5 nz_r nz_i 0 0 nrows-1] or [3 0 0 nrows] // The mth column should [3 0 0 nrows] or [8 0 0 m-1 nz_r nz_i 0 0 nrows-m-1] // Last column should be [5 0 0 nrows-1 nz_r nz_i] or [3 0 0 nrows] for (int i=1;i<=cols;i++) if (!AllZerosComplex(A[i-1]) && !MatchRLESpotComplex(A[i-1],i,rows)) return false; return true; } template void DiagSparseRealMultiply(T** A, int A_rows, int A_cols, T** B, int B_cols, T**C) { MemBlock AweightsBlock(A_rows); T* Aweights = AweightsBlock.Pointer(); for (int i=0;i bufferBlock(2*A_rows); T* buffer = bufferBlock.Pointer(); for (int i=0;i Bcol(B[i],A_cols); RLEEncoder Ccol(buffer,A_rows); Bcol.update(); while (Bcol.more()) { Ccol.set(Bcol.row()); Ccol.push(Bcol.value()*Aweights[Bcol.row()]); Bcol.advance(); } Ccol.end(); C[i] = Ccol.copyout(); } } template void DiagSparseComplexMultiply(T** A, int A_rows, int A_cols, T** B, int B_cols, T**C) { MemBlock AweightsBlock(2*A_rows); T* Aweights = AweightsBlock.Pointer(); for (int i=0;i bufferBlock(4*A_rows); T* buffer = bufferBlock.Pointer(); for (int i=0;i Bcol(B[i],A_cols); RLEEncoderComplex Ccol(buffer,A_rows); Bcol.update(); while (Bcol.more()) { Ccol.set(Bcol.row()); Ccol.push(Bcol.value_real()*Aweights[2*Bcol.row()] - Bcol.value_imag()*Aweights[2*Bcol.row()+1], Bcol.value_real()*Aweights[2*Bcol.row()+1] + Bcol.value_imag()*Aweights[2*Bcol.row()]); Bcol.advance(); } Ccol.end(); C[i] = Ccol.copyout(); } } // Multiply a sparse matrix by a sparse matrix (result is sparse) // Here we use the following order for the loops: // A normal multiply is // for j=1:Ccols // for k=1:Acols // for i=1:Arows // c[i][j] += a[i][k]*b[k][j] template void SparseSparseRealMultiply(T** A, int A_rows, int A_cols, T** B, int B_cols, T** C) { // This new version of the sparse-sparse matrix multiply works // as follows. The outer loop is over the columns of B if (IsSparseMatrixDiagonalReal(A,A_rows,A_cols)) return DiagSparseRealMultiply(A,A_rows,A_cols, B,B_cols,C); MemBlock abuffBlock(2*A_rows); T* abuff = abuffBlock.Pointer(); MemBlock dbuffBlock(A_rows); T* dbuff = dbuffBlock.Pointer(); for (int j=0;j Bcol(B[j],A_cols); Bcol.update(); // Zero out the accumulator memset(dbuff,0,sizeof(T)*A_rows); while (Bcol.more()) { T scale = Bcol.value(); RLEDecoder Acol(A[Bcol.row()],A_rows); Acol.update(); while (Acol.more()) { dbuff[Acol.row()] += Acol.value()*scale; Acol.advance(); } Bcol.advance(); } C[j] = CompressRealVector(abuff,dbuff,A_rows); } } // Multiply a sparse matrix by a sparse matrix (result is sparse) // Here we use the following order for the loops: // A normal multiply is // for j=1:Ccols // for k=1:Acols // for i=1:Arows // c[i][j] += a[i][k]*b[k][j] template void SparseSparseComplexMultiply(T** A, int A_rows, int A_cols, T** B, int B_cols, T** C) { if (IsSparseMatrixDiagonalComplex(A,A_rows,A_cols)) return DiagSparseComplexMultiply(A,A_rows,A_cols, B,B_cols,C); // This new version of the sparse-sparse matrix multiply works // as follows. The outer loop is over the columns of B MemBlock abuffBlock(4*A_rows); T* abuff = abuffBlock.Pointer(); MemBlock dbuffBlock(2*A_rows); T* dbuff = dbuffBlock.Pointer(); for (int j=0;j Bcol(B[j],A_cols); Bcol.update(); // Zero out the accumulator memset(dbuff,0,sizeof(T)*A_rows*2); while (Bcol.more()) { T scale_real = Bcol.value_real(); T scale_imag = Bcol.value_imag(); RLEDecoderComplex Acol(A[Bcol.row()],A_rows); Acol.update(); while (Acol.more()) { dbuff[2*Acol.row()] += Acol.value_real()*scale_real - Acol.value_imag()*scale_imag; dbuff[2*Acol.row()+1] += Acol.value_imag()*scale_real + Acol.value_real()*scale_imag; Acol.advance(); } Bcol.advance(); } C[j] = CompressComplexVector(abuff,dbuff,A_rows); } } // Multiply a sparse matrix by a dense matrix (result is dense) // A normal muliply is // for i=1:Arows // for j=1:Ccols // c[i][j] = 0; // for k = 1:Acols // c[i][j] += a[i][k]*b[k][j] // If we assume c is initialized to zero, we can write: // for i=1:Arows // for j=1:Ccols // for k = 1:Acols // c[i][j] += a[i][k]*b[k][j] // And change the order of the loops: // for k=1:Acols // for j=1:Bcols // for i=1:Arows // c[i][j] += a[i][k]*b[k][j] // The inner loop is now striding over the compressed // dimension of A, which is the best case. Also, we can // apply a simple zero-check to b[k][j] and skip loops // over i for which it is zero. template void SparseDenseRealMultiply(T**A, int A_rows, int A_cols, T*B, int B_cols, T*C) { T Bval; memset(C,0,A_rows*B_cols*sizeof(T)); for (int k=0;k Acol(A[k],A_rows); Acol.update(); while (Acol.more()) { C[Acol.row()+j*A_rows] += Acol.value()*Bval; Acol.advance(); } } } } template void SparseDenseComplexMultiply(T**A, int A_rows, int A_cols, T*B, int B_cols, T*C) { T Bval_real; T Bval_imag; memset(C,0,2*A_rows*B_cols*sizeof(T)); for (int k=0;k Acol(A[k],A_rows); Acol.update(); while (Acol.more()) { int i = Acol.row(); C[2*(i+j*A_rows)] += Acol.value_real()*Bval_real - Acol.value_imag()*Bval_imag; C[2*(i+j*A_rows)+1] += Acol.value_real()*Bval_imag + Acol.value_imag()*Bval_real; Acol.advance(); } } } } // for i=1:Arows // for j=1:Ccols // for k = 1:Acols // c[i][j] += a[i][k]*b[k][j] // Multiply a dense matrix by a sparse matrix (result is dense) // If we move the I loop inside, we can use the sparsity of B // to our advantage // for j=1:Ccols // for k = 1:Acols // for i=1:Arows // c[i][j] += a[i][k]*b[k][j] template void DenseSparseRealMultiply(T* A, int A_rows, int A_cols, T** B, int B_cols, T* C) { int B_rows; memset(C,0,sizeof(T)*A_rows*B_cols); B_rows = A_cols; for (int j=0;j Bcol(B[j],B_rows); Bcol.update(); while (Bcol.more()) { T Bval = Bcol.value(); int n = Bcol.row(); for (int i=0;i void DenseSparseComplexMultiply(T* A, int A_rows, int A_cols, T** B, int B_cols, T* C) { int B_rows; memset(C,0,2*sizeof(T)*A_rows*B_cols); B_rows = A_cols; for (int j=0;j Bcol(B[j],B_rows); Bcol.update(); while (Bcol.more()) { T Bval_real = Bcol.value_real(); T Bval_imag = Bcol.value_imag(); int n = Bcol.row(); for (int i=0;i((float**)ap,rows,cols, (float*)bp,bcols,C); return C; } case FM_DOUBLE: { double *C = (double*) Malloc(rows*bcols*sizeof(double)); SparseDenseRealMultiply((double**)ap,rows,cols, (double*)bp,bcols,C); return C; } case FM_COMPLEX: { float *C = (float*) Malloc(2*rows*bcols*sizeof(float)); SparseDenseComplexMultiply((float**)ap,rows,cols, (float*)bp,bcols,C); return C; } case FM_DCOMPLEX: { double *C = (double*) Malloc(2*rows*bcols*sizeof(double)); SparseDenseComplexMultiply((double**)ap,rows,cols, (double*)bp,bcols,C); return C; } default: throw Exception("unexpected type in sparse-sparse matrix multiply"); } } void* DenseSparseMatrixMultiply(Class dclass, int rows, int cols, int bcols, const void* ap, const void* bp) { switch (dclass) { case FM_FLOAT: { float *C = (float*) Malloc(rows*bcols*sizeof(float)); DenseSparseRealMultiply((float*)ap,rows,cols, (float**)bp,bcols,C); return C; } case FM_DOUBLE: { double *C = (double*) Malloc(rows*bcols*sizeof(double)); DenseSparseRealMultiply((double*)ap,rows,cols, (double**)bp,bcols,C); return C; } case FM_COMPLEX: { float *C = (float*) Malloc(2*rows*bcols*sizeof(float)); DenseSparseComplexMultiply((float*)ap,rows,cols, (float**)bp,bcols,C); return C; } case FM_DCOMPLEX: { double *C = (double*) Malloc(2*rows*bcols*sizeof(double)); DenseSparseComplexMultiply((double*)ap,rows,cols, (double**)bp,bcols,C); return C; } default: throw Exception("unsupported type for sparse-dense multiply"); } } void* SparseSparseMatrixMultiply(Class dclass, int rows, int cols, int bcols, const void* ap, const void* bp) { switch (dclass) { case FM_FLOAT: { float **C = new float*[bcols]; SparseSparseRealMultiply((float**)ap,rows,cols, (float**)bp,bcols,C); return C; } case FM_DOUBLE: { double **C = new double*[bcols]; SparseSparseRealMultiply((double**)ap,rows,cols, (double**)bp,bcols,C); return C; } case FM_COMPLEX: { float **C = new float*[bcols]; SparseSparseComplexMultiply((float**)ap,rows,cols, (float**)bp,bcols,C); return C; } case FM_DCOMPLEX: { double **C = new double*[bcols]; SparseSparseComplexMultiply((double**)ap,rows,cols, (double**)bp,bcols,C); return C; } default: throw Exception("unsupported type for sparse-sparse complex multiply"); } } /* Type convert the given sparse array and delete it */ /* dclass is the source type, oclass is the destination class*/ void* TypeConvertSparse(Class dclass, int rows, int cols, const void *cp, Class oclass) { if (dclass == FM_LOGICAL) { switch(oclass) { case FM_INT32: return TypeConvertSparseRealReal((uint32**)cp,rows,cols); case FM_FLOAT: return TypeConvertSparseRealReal((uint32**)cp,rows,cols); case FM_DOUBLE: return TypeConvertSparseRealReal((uint32**)cp,rows,cols); case FM_COMPLEX: return TypeConvertSparseRealComplex((uint32**)cp,rows,cols); case FM_DCOMPLEX: return TypeConvertSparseRealComplex((uint32**)cp,rows,cols); default: throw Exception("unsupported type for sparse-dense multiply"); } } else if (dclass == FM_INT32) { switch(oclass) { case FM_LOGICAL: return TypeConvertSparseRealLogical((int32**)cp,rows,cols); case FM_FLOAT: return TypeConvertSparseRealReal((int32**)cp,rows,cols); case FM_DOUBLE: return TypeConvertSparseRealReal((int32**)cp,rows,cols); case FM_COMPLEX: return TypeConvertSparseRealComplex((int32**)cp,rows,cols); case FM_DCOMPLEX: return TypeConvertSparseRealComplex((int32**)cp,rows,cols); default: throw Exception("unsupported type for sparse-dense multiply"); } } else if (dclass == FM_FLOAT) { switch(oclass) { case FM_LOGICAL: return TypeConvertSparseRealLogical((float**)cp,rows,cols); case FM_INT32: return TypeConvertSparseRealReal((float**)cp,rows,cols); case FM_DOUBLE: return TypeConvertSparseRealReal((float**)cp,rows,cols); case FM_COMPLEX: return TypeConvertSparseRealComplex((float**)cp,rows,cols); case FM_DCOMPLEX: return TypeConvertSparseRealComplex((float**)cp,rows,cols); default: throw Exception("unsupported type for sparse-dense multiply"); } } else if (dclass == FM_DOUBLE) { switch(oclass) { case FM_LOGICAL: return TypeConvertSparseRealLogical((double**)cp,rows,cols); case FM_INT32: return TypeConvertSparseRealReal((double**)cp,rows,cols); case FM_FLOAT: return TypeConvertSparseRealReal((double**)cp,rows,cols); case FM_COMPLEX: return TypeConvertSparseRealComplex((double**)cp,rows,cols); case FM_DCOMPLEX: return TypeConvertSparseRealComplex((double**)cp,rows,cols); default: throw Exception("unsupported type for sparse-dense multiply"); } } else if (dclass == FM_COMPLEX) { switch(oclass) { case FM_LOGICAL: return TypeConvertSparseComplexLogical((float**)cp,rows,cols); case FM_INT32: return TypeConvertSparseComplexReal((float**)cp,rows,cols); case FM_FLOAT: return TypeConvertSparseComplexReal((float**)cp,rows,cols); case FM_DOUBLE: return TypeConvertSparseComplexReal((float**)cp,rows,cols); case FM_DCOMPLEX: return TypeConvertSparseComplexComplex((float**)cp,rows,cols); default: throw Exception("unsupported type for sparse-dense multiply"); } } else if (dclass == FM_DCOMPLEX) { switch(oclass) { case FM_LOGICAL: return TypeConvertSparseComplexLogical((double**)cp,rows,cols); case FM_INT32: return TypeConvertSparseComplexReal((double**)cp,rows,cols); case FM_FLOAT: return TypeConvertSparseComplexReal((double**)cp,rows,cols); case FM_DOUBLE: return TypeConvertSparseComplexReal((double**)cp,rows,cols); case FM_COMPLEX: return TypeConvertSparseComplexComplex((double**)cp,rows,cols); default: throw Exception("unsupported type for sparse-dense multiply"); } } throw Exception("unsupported type for TypeConvertSparse"); } // There _has_ to be a better way to do this.. =P void TrimEmpties(ArrayMatrix &m) { for (ArrayMatrix::iterator i=m.begin();i != m.end();i++) { // Scan for empties bool emptiesFound = true; while (emptiesFound) { emptiesFound = false; int j=0; while (jsize()) { if (i->at(j).isEmpty()) { i->erase(i->begin() + j); emptiesFound = true; break; } else j++; } } } bool emptiesFound = true; while (emptiesFound) { emptiesFound = false; for (ArrayMatrix::iterator i=m.begin();i != m.end();i++) { if (i->empty()) { m.erase(i); emptiesFound = true; break; } } } } // The strategy is straightforward, we loop over the output columns // We need one pointer for each row of the array matrix template void* SparseMatrixConst(int cols, ArrayMatrix m) { T** dst; dst = new T*[cols]; // The blockindx array tracks which "block" within each // row is active MemBlock blockindxBlock(m.size()); int *blockindx = blockindxBlock.Pointer(); // The colindx array tracks which column within each block // is active MemBlock colindxBlock(m.size()); int *colindx = colindxBlock.Pointer(); unsigned int j; for (j=0;j= m[j][blockindx[j]].getDimensionLength(1)) { blockindx[j]++; colindx[j] = 0; } } } return dst; } void* SparseMatrixConstructor(Class dclass, int cols, ArrayMatrix m) { TrimEmpties(m); // Precondition the arrays by converting to sparse and to // the output type for (ArrayMatrix::iterator i=m.begin();i != m.end();i++) { for (int j=0;jsize();j++) { (*i)[j].promoteType(dclass); (*i)[j].makeSparse(); } } // Now, we can construct the output array switch (dclass) { case FM_LOGICAL: return SparseMatrixConst(cols, m); case FM_INT32: return SparseMatrixConst(cols, m); case FM_FLOAT: return SparseMatrixConst(cols, m); case FM_DOUBLE: return SparseMatrixConst(cols, m); case FM_COMPLEX: return SparseMatrixConst(cols, m); case FM_DCOMPLEX: return SparseMatrixConst(cols, m); default: throw Exception("unsupported type for SparseMatrixConstructor"); } } template void RealStringExtract(const T* src, int ndx, T* dst) { int i=0; int n=1; int j; while (i void ComplexStringExtract(const T* src, int ndx, T* dst) { int i=0; int n=1; int j; while (i<2*ndx) { if ((src[n] != 0) || (src[n+1] != 0)) { i+=2; n+=2; } else { j = (int) src[n+2]; i += j*2; n += 3; } } if (i==2*ndx) { dst[0] = src[n]; dst[1] = src[n+1]; } else { dst[0] = 0; dst[1] = 0; } } template IJVEntry* ConvertSparseToIJVListReal(const T** src, int rows, int cols, int &nnz) { nnz = CountNonzerosReal(src,rows,cols); IJVEntry* mlist = new IJVEntry[nnz]; int ptr = 0; int i, j, n; for (i=0;i IJVEntry* ConvertSparseToIJVListComplex(const T** src, int rows, int cols, int &nnz) { nnz = CountNonzerosComplex(src,rows,cols); IJVEntry* mlist = new IJVEntry[nnz]; int ptr = 0; int i, j, n; for (i=0;i void* ConvertIJVtoRLEReal(IJVEntry* mlist, int nnz, int rows, int cols) { T** B; B = new T*[cols]; MemBlock bufferBlock(rows*2); T* buffer = bufferBlock.Pointer(); int ptr = 0; for (int col=0;col(buffer,mlist,nnz,ptr,col,rows); return B; } template void* ConvertIJVtoRLEComplex(IJVEntry* mlist, int nnz, int rows, int cols) { T** B; B = new T*[cols]; MemBlock bufferBlock(rows*4); T* buffer = bufferBlock.Pointer(); int ptr = 0; for (int col=0;col(buffer,mlist,nnz,ptr,col,rows); return B; } // The original implementation is poor because for a linear access of the elements // of A, it requires significant time. A better approach is to build a sorted list // of the type: // ord indx_row indx_col // and then sort by the indx_col (and subsort by indx_row). We can then loop over // the indx_col's and populate the output with the corresponding element ord. The // reason this approach is best is because of the assignment version (which is equally // complicated). // // The problem is that the output needs to be resorted by ord to build the // sparse output matrix. I think the IJV notation is in order here... // // So the process is: // 1. Expand the sparse matrix into IJV format // 2. Sort the get array by linear index // 3. Do a sorted list compare/extract --> IJV format // 4. Recode IJV --> sparse output // Now the get array has 4 pieces of information - source row & col, // and dest row & col. All of this information can be packed into // an IJVEntry... template void* GetSparseVectorSubsetsReal(int rows, int cols, const T** A, const indexType* indx, int irows, int icols) { // Convert the sparse matrix into IJV format IJVEntry* mlist; int nnz; mlist = ConvertSparseToIJVListReal(A,rows,cols,nnz); // Unpack the indexing array into IJV format IJVEntry* ilist; int icount; icount = irows*icols; ilist = new IJVEntry[icount]; for (int i=0;i(ilist,icount,irows,cols); return B; } template void* SetSparseVectorSubsetsReal(int &rows, int &cols, const T** A, const indexType* indx, int irows, int icols, const T* data, int advance) { // Convert the sparse matrix into IJV format IJVEntry* mlist; int nnz; mlist = ConvertSparseToIJVListReal(A,rows,cols,nnz); // Unpack the indexing array into IJV format IJVEntry* ilist; int icount; icount = irows*icols; // check to see if a resize is necessary... int maxindx = indx[0]; for (int i=0;i indx[i]) ? maxindx : indx[i]; // if maxindx is larger than rows*cols, we have to // vector resize if (maxindx > rows*cols) { // To vector resize, we set rows = maxindx, cols = 1, but // we also adjust the row & column index of each IJVentry // first for (int i=0;i[icount]; for (int i=0;i* dlist; dlist = new IJVEntry[icount+nnz]; // Now do a merge operation between the // indexing array and the source array. int iptr; int jptr; int optr; jptr = 0; optr = 0; for (iptr=0;iptr(dlist,optr,rows,cols); delete[] mlist; delete[] ilist; delete[] dlist; return B; } template void* SetSparseVectorSubsetsComplex(int &rows, int &cols, const T** A, const indexType* indx, int irows, int icols, const T* data, int advance) { // Convert the sparse matrix into IJV format IJVEntry* mlist; int nnz; mlist = ConvertSparseToIJVListComplex(A,rows,cols,nnz); // Unpack the indexing array into IJV format IJVEntry* ilist; int icount; icount = irows*icols; // check to see if a resize is necessary... int maxindx = indx[0]; for (int i=0;i indx[i]) ? maxindx : indx[i]; // if maxindx is larger than rows*cols, we have to // vector resize if (maxindx > rows*cols) { // To vector resize, we set rows = maxindx, cols = 1, but // we also adjust the row & column index of each IJVentry // first for (int i=0;i[icount]; for (int i=0;i* dlist; dlist = new IJVEntry[icount+nnz]; // Now do a merge operation between the // indexing array and the source array. int iptr; int jptr; int optr; jptr = 0; optr = 0; for (iptr=0;iptr(dlist,optr,rows,cols); delete[] mlist; delete[] ilist; delete[] dlist; return B; } // This implementation is poor because for a linear access of the elements // of A, it requires significant time. template void* GetSparseVectorSubsetsComplex(int &rows, int &cols, const T** A, const indexType* indx, int irows, int icols) { // Convert the sparse matrix into IJV format IJVEntry* mlist; int nnz; mlist = ConvertSparseToIJVListComplex(A,rows,cols,nnz); // Unpack the indexing array into IJV format IJVEntry* ilist; int icount; icount = irows*icols; ilist = new IJVEntry[icount]; for (int i=0;i(ilist,icount,irows,cols); return B; } // GetSparseVectorSubsets - This one is a bit difficult to do efficiently. // For each column in the output, we have to extract potentially random // elements from the source array. void* GetSparseVectorSubsets(Class dclass, int rows, int cols, const void* src, const indexType* indx, int irows, int icols) { switch (dclass) { case FM_LOGICAL: return GetSparseVectorSubsetsReal(rows, cols, (const uint32**) src, indx, irows, icols); case FM_INT32: return GetSparseVectorSubsetsReal(rows, cols, (const int32**) src, indx, irows, icols); case FM_FLOAT: return GetSparseVectorSubsetsReal(rows, cols, (const float**) src, indx, irows, icols); case FM_DOUBLE: return GetSparseVectorSubsetsReal(rows, cols, (const double**) src, indx, irows, icols); case FM_COMPLEX: return GetSparseVectorSubsetsComplex(rows, cols, (const float**) src, indx, irows, icols); case FM_DCOMPLEX: return GetSparseVectorSubsetsComplex(rows, cols, (const double**) src, indx, irows, icols); default: throw Exception("unsupported type for GetSparseVctorSubsets"); } } // SetSparseVectorSubsets - This one is a bit difficult to do efficiently. // For each column in the output, we have to extract potentially random // elements from the source array. void* SetSparseVectorSubsets(Class dclass, int &rows, int &cols, const void* src, const indexType* indx, int irows, int icols, const void* data, int advance) { switch (dclass) { case FM_LOGICAL: return SetSparseVectorSubsetsReal(rows, cols, (const uint32**) src, indx, irows, icols, (const uint32*) data, advance); case FM_INT32: return SetSparseVectorSubsetsReal(rows, cols, (const int32**) src, indx, irows, icols, (const int32*) data, advance); case FM_FLOAT: return SetSparseVectorSubsetsReal(rows, cols, (const float**) src, indx, irows, icols, (const float*) data, advance); case FM_DOUBLE: return SetSparseVectorSubsetsReal(rows, cols, (const double**) src, indx, irows, icols, (const double*) data, advance); case FM_COMPLEX: return SetSparseVectorSubsetsComplex(rows, cols, (const float**) src, indx, irows, icols, (const float*) data, advance); case FM_DCOMPLEX: return SetSparseVectorSubsetsComplex(rows, cols, (const double**) src, indx, irows, icols, (const double*) data, advance); default: throw Exception("unsupported type for SetSparseVectorSubsets"); } } template void* GetSparseColumnSubsetAssist(int cols, const T** A, const indexType*cindx, int icols) { for (int i=0;i cols)) throw Exception("out of range column index in sparse matrix expression of type A(:,n)"); T** dest; dest = new T*[icols]; for (int i=0;i(A[cindx[i]-1]); return dest; } // GetSparseNDimSubsets void* GetSparseColumnSubset(Class dclass, int cols, const void* src, const indexType* cindx, int icols) { switch(dclass) { case FM_LOGICAL: return GetSparseColumnSubsetAssist(cols, (const uint32**) src, cindx, icols); case FM_INT32: return GetSparseColumnSubsetAssist(cols, (const int32**) src, cindx, icols); case FM_FLOAT: return GetSparseColumnSubsetAssist(cols, (const float**) src, cindx, icols); case FM_DOUBLE: return GetSparseColumnSubsetAssist(cols, (const double**) src, cindx, icols); case FM_COMPLEX: return GetSparseColumnSubsetAssist(cols, (const float**) src, cindx, icols); case FM_DCOMPLEX: return GetSparseColumnSubsetAssist(cols, (const double**) src, cindx, icols); default: throw Exception("unsupported type for GetSparseColumnSubset"); } } template void* GetSparseNDimSubsetsReal(int rows, const T** A, const indexType* rindx, int irows, const indexType* cindx, int icols) { // We need to put rindx into an IJV list, we can set the J column // to 1 (it doesn't matter), and then sort the list. IJVEntry* mlist = new IJVEntry[irows]; for (int i=0;i* ilist = new IJVEntry[irows]; T** dp = new T*[icols]; MemBlock bufferBlock(2*irows); T* buffer = bufferBlock.Pointer(); for (int n=0;n Acol(A[cindx[n]-1],rows); Acol.update(); for (int m=0;m mlist[m].I) ilist[m].Vreal = 0; else ilist[m].Vreal = Acol.value(); } std::sort(ilist,ilist+irows); int ptr = 0; dp[n] = CompressRealIJV(buffer,ilist,irows,ptr,1,irows); } delete[] ilist; delete[] mlist; return dp; } template void* GetSparseNDimSubsetsComplex(int rows, const T** A, const indexType* rindx, int irows, const indexType* cindx, int icols) { // We need to put rindx into an IJV list, we can set the J column // to 1 (it doesn't matter), and then sort the list. IJVEntry* mlist = new IJVEntry[irows]; for (int i=0;i* ilist = new IJVEntry[irows]; T** dp = new T*[icols]; MemBlock bufferBlock(4*irows); T* buffer = bufferBlock.Pointer(); for (int n=0;n Acol(A[cindx[n]-1],rows); Acol.update(); for (int m=0;m mlist[m].I) { ilist[m].Vreal = 0; ilist[m].Vimag = 0; } else { ilist[m].Vreal = Acol.value_real(); ilist[m].Vimag = Acol.value_imag(); } } std::sort(ilist,ilist+irows); int ptr = 0; dp[n] = CompressComplexIJV(buffer,ilist,irows,ptr,1,irows); } delete[] ilist; delete[] mlist; return dp; } bool CheckAllRowsReference(const indexType* rindx, int rows) { for (unsigned int i=0;i cptrBlock(icols); indexType* cptr; if (cindx == NULL) { cptr = cptrBlock.Pointer(); for (int i=0;i(rows, (const uint32**) src, rindx, irows, cptr, icols); case FM_INT32: return GetSparseNDimSubsetsReal(rows, (const int32**) src, rindx, irows, cptr, icols); case FM_FLOAT: return GetSparseNDimSubsetsReal(rows, (const float**) src, rindx, irows, cptr, icols); case FM_DOUBLE: return GetSparseNDimSubsetsReal(rows, (const double**) src, rindx, irows, cptr, icols); case FM_COMPLEX: return GetSparseNDimSubsetsComplex(rows, (const float**) src, rindx, irows, cptr, icols); case FM_DCOMPLEX: return GetSparseNDimSubsetsComplex(rows, (const double**) src, rindx, irows, cptr, icols); default: throw Exception("unsupported type for GetSparseNDimSubsets"); } } template void* GetSparseScalarReal(int rows, int cols, const T** src, int rindx, int cindx) { T* ret = (T*) Malloc(sizeof(T)*1); rindx -= 1; cindx -= 1; if ((rindx<0) || (rindx >= rows)) throw Exception("out of range row index in sparse scalar indexing expression A(m,n)"); if ((cindx<0) || (cindx >= cols)) throw Exception("out of range col index in sparse scalar indexing expression A(m,n)"); RealStringExtract(src[cindx],rindx,ret); return ret; } template void* GetSparseScalarComplex(int rows, int cols, const T** src, int rindx, int cindx) { T* ret = (T*) Malloc(sizeof(T)*2); rindx -= 1; cindx -= 1; if ((rindx<0) || (rindx >= rows)) throw Exception("out of range row index in sparse scalar indexing expression A(m,n)"); if ((cindx<0) || (cindx >= cols)) throw Exception("out of range col index in sparse scalar indexing expression A(m,n)"); ComplexStringExtract(src[cindx],rindx,ret); return ret; } // GetSparseNDimSubsets void* GetSparseScalarElement(Class dclass, int rows, int cols, const void* src, indexType rindx, indexType cindx) { switch(dclass) { case FM_LOGICAL: return GetSparseScalarReal(rows, cols, (const uint32**) src, rindx, cindx); case FM_INT32: return GetSparseScalarReal(rows, cols, (const int32**) src, rindx, cindx); case FM_FLOAT: return GetSparseScalarReal(rows, cols, (const float**) src, rindx, cindx); case FM_DOUBLE: return GetSparseScalarReal(rows, cols, (const double**) src, rindx, cindx); case FM_COMPLEX: return GetSparseScalarComplex(rows, cols, (const float**) src, rindx, cindx); case FM_DCOMPLEX: return GetSparseScalarComplex(rows, cols, (const double**) src, rindx, cindx); default: throw Exception("unsupported type for GetSparseScalarElement"); } } template void* CopyResizeSparseMatrixComplex(const T** src, int rows, int cols, int maxrow, int maxcol) { T** dp; dp = new T*[maxcol]; int i; for (i=0;i void* CopyResizeSparseMatrixReal(const T** src, int rows, int cols, int maxrow, int maxcol) { T** dp; dp = new T*[maxcol]; int i; for (i=0;i((const uint32 **)Ap, rows,cols,newrows,newcols); case FM_INT32: return CopyResizeSparseMatrixReal((const int32 **)Ap, rows,cols,newrows,newcols); case FM_FLOAT: return CopyResizeSparseMatrixReal((const float **)Ap, rows,cols,newrows,newcols); case FM_DOUBLE: return CopyResizeSparseMatrixReal((const double **)Ap, rows,cols,newrows,newcols); case FM_COMPLEX: return CopyResizeSparseMatrixComplex((const float **)Ap, rows,cols,newrows,newcols); case FM_DCOMPLEX: return CopyResizeSparseMatrixComplex((const double **)Ap, rows,cols,newrows,newcols); default: throw Exception("Unsupported type in makeDenseArray"); } } // Current version is N^2... What about an O(nnz) algorithm? // We can use a merge style - for each column, we template void SetSparseNDimSubsetsReal(int rows, T** dp, const indexType* rindx, int irows, const indexType* cindx, int icols, const T* data, int advance) { // For each column... MemBlock bufferBlock(rows*2); T* buffer = bufferBlock.Pointer(); // The data buffer MemBlock databufBlock(irows); T* databuf = databufBlock.Pointer(); // We have to unscramble the rindx order IJVEntry* mlist = new IJVEntry[irows]; MemBlock keepvalBlock(irows); bool* keepval = &keepvalBlock; for (int i=0;i Acol(dp[m],rows); RLEEncoder Anew(buffer,rows); Acol.update(); // Fill out the data buffer (in descrambled order) for (int j=0;j void SetSparseNDimSubsetsComplex(int rows, T** dp, const indexType* rindx, int irows, const indexType* cindx, int icols, const T* data, int advance) { // For each column... MemBlock bufferBlock(rows*4); T* buffer = bufferBlock.Pointer(); // The data buffer MemBlock databufBlock(irows*2); T* databuf = databufBlock.Pointer(); // We have to unscramble the rindx order IJVEntry* mlist = new IJVEntry[irows]; MemBlock keepvalBlock(irows); bool* keepval = keepvalBlock.Pointer(); for (int i=0;i Acol(dp[m],rows); RLEEncoderComplex Anew(buffer,rows); Acol.update(); // Fill out the data buffer (in descrambled order) for (int j=0;j void SetSparseColumnSubsetReal(int rows, T** dp, const indexType* cindx, int icols, const T* data, int advance) { MemBlock AcolBuffer(rows); T* Acol = &AcolBuffer; MemBlock bufferBlock(2*rows); T* buffer = &bufferBlock; for (int i=0;i(buffer,Acol,rows); } } template void SetSparseColumnSubsetComplex(int rows, T** dp, const indexType* cindx, int icols, const T* data, int advance) { MemBlock AcolBlock(2*rows); T* Acol = &AcolBlock; MemBlock bufferBlock(4*rows+1); T* buffer = &bufferBlock; for (int i=0;i(buffer,Acol,rows); } } void SetSparseColumnSubset(Class dclass, int rows, const void* src, const indexType* cindx, int icols, const void *data, int advance) { switch(dclass) { case FM_LOGICAL: return SetSparseColumnSubsetReal(rows,(uint32**) src, cindx,icols,(uint32*) data, advance); case FM_INT32: return SetSparseColumnSubsetReal(rows,(int32**) src, cindx,icols,(int32*) data, advance); case FM_FLOAT: return SetSparseColumnSubsetReal(rows,(float**) src, cindx,icols,(float*) data, advance); case FM_DOUBLE: return SetSparseColumnSubsetReal(rows,(double**) src, cindx,icols,(double*) data, advance); case FM_COMPLEX: return SetSparseColumnSubsetComplex(rows,(float**) src, cindx,icols,(float*) data, advance); case FM_DCOMPLEX: return SetSparseColumnSubsetComplex(rows,(double**) src, cindx,icols,(double*) data, advance); default: throw Exception("unexpected type in setsparsecolumn subset"); } } void SetSparseNDimSubsets(Class dclass, int rows, void* src, const indexType* rindx, int irows, const indexType* cindx, int icols, const void *data, int advance) { MemBlock cptrBlock(icols); indexType* cptr; if (cindx == NULL) { cptr = &cptrBlock; for (int i=0;i(rows, (uint32**) src, rindx, irows, cptr, icols, (const uint32*) data, advance); case FM_INT32: return SetSparseNDimSubsetsReal(rows, (int32**) src, rindx, irows, cptr, icols, (const int32*) data, advance); case FM_FLOAT: return SetSparseNDimSubsetsReal(rows, (float**) src, rindx, irows, cptr, icols, (const float*) data, advance); case FM_DOUBLE: return SetSparseNDimSubsetsReal(rows, (double**) src, rindx, irows, cptr, icols, (const double*) data, advance); case FM_COMPLEX: return SetSparseNDimSubsetsComplex(rows, (float**) src, rindx, irows, cptr, icols, (const float*) data, advance); case FM_DCOMPLEX: return SetSparseNDimSubsetsComplex(rows, (double**) src, rindx, irows, cptr, icols, (const double*) data, advance); default: throw Exception("unsupported type for SetSparseNDimSubsets"); } } template void* DeleteSparseMatrixRowsComplex(int rows, int cols, const T** src, bool *dmap) { // Count the number of undeleted rows int newrow; int i; newrow = 0; for (i=0;i NBufBlock(newrow*2); T* NBuf = &NBufBlock; MemBlock OBufBlock(rows*2); T* OBuf = &OBufBlock; MemBlock bufferBlock(newrow*4); T* buffer = &bufferBlock; // Allocate the output array for (i=0;i(src[i],OBuf,rows); // Copy it int ptr = 0; for (int j=0;j(buffer,NBuf,newrow); } return dest; } template void* DeleteSparseMatrixRowsReal(int rows, int cols, const T** src, bool *dmap) { // Count the number of undeleted columns int newrow; int i; newrow = 0; for (i=0;i NBufBlock(newrow); T* NBuf = &NBufBlock; MemBlock OBufBlock(rows); T* OBuf = &OBufBlock; MemBlock bufferBlock(newrow*2); T* buffer = &bufferBlock; // Allocate the output array for (i=0;i(src[i],OBuf,rows); // Copy it int ptr = 0; for (int j=0;j(buffer,NBuf,newrow); } return dest; } template void* DeleteSparseMatrixCols(int cols, const T** src, bool *dmap) { // Count the number of undeleted columns int newcol; int i; newcol = 0; for (i=0;i(cols, (const uint32**) cp, dmap); case FM_INT32: return DeleteSparseMatrixCols(cols, (const int32**) cp, dmap); case FM_FLOAT: return DeleteSparseMatrixCols(cols, (const float**) cp, dmap); case FM_DOUBLE: return DeleteSparseMatrixCols(cols, (const double**) cp, dmap); case FM_COMPLEX: return DeleteSparseMatrixCols(cols, (const float**) cp, dmap); case FM_DCOMPLEX: return DeleteSparseMatrixCols(cols, (const double**) cp, dmap); default: throw Exception("unsupported type for DeleteSparseMatrixCols"); } } void* DeleteSparseMatrixRows(Class dclass, int rows, int cols, const void* cp, bool *dmap) { switch(dclass) { case FM_LOGICAL: return DeleteSparseMatrixRowsReal(rows, cols, (const uint32**) cp, dmap); case FM_INT32: return DeleteSparseMatrixRowsReal(rows, cols, (const int32**) cp, dmap); case FM_FLOAT: return DeleteSparseMatrixRowsReal(rows, cols, (const float**) cp, dmap); case FM_DOUBLE: return DeleteSparseMatrixRowsReal(rows, cols, (const double**) cp, dmap); case FM_COMPLEX: return DeleteSparseMatrixRowsComplex(rows, cols, (const float**) cp, dmap); case FM_DCOMPLEX: return DeleteSparseMatrixRowsComplex(rows, cols, (const double**) cp, dmap); default: throw Exception("unsupported type for DeleteSparseMatrixRows"); } } template void* DeleteSparseMatrixVectorReal(int &rows, int &cols, const T** src, const indexType* ndx, int delete_len) { // Get the source matrix as an IJV array int nnz; IJVEntry* mlist = ConvertSparseToIJVListReal(src,rows,cols,nnz); // reshape the IJV list into a vector int i; for (i=0;i ilistBlock(delete_len); int* ilist = &ilistBlock; // Copy the index info in for (i=0;ii+1) { for (int k=j;k(mlist,nnz,rows,1); delete mlist; return dst; } template void* DeleteSparseMatrixVectorComplex(int &rows, int &cols, const T** src, const indexType* ndx, int delete_len) { // Get the source matrix as an IJV array int nnz; IJVEntry* mlist = ConvertSparseToIJVListComplex(src,rows,cols,nnz); // reshape the IJV list into a vector int i; for (i=0;i ilistBlock(delete_len); int* ilist = &ilistBlock; // Copy the index info in for (i=0;ii+1) { for (int k=j;k(mlist,nnz,rows,1); delete mlist; return dst; } void* DeleteSparseMatrixVectorSubset(Class dclass, int &rows, int &cols, const void* cp, const indexType* ndx, int delete_len) { switch(dclass) { case FM_LOGICAL: return DeleteSparseMatrixVectorReal(rows, cols, (const uint32**) cp, ndx, delete_len); case FM_INT32: return DeleteSparseMatrixVectorReal(rows, cols, (const int32**) cp, ndx, delete_len); case FM_FLOAT: return DeleteSparseMatrixVectorReal(rows, cols, (const float**) cp, ndx, delete_len); case FM_DOUBLE: return DeleteSparseMatrixVectorReal(rows, cols, (const double**) cp, ndx, delete_len); case FM_COMPLEX: return DeleteSparseMatrixVectorComplex(rows, cols, (const float**) cp, ndx, delete_len); case FM_DCOMPLEX: return DeleteSparseMatrixVectorComplex(rows, cols, (const double**) cp, ndx, delete_len); default: throw Exception("unsupported type for DeleteSparseMatrixVectorSubset"); } } template void* GetSparseDiagonalReal(int rows, int cols, const T** src, int diagonalOrder) { T** dst; dst = new T*[1]; int outLen; if (diagonalOrder < 0) { outLen = (rows+diagonalOrder) < cols ? (rows+diagonalOrder) : cols; MemBlock bufferBlock(outLen); T* buffer = &bufferBlock; MemBlock abufBlock(outLen*2); T* abuf = &abufBlock; for (int j=0;j(src[j],j-diagonalOrder,buffer+j); dst[0] = CompressRealVector(abuf,buffer,outLen); } else { outLen = rows < (cols-diagonalOrder) ? rows : (cols-diagonalOrder); MemBlock bufferBlock(outLen); T* buffer = &bufferBlock; MemBlock abufBlock(outLen*2); T* abuf = &abufBlock; for (int j=0;j(src[diagonalOrder+j],j,buffer+j); dst[0] = CompressRealVector(abuf,buffer,outLen); } return dst; } template void* GetSparseDiagonalComplex(int rows, int cols, const T** src, int diagonalOrder) { T** dst; dst = new T*[1]; int outLen; if (diagonalOrder < 0) { outLen = (rows+diagonalOrder) < cols ? (rows+diagonalOrder) : cols; MemBlock bufferBlock(2*outLen); T* buffer = &bufferBlock; MemBlock abufBlock(4*outLen); T* abuf = &abufBlock; for (int j=0;j bufferBlock(2*outLen); T* buffer = &bufferBlock; MemBlock abufBlock(4*outLen); T* abuf = &abufBlock; for (int j=0;j(rows, cols, (const uint32**) cp, diag_order); case FM_INT32: return GetSparseDiagonalReal(rows, cols, (const int32**) cp, diag_order); case FM_FLOAT: return GetSparseDiagonalReal(rows, cols, (const float**) cp, diag_order); case FM_DOUBLE: return GetSparseDiagonalReal(rows, cols, (const double**) cp, diag_order); case FM_COMPLEX: return GetSparseDiagonalComplex(rows, cols, (const float**) cp, diag_order); case FM_DCOMPLEX: return GetSparseDiagonalComplex(rows, cols, (const double**) cp, diag_order); default: throw Exception("unsupported type for GetSparseDiagonal"); } } template bool TestSparseNotFinite(int cols, const T** src) { for (int i=0;i(cols, (const float**) cp); case FM_DOUBLE: return TestSparseNotFinite(cols, (const double**) cp); case FM_COMPLEX: return TestSparseNotFinite(cols, (const float**) cp); case FM_DCOMPLEX: return TestSparseNotFinite(cols, (const double**) cp); default: throw Exception("unsupported type for SparseAnyNotFinite"); } } template void* SparseArrayTransposeReal(int rows, int cols, const T** src) { int nnz; IJVEntry* mlist = ConvertSparseToIJVListReal(src, rows, cols, nnz); for (int i=0;i(mlist,nnz,cols,rows); delete mlist; return B; } template void* SparseArrayTransposeComplex(int rows, int cols, const T** src) { int nnz; IJVEntry* mlist = ConvertSparseToIJVListComplex(src, rows, cols, nnz); for (int i=0;i(mlist,nnz,cols,rows); delete mlist; return B; } template void* SparseArrayHermitianComplex(int rows, int cols, const T** src) { int nnz; IJVEntry* mlist = ConvertSparseToIJVListComplex(src, rows, cols, nnz); for (int i=0;i(mlist,nnz,cols,rows); delete mlist; return B; } void* SparseArrayTranspose(Class dclass, int rows, int cols, const void* cp) { switch(dclass) { case FM_LOGICAL: return SparseArrayTransposeReal(rows, cols, (const uint32**) cp); case FM_INT32: return SparseArrayTransposeReal(rows, cols, (const int32**) cp); case FM_FLOAT: return SparseArrayTransposeReal(rows, cols, (const float**) cp); case FM_DOUBLE: return SparseArrayTransposeReal(rows, cols, (const double**) cp); case FM_COMPLEX: return SparseArrayTransposeComplex(rows, cols, (const float**) cp); case FM_DCOMPLEX: return SparseArrayTransposeComplex(rows, cols, (const double**) cp); default: throw Exception("unsupported type for SparseArrayTranspose"); } } void* SparseArrayHermitian(Class dclass, int rows, int cols, const void* cp) { switch(dclass) { case FM_COMPLEX: return SparseArrayHermitianComplex(rows, cols, (const float**) cp); case FM_DCOMPLEX: return SparseArrayHermitianComplex(rows, cols, (const double**) cp); default: throw Exception("unsupported type for SparseArrayHermitian"); } } template void* SparseAddReal(int rows, int cols,const T** Amat,const T** Bmat) { int i; T** Cmat; Cmat = new T*[cols]; MemBlock bufferBlock(rows*2); T* buffer = &bufferBlock; for (i=0;i A(Amat[i],rows); RLEDecoder B(Bmat[i],rows); RLEEncoder C(buffer,rows); A.update(); B.update(); while (A.more() || B.more()) { if (A.row() == B.row()) { C.set(A.row()); C.push(A.value() + B.value()); A.advance(); B.advance(); } else if (A.row() < B.row()) { C.set(A.row()); C.push(A.value()); A.advance(); } else { C.set(B.row()); C.push(B.value()); B.advance(); } } C.end(); Cmat[i] = C.copyout(); } return Cmat; } template void* SparseAddComplex(int rows, int cols,const T** Amat,const T** Bmat) { int i; T** Cmat; Cmat = new T*[cols]; MemBlock bufferBlock(rows*4); T* buffer = &bufferBlock; for (i=0;i A(Amat[i],rows); RLEDecoderComplex B(Bmat[i],rows); RLEEncoderComplex C(buffer,rows); A.update(); B.update(); while (A.more() || B.more()) { if (A.row() == B.row()) { C.set(A.row()); C.push(A.value_real() + B.value_real(), A.value_imag() + B.value_imag()); A.advance(); B.advance(); } else if (A.row() < B.row()) { C.set(A.row()); C.push(A.value_real(),A.value_imag()); A.advance(); } else { C.set(B.row()); C.push(B.value_real(),B.value_imag()); B.advance(); } } C.end(); Cmat[i] = C.copyout(); } return Cmat; } void* SparseSparseAdd(Class dclass, const void *ap, int rows, int cols, const void *bp) { switch(dclass) { case FM_INT32: return SparseAddReal(rows, cols, (const int32**) ap, (const int32**) bp); case FM_FLOAT: return SparseAddReal(rows, cols, (const float**) ap, (const float**) bp); case FM_DOUBLE: return SparseAddReal(rows, cols, (const double**) ap, (const double**) bp); case FM_COMPLEX: return SparseAddComplex(rows, cols, (const float**) ap, (const float**) bp); case FM_DCOMPLEX: return SparseAddComplex(rows, cols, (const double**) ap, (const double**) bp); default: throw Exception("unsupported type for SparseSparseAdd"); } } template void* SparseSubtractReal(int rows, int cols,const T** Amat,const T** Bmat) { int i; T** Cmat; Cmat = new T*[cols]; MemBlock bufferBlock(rows*2); T* buffer = &bufferBlock; for (i=0;i A(Amat[i],rows); RLEDecoder B(Bmat[i],rows); RLEEncoder C(buffer,rows); A.update(); B.update(); while (A.more() || B.more()) { if (A.row() == B.row()) { C.set(A.row()); C.push(A.value() - B.value()); A.advance(); B.advance(); } else if (A.row() < B.row()) { C.set(A.row()); C.push(A.value()); A.advance(); } else { C.set(B.row()); C.push(-B.value()); B.advance(); } } C.end(); Cmat[i] = C.copyout(); } return Cmat; } template void* SparseSubtractComplex(int rows, int cols,const T** Amat,const T** Bmat) { int i; T** Cmat; Cmat = new T*[cols]; MemBlock bufferBlock(rows*4); T* buffer = &bufferBlock; for (i=0;i A(Amat[i],rows); RLEDecoderComplex B(Bmat[i],rows); RLEEncoderComplex C(buffer,rows); A.update(); B.update(); while (A.more() || B.more()) { if (A.row() == B.row()) { C.set(A.row()); C.push(A.value_real() - B.value_real(), A.value_imag() - B.value_imag()); A.advance(); B.advance(); } else if (A.row() < B.row()) { C.set(A.row()); C.push(A.value_real(),A.value_imag()); A.advance(); } else { C.set(B.row()); C.push(-B.value_real(),-B.value_imag()); B.advance(); } } C.end(); Cmat[i] = C.copyout(); } return Cmat; } void* SparseSparseSubtract(Class dclass, const void *ap, int rows, int cols, const void *bp) { switch(dclass) { case FM_INT32: return SparseSubtractReal(rows, cols, (const int32**) ap, (const int32**) bp); case FM_FLOAT: return SparseSubtractReal(rows, cols, (const float**) ap, (const float**) bp); case FM_DOUBLE: return SparseSubtractReal(rows, cols, (const double**) ap, (const double**) bp); case FM_COMPLEX: return SparseSubtractComplex(rows, cols, (const float**) ap, (const float**) bp); case FM_DCOMPLEX: return SparseSubtractComplex(rows, cols, (const double**) ap, (const double**) bp); default: throw Exception("unsupported type for SparseSparseSubtract"); } } template void* SparseMultiplyReal(int rows, int cols,const T** Amat,const T** Bmat) { int i; T** Cmat; Cmat = new T*[cols]; MemBlock bufferBlock(rows*2); T* buffer = &bufferBlock; for (i=0;i A(Amat[i],rows); RLEDecoder B(Bmat[i],rows); RLEEncoder C(buffer,rows); A.update(); B.update(); while (A.more() || B.more()) { if (A.row() == B.row()) { C.set(A.row()); C.push(A.value()*B.value()); A.advance(); B.advance(); } else if (A.row() < B.row()) { A.advance(); } else { B.advance(); } } C.end(); Cmat[i] = C.copyout(); } return Cmat; } template void* SparseMultiplyComplex(int rows, int cols,const T** Amat,const T** Bmat) { int i; T** Cmat; Cmat = new T*[cols]; MemBlock bufferBlock(rows*4); T* buffer = &bufferBlock; for (i=0;i A(Amat[i],rows); RLEDecoderComplex B(Bmat[i],rows); RLEEncoderComplex C(buffer,rows); A.update(); B.update(); while (A.more() || B.more()) { if (A.row() == B.row()) { C.set(A.row()); C.push(A.value_real()*B.value_real()- A.value_imag()*B.value_imag(), A.value_real()*B.value_imag()+ A.value_imag()*B.value_real()); A.advance(); B.advance(); } else if (A.row() < B.row()) { A.advance(); } else { B.advance(); } } C.end(); Cmat[i] = C.copyout(); } return Cmat; } void* SparseSparseMultiply(Class dclass, const void *ap, int rows, int cols, const void *bp) { switch(dclass) { case FM_INT32: return SparseMultiplyReal(rows, cols, (const int32**) ap, (const int32**) bp); case FM_FLOAT: return SparseMultiplyReal(rows, cols, (const float**) ap, (const float**) bp); case FM_DOUBLE: return SparseMultiplyReal(rows, cols, (const double**) ap, (const double**) bp); case FM_COMPLEX: return SparseMultiplyComplex(rows, cols, (const float**) ap, (const float**) bp); case FM_DCOMPLEX: return SparseMultiplyComplex(rows, cols, (const double**) ap, (const double**) bp); default: throw Exception("unsupported type for SparseSparseMultiply"); } } template void* SparseScalarMultiplyReal(int rows, int cols, const T** Amat, const T* Bval) { T** Cmat; Cmat = new T*[cols]; MemBlock bufferBlock(rows*2); T* buffer = &bufferBlock; for (int i=0;i A(Amat[i],rows); RLEEncoder C(buffer,rows); A.update(); while (A.more()) { C.set(A.row()); C.push(A.value()*Bval[0]); A.advance(); } C.end(); Cmat[i] = C.copyout(); } return Cmat; } template void* SparseScalarMultiplyComplex(int rows, int cols, const T** Amat, const T* Bval) { T** Cmat; Cmat = new T*[cols]; MemBlock bufferBlock(rows*4); T* buffer = &bufferBlock; for (int i=0;i A(Amat[i],rows); RLEEncoderComplex C(buffer,rows); A.update(); while (A.more()) { C.set(A.row()); C.push(A.value_real()*Bval[0]-A.value_imag()*Bval[1], A.value_real()*Bval[1]+A.value_imag()*Bval[0]); A.advance(); } C.end(); Cmat[i] = C.copyout(); } return Cmat; } template float** SparseOnesFuncReal(int rows, int cols, const T** Amat) { float** Cmat; Cmat = new float*[cols]; MemBlock bufferBlock(rows*2); float* buffer = &bufferBlock; for (int i=0;i A(Amat[i],rows); RLEEncoder C(buffer,rows); A.update(); while (A.more()) { C.set(A.row()); C.push(1.0f); A.advance(); } C.end(); Cmat[i] = C.copyout(); } return Cmat; } template float** SparseOnesFuncComplex(int rows, int cols, const T** Amat) { float** Cmat; Cmat = new float*[cols]; MemBlock bufferBlock(rows*2); float* buffer = &bufferBlock; for (int i=0;i A(Amat[i],rows); RLEEncoder C(buffer,rows); A.update(); while (A.more()) { C.set(A.row()); C.push(1.0f); A.advance(); } C.end(); Cmat[i] = C.copyout(); } return Cmat; } void* SparseOnesFunc(Class dclass, int Arows, int Acols, const void *Ap) { switch(dclass) { case FM_LOGICAL: return SparseOnesFuncReal(Arows,Acols,(const uint32**)Ap); case FM_INT32: return SparseOnesFuncReal(Arows,Acols,(const int32**)Ap); case FM_FLOAT: return SparseOnesFuncReal(Arows,Acols,(const float**)Ap); case FM_DOUBLE: return SparseOnesFuncReal(Arows,Acols,(const double**)Ap); case FM_COMPLEX: return SparseOnesFuncComplex(Arows,Acols,(const float**)Ap); case FM_DCOMPLEX: return SparseOnesFuncComplex(Arows,Acols,(const double**)Ap); default: throw Exception("unsupported type for SparseOnesFunc"); } } void* SparseScalarMultiply(Class dclass, const void *ap, int rows, int cols, const void *bp) { switch(dclass) { case FM_INT32: return SparseScalarMultiplyReal(rows, cols, (const int32**) ap, (const int32*) bp); case FM_FLOAT: return SparseScalarMultiplyReal(rows, cols, (const float**) ap, (const float*) bp); case FM_DOUBLE: return SparseScalarMultiplyReal(rows, cols, (const double**) ap, (const double*) bp); case FM_COMPLEX: return SparseScalarMultiplyComplex(rows, cols, (const float**) ap, (const float*) bp); case FM_DCOMPLEX: return SparseScalarMultiplyComplex(rows, cols, (const double**) ap, (const double*) bp); default: throw Exception("unsupported type for SparseScalarMultiply"); } } int ConvertSparseCCSReal(int rows, int cols, const double **Ap, int* &Acolstart, int* &Arowindx, double* &Adata) { // Get number of nonzeros int nnz = CountNonzerosReal(Ap,rows,cols); // Set up the output arrays Acolstart = new int[cols+1]; Arowindx = new int[nnz]; Adata = new double[nnz]; // We have to unpack the matrix into these arrays... we do this // by traversing the columns int m = 0; Acolstart[0] = 0; for (int i=0;i A(Ap[i],rows); A.update(); while (A.more()) { Arowindx[m] = A.row(); Adata[m++] = A.value(); A.advance(); } Acolstart[i+1] = m; } return nnz; } int ConvertSparseCCSComplex(int rows, int cols, const double **Ap, int* &Acolstart, int* &Arowindx, double* &Adata, double* &Aimag) { // Get number of nonzeros int nnz = CountNonzerosComplex(Ap,rows,cols); // Set up the output arrays Acolstart = new int[cols+1]; Arowindx = new int[nnz]; Adata = new double[nnz]; Aimag = new double[nnz]; // We have to unpack the matrix into these arrays... we do this // by traversing the columns int m = 0; Acolstart[0] = 0; for (int i=0;i A(Ap[i],rows); A.update(); while (A.more()) { Arowindx[m] = A.row(); Adata[m] = A.value_real(); Aimag[m++] = A.value_imag(); A.advance(); } Acolstart[i+1] = m; } return nnz; } void* SparseSolveLinEqReal(int Arows, int Acols, const void *Ap, int Brows, int Bcols, const void *Bp) { #if HAVE_UMFPACK // Convert A into CCS form int *Acolstart; int *Arowindx; double *Adata; int nnz; nnz = ConvertSparseCCSReal(Arows, Acols, (const double**) Ap, Acolstart, Arowindx, Adata); double *null = (double *) NULL ; void *Symbolic, *Numeric ; (void) umfpack_di_symbolic (Acols, Acols, Acolstart, Arowindx, Adata, &Symbolic, null, null); (void) umfpack_di_numeric (Acolstart, Arowindx, Adata, Symbolic, &Numeric, null, null) ; umfpack_di_free_symbolic (&Symbolic) ; double *x = (double*) Malloc(sizeof(double)*Arows*Bcols); const double *b = (const double*) Bp; for (int i=0;i* ConvertCCSToIJVListReal(int *Ap, int *Ai, double *Ax, int Acols, int Anz) { IJVEntry* T = new IJVEntry[Anz]; int i, j, p; p = 0; for (i=0;i* ConvertCCSToIJVListComplex(int *Ap, int *Ai, double *Ax, double *Ay, int Acols, int Anz) { IJVEntry* T = new IJVEntry[Anz]; int i, j, p; p = 0; for (i=0;i* llist = ConvertCCSToIJVListReal(Lp,Lj,Lx,Arows,lnz); for (int j=0;j* ulist = ConvertCCSToIJVListReal(Up,Ui,Ux,Acols,unz); IJVEntry* rlist = new IJVEntry[Arows]; std::sort(llist,llist+lnz); for (int i=0;i(llist,lnz,Arows,Amid),true)); retval.push_back(Array(FM_DOUBLE,Dimensions(Amid,Acols), ConvertIJVtoRLEReal(ulist,unz,Amid,Acols),true)); retval.push_back(Array(FM_INT32,Dimensions(1,Arows),P,false)); retval.push_back(Array(FM_INT32,Dimensions(1,Acols),Q,false)); retval.push_back(Array(FM_DOUBLE,Dimensions(Arows,Arows), ConvertIJVtoRLEReal(rlist,Arows,Arows,Arows),true)); umfpack_di_free_symbolic(&Symbolic); umfpack_di_free_numeric(&Numeric); delete[] rlist; delete[] ulist; delete[] llist; delete[] Rs; delete[] Ux; delete[] Ui; delete[] Up; delete[] Lx; delete[] Lj; delete[] Lp; delete[] Acolstart; delete[] Arowindx; delete[] Adata; return retval; #else throw Exception("LU Decompositions of sparse matrices requires UMFPACK support, which was not available at compile time. You must have UMFPACK installed at compile time for FreeMat to enable this functionality."); #endif } ArrayVector SparseLUDecomposeComplex(int Arows, int Acols, const void *Ap) { #if HAVE_UMFPACK // Convert A into CCS form int *Acolstart; int *Arowindx; double *Adata; double *Aimag; int nnz; nnz = ConvertSparseCCSComplex(Arows, Acols, (const double**) Ap, Acolstart, Arowindx, Adata, Aimag); double *null = (double *) NULL ; void *Symbolic, *Numeric ; (void) umfpack_zi_symbolic (Acols, Acols, Acolstart, Arowindx, Adata, Aimag, &Symbolic, null, null); (void) umfpack_zi_numeric (Acolstart, Arowindx, Adata, Aimag, Symbolic, &Numeric, null, null); // Set up the output arrays for the LU Decomposition. // The first matrix is L, which is stored in comprssed row form. int lnz; int unz; int n_row; int n_col; int nz_udiag; (void) umfpack_zi_get_lunz(&lnz,&unz,&n_row,&n_col,&nz_udiag,Numeric); int *Lp = new int[Arows+1]; int *Lj = new int[lnz]; double *Lx = new double[lnz]; double *Ly = new double[lnz]; int *Up = new int[Acols+1]; int *Ui = new int[unz]; double *Ux = new double[unz]; double *Uy = new double[unz]; int32 *P = (int32*) Malloc(sizeof(int32)*Arows); int32 *Q = (int32*) Malloc(sizeof(int32)*Acols); double *Rs = new double[Arows]; int do_recip; umfpack_zi_get_numeric(Lp, Lj, Lx, Ly, Up, Ui, Ux, Uy, P, Q, NULL, NULL, &do_recip, Rs, Numeric); for (int i=0;i* llist = ConvertCCSToIJVListComplex(Lp,Lj,Lx,Ly,Arows,lnz); for (int j=0;j* ulist = ConvertCCSToIJVListComplex(Up,Ui,Ux,Uy,Acols,unz); IJVEntry* rlist = new IJVEntry[Arows]; std::sort(llist,llist+lnz); for (int i=0;i(llist,lnz,Arows,Amid),true)); retval.push_back(Array(FM_DCOMPLEX,Dimensions(Amid,Acols), ConvertIJVtoRLEComplex(ulist,unz,Amid,Acols),true)); retval.push_back(Array(FM_INT32,Dimensions(1,Arows),P,false)); retval.push_back(Array(FM_INT32,Dimensions(1,Acols),Q,false)); retval.push_back(Array(FM_DOUBLE,Dimensions(Arows,Arows), ConvertIJVtoRLEReal(rlist,Arows,Arows,Arows),true)); umfpack_di_free_symbolic(&Symbolic); umfpack_di_free_numeric(&Numeric); delete[] rlist; delete[] ulist; delete[] llist; delete[] Rs; delete[] Uy; delete[] Ux; delete[] Ui; delete[] Up; delete[] Ly; delete[] Lx; delete[] Lj; delete[] Lp; delete[] Acolstart; delete[] Arowindx; delete[] Adata; return retval; #else throw Exception("LU Decompositions of sparse matrices requires UMFPACK support, which was not available at compile time. You must have UMFPACK installed at compile time for FreeMat to enable this functionality."); #endif } ArrayVector SparseLUDecompose(int nargout, Array A) { if ((A.dataClass() == FM_FLOAT) || (A.dataClass() == FM_COMPLEX)) throw Exception("FreeMat currently only supports the LU decomposition for double and dcomplex matrices"); if (A.dataClass() < FM_FLOAT) A.promoteType(FM_DOUBLE); int Arows; int Acols; Arows = A.getDimensionLength(0); Acols = A.getDimensionLength(1); if (Arows != Acols) throw Exception("FreeMat currently only supports LU decompositions for square matrices"); if (A.dataClass() == FM_DOUBLE) return SparseLUDecomposeReal(Arows, Acols, A.getSparseDataPointer()); else return SparseLUDecomposeComplex(Arows, Acols, A.getSparseDataPointer()); } template bool SparseIsSymmetricReal(int rows, int cols, const T** src) { int nnz; IJVEntry* mlist = ConvertSparseToIJVListReal(src, rows, cols, nnz); IJVEntry* plist = new IJVEntry[nnz]; // Transpose the array for (int i=0;i residBlock(n); double *resid = &residBlock; int ncv = 2*nev+1; if (ncv > n) ncv = n; MemBlock vBlock(n*ncv); double *v = &vBlock; int ldv = n; int iparam[11]; iparam[0] = 1; iparam[1] = 0; iparam[2] = 300; iparam[3] = 1; iparam[4] = 0; iparam[5] = 0; iparam[6] = 1; iparam[7] = 0; iparam[8] = 0; iparam[9] = 0; iparam[10] = 0; MemBlock workdBlock(3*n); double *workd = &workdBlock; int lworkl = 3*ncv*ncv+6*ncv; MemBlock worklBlock(lworkl); double *workl = &worklBlock; int info = 0; MemBlock ipntrBlock(14); int *ipntr = &ipntrBlock; while (1) { dnaupd_(&ido, &bmat, &n , which.c_str(), &nev, &tol, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, &info,1,which.size()); if ((ido == -1) || (ido == 1)) SparseDenseRealMultiply((double**)ap,rows,cols, workd+ipntr[0]-1, 1, workd+ipntr[1]-1); else break; } if (info < 0) DNAUPARPACKError(info); // Compute vectors and values int rvec; if (nargout <= 1) rvec = 0; else rvec = 1; char howmny = 'A'; MemBlock selectBlock(ncv); int *select = &selectBlock; double *dr = (double*) Malloc(sizeof(double)*(nev+1)); double *di = (double*) Malloc(sizeof(double)*(nev+1)); double *z; if (nargout <= 1) z = NULL; else z = (double*) Malloc(sizeof(double)*(n*(nev+1))); double sigmar; double sigmai; MemBlock workevBlock(3*ncv); double *workev = &workevBlock; int ierr; dneupd_(&rvec, &howmny, select, dr, di, z, &ldv, &sigmar, &sigmai, workev, &bmat, &n, which.c_str(), &nev, &tol, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, &ierr); int nconv = iparam[4]; if (ierr != 0) DNEUPARPACKError(ierr); // Reverse the vectors dr and di if (rvec == 0) { for (int i=0;i<(nconv)/2;i++) { swap(dr[i],dr[nconv-1-i]); swap(di[i],di[nconv-1-i]); } } // Check for complex eigenvalues bool anycomplex = false; for (int i=0;(!anycomplex) && (i 1) { eigvecs = (double*) Malloc(nev*n*sizeof(double)*2); // if eigenvalue i is complex, then the corresponding eigenvector // should be constructed from columns i and i+1 of z if i is even // and columns i-1 and i of z if i is odd int vcol = 0; while (vcol < min(nconv,nev)) { if (di[vcol] != 0) { for (int j=0;j residBlock(n); double *resid = &residBlock; int ncv = 2*nev+1; if (ncv > n) ncv = n; MemBlock vBlock(n*ncv); double *v = &vBlock; int ldv = n; int iparam[11]; iparam[0] = 1; iparam[1] = 0; iparam[2] = 300; iparam[3] = 1; iparam[4] = 0; iparam[5] = 0; iparam[6] = 1; iparam[7] = 0; iparam[8] = 0; iparam[9] = 0; iparam[10] = 0; MemBlock workdBlock(3*n); double *workd = &workdBlock; int lworkl = ncv*ncv+8*ncv; MemBlock worklBlock(lworkl); double *workl = &worklBlock; int info = 0; int ipntr[11]; while (1) { dsaupd_(&ido, &bmat, &n , which.c_str(), &nev, &tol, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, &info); if ((ido == -1) || (ido == 1)) SparseDenseRealMultiply((double**)ap,rows,cols, workd+ipntr[0]-1, 1, workd+ipntr[1]-1); else break; } if (info < 0) DNAUPARPACKError(info); // Compute vectors and values int rvec; if (nargout <= 1) rvec = 0; else rvec = 1; char howmny = 'A'; MemBlock selectBlock(ncv); int *select = &selectBlock; double *d = (double*) Malloc(sizeof(double)*nev); double *z; if (nargout <= 1) z = NULL; else z = (double*) Malloc(sizeof(double)*(n*nev)); double sigma; int ierr; dseupd_(&rvec, &howmny, select, d,z, &ldv, &sigma,&bmat, &n, which.c_str(), &nev, &tol, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, &ierr); int nconv = iparam[4]; if (ierr != 0) DNEUPARPACKError(ierr); // Reverse the vectors dr and di for (int i=0;i<(nconv)/2;i++) swap(d[i],d[nconv-1-i]); if (rvec == 1) { for (int i=0;i<(nconv)/2;i++) for (int j=0;j residBlock(2*n); double *resid = &residBlock; int ncv = 2*nev+1; if (ncv > n) ncv = n; MemBlock vBlock(2*n*ncv); double *v = &vBlock; int ldv = n; int iparam[11]; iparam[0] = 1; iparam[1] = 0; iparam[2] = 300; iparam[3] = 1; iparam[4] = 0; iparam[5] = 0; iparam[6] = 1; iparam[7] = 0; iparam[8] = 0; iparam[9] = 0; iparam[10] = 0; MemBlock workdBlock(2*3*n); double *workd = &workdBlock; int lworkl = 3*ncv*ncv+5*ncv; MemBlock worklBlock(2*lworkl); double *workl = &worklBlock; MemBlock rworkBlock(ncv); double *rwork = &rworkBlock; int info = 0; int ipntr[14]; while (1) { znaupd_(&ido, &bmat, &n , which.c_str(), &nev, &tol, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, rwork, &info); if ((ido == -1) || (ido == 1)) SparseDenseComplexMultiply((double**)ap,rows,cols, workd+2*(ipntr[0]-1), 1, workd+2*(ipntr[1]-1)); else break; } if (info < 0) DNAUPARPACKError(info); // Compute vectors and values int rvec; if (nargout <= 1) rvec = 0; else rvec = 1; char howmny = 'A'; MemBlock selectBlock(ncv); int *select = &selectBlock; double *d = (double*) Malloc(2*sizeof(double)*(nev+1)); double *z; if (nargout <= 1) z = NULL; else z = (double*) Malloc(2*sizeof(double)*(n*(nev+1))); double sigma[2]; MemBlock workevBlock(2*2*ncv); double *workev = &workevBlock; int ierr; zneupd_(&rvec, &howmny, select, d, z, &ldv, sigma, workev, &bmat, &n, which.c_str(), &nev, &tol, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, rwork, &ierr); int nconv = iparam[4]; if (ierr != 0) DNEUPARPACKError(ierr); // Reverse the vectors dr and di if (rvec == 0) { for (int i=0;i<(nconv)/2;i++) { swap(d[2*i],d[2*(nconv-1-i)]); swap(d[2*i+1],d[2*(nconv-1-i)+1]); } } double *eigvals = (double*) Malloc(nev*sizeof(double)*2); for (int i=0;i 1) { eigvecs = (double*) Malloc(nev*n*sizeof(double)*2); for (int i=0;i(shift, rows); // Compute A - scI double** C = (double**) SparseSubtractReal(rows,cols,(const double**) ap,(const double**) scI); // Factor it... // Convert C into CCS form int *Ccolstart; int *Crowindx; double *Cdata; int nnz; nnz = ConvertSparseCCSReal(rows, cols, (const double**) C, Ccolstart, Crowindx, Cdata); double *null = (double *) NULL ; void *Symbolic, *Numeric ; int res; res = umfpack_di_symbolic (cols, cols, Ccolstart, Crowindx, Cdata, &Symbolic, null, null); res = umfpack_di_numeric (Ccolstart, Crowindx, Cdata, Symbolic, &Numeric, null, null); umfpack_di_free_symbolic (&Symbolic); // Initialization call int ido = 0; char bmat = 'I'; int n = rows; string which = "LM"; // How many eigenvalues to compute char cmach = 'E'; double tol = dlamch_(&cmach); MemBlock residBlock(n); double *resid = &residBlock; int ncv = 2*nev+1; if (ncv > n) ncv = n; MemBlock vBlock(n*ncv); double *v = &vBlock; int ldv = n; int iparam[11]; iparam[0] = 1; iparam[1] = 0; iparam[2] = 300; iparam[3] = 1; iparam[4] = 0; iparam[5] = 0; iparam[6] = 3; iparam[7] = 0; iparam[8] = 0; iparam[9] = 0; iparam[10] = 0; MemBlock workdBlock(3*n); double *workd = &workdBlock; int lworkl = 3*ncv*ncv+6*ncv; MemBlock worklBlock(lworkl); double *workl = &worklBlock; int info = 0; int ipntr[14]; while (1) { dnaupd_(&ido, &bmat, &n , which.c_str(), &nev, &tol, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, &info,1,which.size()); if ((ido == -1) || (ido == 1)) { res = umfpack_di_solve(UMFPACK_A, Ccolstart, Crowindx, Cdata, workd+ipntr[1]-1,workd+ipntr[0]-1,Numeric, null, null); // Check the result MemBlock g(cols); double *gp = &g; MemBlock r(rows); double *rp = &r; memcpy(rp,workd+ipntr[1]-1,sizeof(double)*rows); memcpy(gp,workd+ipntr[0]-1,sizeof(double)*cols); MemBlock c(rows); SparseDenseRealMultiply(C,rows,cols,rp,1,&c); } else if (ido == 2) memcpy( workd+ipntr[1]-1, workd+ipntr[0]-1, sizeof(double)*rows); else break; } // Free the numeric component umfpack_di_free_numeric(&Numeric); if (info < 0) DNAUPARPACKError(info); // Compute vectors and values int rvec; if (nargout <= 1) rvec = 0; else rvec = 1; char howmny = 'A'; MemBlock selectBlock(ncv); int *select = &selectBlock; //lambda_a = 1/lambda_c + sigma double *dr = (double*) Malloc(sizeof(double)*(nev+1)); double *di = (double*) Malloc(sizeof(double)*(nev+1)); double *z; if (nargout <= 1) z = NULL; else z = (double*) Malloc(sizeof(double)*(n*(nev+1))); double sigmar; double sigmai; sigmar = shift; sigmai = 0.0; MemBlock workevBlock(3*ncv); double *workev = &workevBlock; int ierr; dneupd_(&rvec, &howmny, select, dr, di, z, &ldv, &sigmar, &sigmai, workev, &bmat, &n, which.c_str(), &nev, &tol, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, &ierr); int nconv = iparam[4]; if (ierr != 0) DNEUPARPACKError(ierr); // Reverse the vectors dr and di if (rvec == 0) { for (int i=0;i<(nconv)/2;i++) { swap(dr[i],dr[nconv-1-i]); swap(di[i],di[nconv-1-i]); } } // Check for complex eigenvalues bool anycomplex = false; for (int i=0;(!anycomplex) && (i 1) { eigvecs = (double*) Malloc(nev*n*sizeof(double)*2); // if eigenvalue i is complex, then the corresponding eigenvector // should be constructed from columns i and i+1 of z if i is even // and columns i-1 and i of z if i is odd int vcol = 0; while (vcol < min(nconv,nev)) { if (di[vcol] != 0) { for (int j=0;j(shift[0], shift[1], rows); // Compute A - scI double** C = (double**) SparseSubtractComplex(rows,cols,(const double**) ap,(const double**) scI); // Factor it... // Convert C into CCS form int *Ccolstart; int *Crowindx; double *Cdata; double *Cimag; int nnz; nnz = ConvertSparseCCSComplex(rows, cols, (const double**) C, Ccolstart, Crowindx, Cdata, Cimag); double *null = (double *) NULL ; void *Symbolic, *Numeric ; int res; res = umfpack_zi_symbolic (cols, cols, Ccolstart, Crowindx, Cdata, Cimag, &Symbolic, null, null); res = umfpack_zi_numeric (Ccolstart, Crowindx, Cdata, Cimag, Symbolic, &Numeric, null, null); umfpack_zi_free_symbolic (&Symbolic); // Initialization call int ido = 0; char bmat = 'I'; int n = rows; string which = "LM"; // How many eigenvalues to compute char cmach = 'E'; double tol = dlamch_(&cmach); MemBlock residBlock(2*n); double *resid = &residBlock; int ncv = 2*nev+1; if (ncv > n) ncv = n; MemBlock vBlock(2*n*ncv); double *v = &vBlock; int ldv = n; int iparam[11]; iparam[0] = 1; iparam[1] = 0; iparam[2] = 300; iparam[3] = 1; iparam[4] = 0; iparam[5] = 0; iparam[6] = 3; iparam[7] = 0; iparam[8] = 0; iparam[9] = 0; iparam[10] = 0; MemBlock workdBlock(2*3*n); double *workd = &workdBlock; int lworkl = 3*ncv*ncv+5*ncv; MemBlock worklBlock(2*lworkl); double *workl = &worklBlock; MemBlock rworkBlock(ncv); double *rwork = &rworkBlock; MemBlock xrBlock(rows); double *xr = &xrBlock; MemBlock xiBlock(rows); double *xi = &xiBlock; MemBlock yrBlock(rows); double *yr = &yrBlock; MemBlock yiBlock(rows); double *yi = &yiBlock; int info = 0; int ipntr[14]; while (1) { znaupd_(&ido, &bmat, &n , which.c_str(), &nev, &tol, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, rwork, &info); if ((ido == -1) || (ido == 1)) { for (int i=0;i selectBlock(ncv); int *select = &selectBlock; //lambda_a = 1/lambda_c + sigma double *d = (double*) Malloc(2*sizeof(double)*(nev+1)); double *z; if (nargout <= 1) z = NULL; else z = (double*) Malloc(2*sizeof(double)*(n*(nev+1))); double sigma[2]; sigma[0] = shift[0]; sigma[1] = shift[1]; MemBlock workevBlock(2*2*ncv); double *workev = &workevBlock; int ierr; zneupd_(&rvec, &howmny, select, d, z, &ldv, sigma, workev, &bmat, &n, which.c_str(), &nev, &tol, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, rwork, &ierr); int nconv = iparam[4]; if (ierr != 0) DNEUPARPACKError(ierr); // Reverse the vectors dr and di if (rvec == 0) { for (int i=0;i<(nconv)/2;i++) { swap(d[2*i],d[2*(nconv-1-i)]); swap(d[2*i+1],d[2*(nconv-1-i)+1]); } } double *eigvals = (double*) Malloc(nev*sizeof(double)*2); for (int i=0;i 1) { eigvecs = (double*) Malloc(nev*n*sizeof(double)*2); for (int i=0;i(A.getDimensionLength(0), A.getDimensionLength(1), (const double**) A.getSparseDataPointer()); if (symDetect) return SparseEigDecomposeSymmetricReal((double**) A.getSparseDataPointer(), A.getDimensionLength(0), A.getDimensionLength(1), k, nargout, whichFlag); else return SparseEigDecomposeNonsymmetricReal((double**) A.getSparseDataPointer(), A.getDimensionLength(0), A.getDimensionLength(1), k, nargout, whichFlag); } } ArrayVector SparseEigDecomposeShifted(int nargout, Array A, int k, double shift[2]) { if (shift[1] != 0) A.promoteType(FM_DCOMPLEX); if (A.isComplex()) { return SparseEigDecomposeNonsymmetricComplexShifted((double**) A.getSparseDataPointer(), A.getDimensionLength(0), A.getDimensionLength(1), k, nargout, shift); } else { return SparseEigDecomposeNonsymmetricRealShifted((double**) A.getSparseDataPointer(), A.getDimensionLength(0), A.getDimensionLength(1), k, nargout, shift[0]); } } Array SparsePowerFuncComplexComplex(Array A, Array B) { A.promoteType(FM_DCOMPLEX); B.promoteType(FM_DCOMPLEX); B.makeDense(); int rows, cols; rows = A.getDimensionLength(0); cols = A.getDimensionLength(1); const double *bval = (const double *) B.getDataPointer(); double **dp = new double*[cols]; MemBlock bufferBlock(4*rows); double *buffer = &bufferBlock; const double **ap = (const double **) A.getSparseDataPointer(); for (int p=0;p Adecoder(ap[p],rows); RLEEncoderComplex Cencoder(buffer,rows); Adecoder.update(); while (Adecoder.more()) { Cencoder.set(Adecoder.row()); double aval[2]; double cval[2]; aval[0] = Adecoder.value_real(); aval[1] = Adecoder.value_imag(); power_zz(cval,aval,bval); Cencoder.push(cval[0],cval[1]); Adecoder.advance(); } Cencoder.end(); dp[p] = Cencoder.copyout(); } return Array(FM_DCOMPLEX,Dimensions(rows,cols),dp,true); } Array SparsePowerFuncComplexInteger(Array A, Array B) { A.promoteType(FM_DCOMPLEX); B.promoteType(FM_INT32); B.makeDense(); int rows, cols; rows = A.getDimensionLength(0); cols = A.getDimensionLength(1); int32 bval = *((const int32 *) B.getDataPointer()); double **dp = new double*[cols]; MemBlock bufferBlock(4*rows); double *buffer = &bufferBlock; const double **ap = (const double **) A.getSparseDataPointer(); for (int p=0;p Adecoder(ap[p],rows); RLEEncoderComplex Cencoder(buffer,rows); Adecoder.update(); while (Adecoder.more()) { Cencoder.set(Adecoder.row()); double aval[2]; double cval[2]; aval[0] = Adecoder.value_real(); aval[1] = Adecoder.value_imag(); power_zi(cval,aval,bval); Cencoder.push(cval[0],cval[1]); Adecoder.advance(); } Cencoder.end(); dp[p] = Cencoder.copyout(); } return Array(FM_DCOMPLEX,Dimensions(rows,cols),dp,true); } Array SparsePowerFuncRealReal(Array A, Array B) { A.promoteType(FM_DOUBLE); B.promoteType(FM_DOUBLE); B.makeDense(); int rows, cols; rows = A.getDimensionLength(0); cols = A.getDimensionLength(1); double bval = *((const double *) B.getDataPointer()); double **dp = new double*[cols]; MemBlock bufferBlock(2*rows); double *buffer = &bufferBlock; const double **ap = (const double **) A.getSparseDataPointer(); for (int p=0;p Adecoder(ap[p],rows); RLEEncoder Cencoder(buffer,rows); Adecoder.update(); while (Adecoder.more()) { Cencoder.set(Adecoder.row()); Cencoder.push(power_dd(Adecoder.value(),bval)); Adecoder.advance(); } Cencoder.end(); dp[p] = Cencoder.copyout(); } return Array(FM_DOUBLE,Dimensions(rows,cols),dp,true); } Array SparsePowerFuncRealInteger(Array A, Array B) { A.promoteType(FM_DOUBLE); B.promoteType(FM_INT32); B.makeDense(); int rows, cols; rows = A.getDimensionLength(0); cols = A.getDimensionLength(1); int32 bval = *((const int32 *) B.getDataPointer()); double **dp = new double*[cols]; MemBlock bufferBlock(2*rows); double *buffer = &bufferBlock; const double **ap = (const double **) A.getSparseDataPointer(); for (int p=0;p Adecoder(ap[p],rows); RLEEncoder Cencoder(buffer,rows); Adecoder.update(); while (Adecoder.more()) { Cencoder.set(Adecoder.row()); Cencoder.push(power_di(Adecoder.value(),bval)); Adecoder.advance(); } Cencoder.end(); dp[p] = Cencoder.copyout(); } return Array(FM_DOUBLE,Dimensions(rows,cols),dp,true); } // Compute A^B where B is a scalar, A is sparse and B is a scalar // We do this the lazy way - convert both to dcomplex or complex, and // then check the result to see if its real (or integer) // // These are the four case to consider: // // power_zi // power_zz // power_di // power_dd // // Array SparsePowerFunc(Array A, Array B) { Class Aclass, Bclass; Aclass = A.dataClass(); Bclass = B.dataClass(); Array C; if (A.isReal()) if (Bclass == FM_INT32) C = SparsePowerFuncRealInteger(A,B); else C = SparsePowerFuncRealReal(A,B); else if (Bclass == FM_INT32) C = SparsePowerFuncComplexInteger(A,B); else C = SparsePowerFuncComplexComplex(A,B); C.promoteType(A.dataClass()); return C; } template bool IsPositiveSparse(int rows, int cols, const T** src) { bool allPositive = true; for (int p=0;p A(src[p],rows); A.update(); while (A.more()) { allPositive = allPositive & (A.value() >= 0); if (!allPositive) return false; A.advance(); } } return allPositive; } // Test for positive entries (technically non-negative entries) - only applicable to // real valued matrices bool SparseIsPositive(Class dclass, int rows, int cols, const void *ap) { switch(dclass) { case FM_INT32: return IsPositiveSparse(rows, cols, (const int32**) ap); case FM_FLOAT: return IsPositiveSparse(rows, cols, (const float**) ap); case FM_DOUBLE: return IsPositiveSparse(rows, cols, (const double**) ap); default: throw Exception("unsupported type for ispositive"); } } template void* SparseMatrixSumColumnsReal(int rows, int cols, const T** A) { T **out = (T**) new T*[cols]; MemBlock bufferBlock(3); T* buffer = &bufferBlock; for (int col=0;col Acmp(A[col], rows); Acmp.update(); while (Acmp.more()) { accum += Acmp.value(); Acmp.advance(); } RLEEncoder Bcmp(buffer,1); Bcmp.push(accum); Bcmp.end(); out[col] = Bcmp.copyout(); } return out; } template void* SparseMatrixSumColumnsComplex(int rows, int cols, const T** A) { T **out = (T**) new T*[cols]; MemBlock bufferBlock(5); T* buffer = &bufferBlock; for (int col=0;col Acmp(A[col], rows); Acmp.update(); while (Acmp.more()) { accum_real += Acmp.value_real(); accum_imag += Acmp.value_imag(); Acmp.advance(); } RLEEncoderComplex Bcmp(buffer,1); Bcmp.push(accum_real,accum_imag); Bcmp.end(); out[col] = Bcmp.copyout(); } return out; } template void* SparseMatrixSumRowsReal(int rows, int cols, const T** A) { T** out = (T**) new T*[1]; MemBlock bufferBlock(rows); T* buffer = &bufferBlock; for (int col=0;col Acmp(A[col], rows); Acmp.update(); while (Acmp.more()) { buffer[Acmp.row()] += Acmp.value(); Acmp.advance(); } } MemBlock cbufBlock(rows*2); T* cbuf = &cbufBlock; out[0] = CompressRealVector(cbuf, buffer, rows); return out; } template void* SparseMatrixSumRowsComplex(int rows, int cols, const T** A) { T** out = (T**) new T*[1]; MemBlock bufferBlock(2*rows); T* buffer = &bufferBlock; for (int col=0;col Acmp(A[col], rows); Acmp.update(); while (Acmp.more()) { buffer[2*Acmp.row()] += Acmp.value_real(); buffer[2*Acmp.row()+1] += Acmp.value_imag(); Acmp.advance(); } } MemBlock cbufBlock(rows*4); T* cbuf = &cbufBlock; out[0] = CompressComplexVector(cbuf, buffer, rows); return out; } void* SparseMatrixSumRows(Class dclass, int Arows, int Acols, const void *Ap) { switch(dclass) { case FM_INT32: return SparseMatrixSumRowsReal(Arows, Acols, (const int32**) Ap); case FM_FLOAT: return SparseMatrixSumRowsReal(Arows, Acols, (const float**) Ap); case FM_DOUBLE: return SparseMatrixSumRowsReal(Arows, Acols, (const double**) Ap); case FM_COMPLEX: return SparseMatrixSumRowsComplex(Arows, Acols, (const float**) Ap); case FM_DCOMPLEX: return SparseMatrixSumRowsComplex(Arows, Acols, (const double**) Ap); default: throw Exception("unexpected type in sparse matrix sum rows"); } } void* SparseMatrixSumColumns(Class dclass, int Arows, int Acols, const void *Ap) { switch(dclass) { case FM_INT32: return SparseMatrixSumColumnsReal(Arows, Acols, (const int32**) Ap); case FM_FLOAT: return SparseMatrixSumColumnsReal(Arows, Acols, (const float**) Ap); case FM_DOUBLE: return SparseMatrixSumColumnsReal(Arows, Acols, (const double**) Ap); case FM_COMPLEX: return SparseMatrixSumColumnsComplex(Arows, Acols, (const float**) Ap); case FM_DCOMPLEX: return SparseMatrixSumColumnsComplex(Arows, Acols, (const double**) Ap); default: throw Exception("unexpected type in sparse matrix sum cols"); } } template void* ReshapeSparseMatrixComplex(int rows, int cols, const T** A, int newrows, int newcols) { // Convert the sparse matrix into IJV format IJVEntry* mlist; int nnz; mlist = ConvertSparseToIJVListComplex(A,rows,cols,nnz); for (int i=0;i(mlist,nnz,newrows,newcols); } template void* ReshapeSparseMatrixReal(int rows, int cols, const T** A, int newrows, int newcols) { // Convert the sparse matrix into IJV format IJVEntry* mlist; int nnz; mlist = ConvertSparseToIJVListReal(A,rows,cols,nnz); for (int i=0;i(mlist,nnz,newrows,newcols); } void* ReshapeSparseMatrix(Class dclass, int rows, int cols, const void *Ap, int newrows, int newcols) { switch(dclass) { case FM_LOGICAL: return ReshapeSparseMatrixReal(rows,cols,(const uint32**) Ap,newrows,newcols); case FM_INT32: return ReshapeSparseMatrixReal(rows,cols,(const int32**) Ap,newrows,newcols); case FM_FLOAT: return ReshapeSparseMatrixReal(rows,cols,(const float**) Ap,newrows,newcols); case FM_DOUBLE: return ReshapeSparseMatrixReal(rows,cols,(const double**) Ap,newrows,newcols); case FM_COMPLEX: return ReshapeSparseMatrixComplex(rows,cols,(const float**) Ap,newrows,newcols); case FM_DCOMPLEX: return ReshapeSparseMatrixReal(rows,cols,(const double**) Ap,newrows,newcols); default: throw Exception("unexpected type in reshapse sparse matrix"); } } uint32* SparseLogicalToOrdinal(int rows, int cols, const void *Ap, int &nnz) { nnz = CountNonzerosReal((const uint32 **) Ap,rows,cols); uint32* retptr = (uint32*) Malloc(sizeof(uint32)*nnz); const uint32 **src = (const uint32**) Ap; int k = 0; for (int i=0;i A(src[i],rows); A.update(); while (A.more()) { retptr[k++] = A.row()+i*rows+1; A.advance(); } } return retptr; } template uint32 slo_and_real(T a, T b) { return (a && b) ? 1 : 0; } template uint32 slo_or_real(T a, T b) { return (a || b) ? 1 : 0; } template uint32 slo_lt_real(T a, T b) { return (a < b) ? 1 : 0; } template uint32 slo_gt_real(T a, T b) { return (a > b) ? 1 : 0; } template uint32 slo_ne_real(T a, T b) { return (a != b) ? 1 : 0; } template uint32 slo_le_real(T a, T b) { return (a <= b) ? 1 : 0; } template uint32 slo_ge_real(T a, T b) { return (a >= b) ? 1 : 0; } template uint32 slo_eq_real(T a, T b) { return (a == b) ? 1 : 0; } template uint32 slo_lt_complex(T ar, T ai, T br, T bi) { return (complex_abs(ar,ai) < complex_abs(br,bi)) ? 1 : 0; } template uint32 slo_gt_complex(T ar, T ai, T br, T bi) { return (complex_abs(ar,ai) > complex_abs(br,bi)) ? 1 : 0; } template uint32 slo_ne_complex(T ar, T ai, T br, T bi) { return ((ar != br) || (ai != bi)) ? 1 : 0; } template uint32 slo_le_complex(T ar, T ai, T br, T bi) { return (complex_abs(ar,ai) <= complex_abs(br,bi)) ? 1 : 0; } template uint32 slo_ge_complex(T ar, T ai, T br, T bi) { return (complex_abs(ar,ai) >= complex_abs(br,bi)) ? 1 : 0; } template uint32 slo_eq_complex(T ar, T ai, T br, T bi) { return ((ar == br) && (ai == bi)) ? 1 : 0; } template uint32** ApplyLogicalOpComplexZP(int rows, int cols, const T** Asrc, const T** Bsrc, uint32 (*fnop)(T,T,T,T)) { uint32** Cmat = new uint32*[cols]; MemBlock bufferBlock(rows*2); uint32* buffer = &bufferBlock; for (int col=0;col A(Asrc[col],rows); RLEDecoderComplex B(Bsrc[col],rows); RLEEncoder C(buffer,rows); A.update(); B.update(); while (A.more() || B.more()) { if (A.row() == B.row()) { C.set(A.row()); C.push(fnop(A.value_real(),A.value_imag(), B.value_real(),B.value_imag())); A.advance(); B.advance(); } else if (A.row() < B.row()) { C.set(A.row()); C.push(fnop(A.value_real(),A.value_imag(),0,0)); A.advance(); } else { C.set(B.row()); C.push(fnop(0,0,B.value_real(),B.value_imag())); B.advance(); } } C.end(); Cmat[col] = C.copyout(); } return Cmat; } template uint32** ApplyLogicalOpRealZP(int rows, int cols, const T** Asrc, const T** Bsrc, uint32 (*fnop)(T,T)) { uint32** Cmat = new uint32*[cols]; MemBlock bufferBlock(rows*2); uint32* buffer = &bufferBlock; for (int col=0;col A(Asrc[col],rows); RLEDecoder B(Bsrc[col],rows); RLEEncoder C(buffer,rows); A.update(); B.update(); while (A.more() || B.more()) { if (A.row() == B.row()) { C.set(A.row()); C.push(fnop(A.value(),B.value())); A.advance(); B.advance(); } else if (A.row() < B.row()) { C.set(A.row()); C.push(fnop(A.value(),0)); A.advance(); } else { C.set(B.row()); C.push(fnop(0,B.value())); B.advance(); } } C.end(); Cmat[col] = C.copyout(); } return Cmat; } // Non-zero preserving template uint32** ApplyLogicalOpRealNZ(int rows, int cols, const T** Asrc, const T** Bsrc, uint32 (*fnop)(T,T)) { uint32** Cmat = new uint32*[cols]; MemBlock AbufBlock(rows); T* Abuf = &AbufBlock; MemBlock BbufBlock(rows); T* Bbuf = &BbufBlock; MemBlock CbufBlock(rows); uint32* Cbuf = &CbufBlock; MemBlock bufferBlock(rows*2); uint32* buffer = &bufferBlock; for (int col=0;col(Asrc[col],Abuf,rows); DecompressRealString(Bsrc[col],Bbuf,rows); for (int row=0;row(buffer,Cbuf,rows); } return Cmat; } // Non-zero preserving template uint32** ApplyLogicalOpComplexNZ(int rows, int cols, const T** Asrc, const T** Bsrc, uint32 (*fnop)(T,T,T,T)) { uint32** Cmat = new uint32*[cols]; MemBlock AbufBlock(2*rows); T* Abuf = &AbufBlock; MemBlock BbufBlock(2*rows); T* Bbuf = &BbufBlock; MemBlock CbufBlock(rows); uint32* Cbuf = &CbufBlock; MemBlock bufferBlock(rows*2); uint32* buffer = &bufferBlock; for (int col=0;col(Asrc[col],Abuf,rows); DecompressComplexString(Bsrc[col],Bbuf,rows); for (int row=0;row(buffer,Cbuf,rows); } return Cmat; } template uint32** ApplyLogicalOpComplexScalarZP(int rows, int cols, const T** Asrc, const T* Bsrc, uint32 (*fnop)(T,T,T,T)) { uint32** Cmat = new uint32*[cols]; MemBlock bufferBlock(rows*2); uint32* buffer = &bufferBlock; for (int col=0;col A(Asrc[col],rows); RLEEncoder C(buffer,rows); memset(buffer,0,sizeof(uint32)*rows*2); A.update(); while (A.more()) { C.set(A.row()); C.push(fnop(A.value_real(),A.value_imag(), Bsrc[0],Bsrc[1])); A.advance(); } C.end(); Cmat[col] = C.copyout(); } return Cmat; } template uint32** ApplyLogicalOpRealScalarZP(int rows, int cols, const T** Asrc, const T* Bsrc, uint32 (*fnop)(T,T)) { uint32** Cmat = new uint32*[cols]; MemBlock bufferBlock(rows*2); uint32* buffer = &bufferBlock; for (int col=0;col A(Asrc[col],rows); RLEEncoder C(buffer,rows); memset(buffer,0,sizeof(uint32)*rows*2); A.update(); while (A.more()) { C.set(A.row()); C.push(fnop(A.value(),Bsrc[0])); A.advance(); } C.end(); Cmat[col] = C.copyout(); } return Cmat; } // Non-zero preserving template uint32** ApplyLogicalOpRealScalarNZ(int rows, int cols, const T** Asrc, const T* Bsrc, uint32 (*fnop)(T,T)) { uint32** Cmat = new uint32*[cols]; MemBlock AbufBlock(rows); T* Abuf = &AbufBlock; uint32* Cbuf = new uint32[rows]; MemBlock bufferBlock(rows*2); uint32* buffer = &bufferBlock; for (int col=0;col(Asrc[col],Abuf,rows); for (int row=0;row(buffer,Cbuf,rows); } return Cmat; } // Non-zero preserving template uint32** ApplyLogicalOpComplexScalarNZ(int rows, int cols, const T** Asrc, const T* Bsrc, uint32 (*fnop)(T,T,T,T)) { uint32** Cmat = new uint32*[cols]; MemBlock AbufBlock(2*rows); T* Abuf = &AbufBlock; MemBlock CbufBlock(rows); uint32* Cbuf = &CbufBlock; MemBlock bufferBlock(rows*2); uint32* buffer = &bufferBlock; for (int col=0;col(Asrc[col],Abuf,rows); for (int row=0;row(buffer,Cbuf,rows); } return Cmat; } template uint32** ApplyLogicalOpRealScalar(int rows, int cols, const T** Asrc, const T* Bsrc, uint32 (*fnop)(T,T)) { // Is fnop zero preserving? if (!fnop(0,Bsrc[0])) return ApplyLogicalOpRealScalarZP(rows,cols,Asrc,Bsrc,fnop); else return ApplyLogicalOpRealScalarNZ(rows,cols,Asrc,Bsrc,fnop); } template uint32** ApplyLogicalOpComplexScalar(int rows, int cols, const T** Asrc, const T* Bsrc, uint32 (*fnop)(T,T,T,T)) { // Is fnop zero preserving? if (!fnop(0,0,Bsrc[0],Bsrc[1])) return ApplyLogicalOpComplexScalarZP(rows,cols,Asrc,Bsrc,fnop); else return ApplyLogicalOpComplexScalarNZ(rows,cols,Asrc,Bsrc,fnop); } // Apply a logical op to two sparse matrices of the same size. // The algo to handle the operation depends on wheter or not // the op is zero preserving. For sparse-sparse interactions, // the ops stack up like this: // SLO_LT, 0 < 0 = 0 -> Zero Preserving // SLO_GT, 0 > 0 = 0 -> Zero Preserving // SLO_NE, 0 ~= 0 = 0 -> Zero Preserving // SLO_AND, 0 && 0 = 0 -> Zero Preserving // SLO_OR 0 || 0 = 0 -> Zero Preserving // SLO_LE, 0 <= 0 = 1 -> not zero preserving // SLO_GE, 0 >= 0 = 1 -> not zero preserving // SLO_EQ, 0 == 0 = 1 -> not zero preserving // SLO_NOT, ~0 = 1 -> not zero preserving // For an action that is _not_ zero preserving, we shouldn't be here - just // convert both arguments to full matrices and use the regular routines void* SparseSparseLogicalOp(Class dclass, int rows, int cols, const void *Ap, const void *Bp, SparseLogOpID opselect) { switch (dclass) { case FM_LOGICAL: switch (opselect) { case SLO_AND: return ApplyLogicalOpRealZP(rows,cols,(const uint32**)Ap,(const uint32**)Bp,slo_and_real); case SLO_OR: return ApplyLogicalOpRealZP(rows,cols,(const uint32**)Ap,(const uint32**)Bp,slo_or_real); default: throw Exception("unsupported sparse type/op combination"); } case FM_INT32: switch (opselect) { case SLO_LT: return ApplyLogicalOpRealZP(rows,cols,(const int32**)Ap,(const int32**)Bp,slo_lt_real); case SLO_GT: return ApplyLogicalOpRealZP(rows,cols,(const int32**)Ap,(const int32**)Bp,slo_gt_real); case SLO_NE: return ApplyLogicalOpRealZP(rows,cols,(const int32**)Ap,(const int32**)Bp,slo_ne_real); case SLO_LE: return ApplyLogicalOpRealNZ(rows,cols,(const int32**)Ap,(const int32**)Bp,slo_le_real); case SLO_GE: return ApplyLogicalOpRealNZ(rows,cols,(const int32**)Ap,(const int32**)Bp,slo_ge_real); case SLO_EQ: return ApplyLogicalOpRealNZ(rows,cols,(const int32**)Ap,(const int32**)Bp,slo_eq_real); default: throw Exception("unsupported sparse type/op combination"); } case FM_FLOAT: switch (opselect) { case SLO_LT: return ApplyLogicalOpRealZP(rows,cols,(const float**)Ap,(const float**)Bp,slo_lt_real); case SLO_GT: return ApplyLogicalOpRealZP(rows,cols,(const float**)Ap,(const float**)Bp,slo_gt_real); case SLO_NE: return ApplyLogicalOpRealZP(rows,cols,(const float**)Ap,(const float**)Bp,slo_ne_real); case SLO_LE: return ApplyLogicalOpRealNZ(rows,cols,(const float**)Ap,(const float**)Bp,slo_le_real); case SLO_GE: return ApplyLogicalOpRealNZ(rows,cols,(const float**)Ap,(const float**)Bp,slo_ge_real); case SLO_EQ: return ApplyLogicalOpRealNZ(rows,cols,(const float**)Ap,(const float**)Bp,slo_eq_real); default: throw Exception("unsupported sparse type/op combination"); } case FM_DOUBLE: switch (opselect) { case SLO_LT: return ApplyLogicalOpRealZP(rows,cols,(const double**)Ap,(const double**)Bp,slo_lt_real); case SLO_GT: return ApplyLogicalOpRealZP(rows,cols,(const double**)Ap,(const double**)Bp,slo_gt_real); case SLO_NE: return ApplyLogicalOpRealZP(rows,cols,(const double**)Ap,(const double**)Bp,slo_ne_real); case SLO_LE: return ApplyLogicalOpRealNZ(rows,cols,(const double**)Ap,(const double**)Bp,slo_le_real); case SLO_GE: return ApplyLogicalOpRealNZ(rows,cols,(const double**)Ap,(const double**)Bp,slo_ge_real); case SLO_EQ: return ApplyLogicalOpRealNZ(rows,cols,(const double**)Ap,(const double**)Bp,slo_eq_real); default: throw Exception("unsupported sparse type/op combination"); } case FM_COMPLEX: switch (opselect) { case SLO_LT: return ApplyLogicalOpComplexZP(rows,cols,(const float**)Ap,(const float**)Bp,slo_lt_complex); case SLO_GT: return ApplyLogicalOpComplexZP(rows,cols,(const float**)Ap,(const float**)Bp,slo_gt_complex); case SLO_NE: return ApplyLogicalOpComplexZP(rows,cols,(const float**)Ap,(const float**)Bp,slo_ne_complex); case SLO_LE: return ApplyLogicalOpComplexNZ(rows,cols,(const float**)Ap,(const float**)Bp,slo_le_complex); case SLO_GE: return ApplyLogicalOpComplexNZ(rows,cols,(const float**)Ap,(const float**)Bp,slo_ge_complex); case SLO_EQ: return ApplyLogicalOpComplexNZ(rows,cols,(const float**)Ap,(const float**)Bp,slo_eq_complex); default: throw Exception("unsupported sparse type/op combination"); } case FM_DCOMPLEX: switch (opselect) { case SLO_LT: return ApplyLogicalOpComplexZP(rows,cols,(const double**)Ap,(const double**)Bp,slo_lt_complex); case SLO_GT: return ApplyLogicalOpComplexZP(rows,cols,(const double**)Ap,(const double**)Bp,slo_gt_complex); case SLO_NE: return ApplyLogicalOpComplexZP(rows,cols,(const double**)Ap,(const double**)Bp,slo_ne_complex); case SLO_LE: return ApplyLogicalOpComplexNZ(rows,cols,(const double**)Ap,(const double**)Bp,slo_le_complex); case SLO_GE: return ApplyLogicalOpComplexNZ(rows,cols,(const double**)Ap,(const double**)Bp,slo_ge_complex); case SLO_EQ: return ApplyLogicalOpComplexNZ(rows,cols,(const double**)Ap,(const double**)Bp,slo_eq_complex); default: throw Exception("unsupported sparse type/op combination"); } default: throw Exception("unsupported sparse type/op combination"); } } void* SparseScalarLogicalOp(Class dclass, int rows, int cols, const void *Ap, const void *Bp, SparseLogOpID opselect) { switch (dclass) { case FM_LOGICAL: { uint32 Btmp = ((logical*) Bp)[0]; switch (opselect) { case SLO_AND: return ApplyLogicalOpRealScalar(rows,cols,(const uint32**)Ap,(const uint32*)&Btmp,slo_and_real); case SLO_OR: return ApplyLogicalOpRealScalar(rows,cols,(const uint32**)Ap,(const uint32*)&Btmp,slo_or_real); default: throw Exception("unsupported sparse type/op combination"); } } case FM_INT32: switch (opselect) { case SLO_LT: return ApplyLogicalOpRealScalar(rows,cols,(const int32**)Ap,(const int32*)Bp,slo_lt_real); case SLO_GT: return ApplyLogicalOpRealScalar(rows,cols,(const int32**)Ap,(const int32*)Bp,slo_gt_real); case SLO_NE: return ApplyLogicalOpRealScalar(rows,cols,(const int32**)Ap,(const int32*)Bp,slo_ne_real); case SLO_LE: return ApplyLogicalOpRealScalar(rows,cols,(const int32**)Ap,(const int32*)Bp,slo_le_real); case SLO_GE: return ApplyLogicalOpRealScalar(rows,cols,(const int32**)Ap,(const int32*)Bp,slo_ge_real); case SLO_EQ: return ApplyLogicalOpRealScalar(rows,cols,(const int32**)Ap,(const int32*)Bp,slo_eq_real); default: throw Exception("unsupported sparse type/op combination"); } case FM_FLOAT: switch (opselect) { case SLO_LT: return ApplyLogicalOpRealScalar(rows,cols,(const float**)Ap,(const float*)Bp,slo_lt_real); case SLO_GT: return ApplyLogicalOpRealScalar(rows,cols,(const float**)Ap,(const float*)Bp,slo_gt_real); case SLO_NE: return ApplyLogicalOpRealScalar(rows,cols,(const float**)Ap,(const float*)Bp,slo_ne_real); case SLO_LE: return ApplyLogicalOpRealScalar(rows,cols,(const float**)Ap,(const float*)Bp,slo_le_real); case SLO_GE: return ApplyLogicalOpRealScalar(rows,cols,(const float**)Ap,(const float*)Bp,slo_ge_real); case SLO_EQ: return ApplyLogicalOpRealScalar(rows,cols,(const float**)Ap,(const float*)Bp,slo_eq_real); default: throw Exception("unsupported sparse type/op combination"); } case FM_DOUBLE: switch (opselect) { case SLO_LT: return ApplyLogicalOpRealScalar(rows,cols,(const double**)Ap,(const double*)Bp,slo_lt_real); case SLO_GT: return ApplyLogicalOpRealScalar(rows,cols,(const double**)Ap,(const double*)Bp,slo_gt_real); case SLO_NE: return ApplyLogicalOpRealScalar(rows,cols,(const double**)Ap,(const double*)Bp,slo_ne_real); case SLO_LE: return ApplyLogicalOpRealScalar(rows,cols,(const double**)Ap,(const double*)Bp,slo_le_real); case SLO_GE: return ApplyLogicalOpRealScalar(rows,cols,(const double**)Ap,(const double*)Bp,slo_ge_real); case SLO_EQ: return ApplyLogicalOpRealScalar(rows,cols,(const double**)Ap,(const double*)Bp,slo_eq_real); default: throw Exception("unsupported sparse type/op combination"); } case FM_COMPLEX: switch (opselect) { case SLO_LT: return ApplyLogicalOpComplexScalar(rows,cols,(const float**)Ap,(const float*)Bp,slo_lt_complex); case SLO_GT: return ApplyLogicalOpComplexScalar(rows,cols,(const float**)Ap,(const float*)Bp,slo_gt_complex); case SLO_NE: return ApplyLogicalOpComplexScalar(rows,cols,(const float**)Ap,(const float*)Bp,slo_ne_complex); case SLO_LE: return ApplyLogicalOpComplexScalar(rows,cols,(const float**)Ap,(const float*)Bp,slo_le_complex); case SLO_GE: return ApplyLogicalOpComplexScalar(rows,cols,(const float**)Ap,(const float*)Bp,slo_ge_complex); case SLO_EQ: return ApplyLogicalOpComplexScalar(rows,cols,(const float**)Ap,(const float*)Bp,slo_eq_complex); default: throw Exception("unsupported sparse type/op combination"); } case FM_DCOMPLEX: switch (opselect) { case SLO_LT: return ApplyLogicalOpComplexScalar(rows,cols,(const double**)Ap,(const double*)Bp,slo_lt_complex); case SLO_GT: return ApplyLogicalOpComplexScalar(rows,cols,(const double**)Ap,(const double*)Bp,slo_gt_complex); case SLO_NE: return ApplyLogicalOpComplexScalar(rows,cols,(const double**)Ap,(const double*)Bp,slo_ne_complex); case SLO_LE: return ApplyLogicalOpComplexScalar(rows,cols,(const double**)Ap,(const double*)Bp,slo_le_complex); case SLO_GE: return ApplyLogicalOpComplexScalar(rows,cols,(const double**)Ap,(const double*)Bp,slo_ge_complex); case SLO_EQ: return ApplyLogicalOpComplexScalar(rows,cols,(const double**)Ap,(const double*)Bp,slo_eq_complex); default: throw Exception("unsupported sparse type/op combination"); } default: throw Exception("unsupported sparse type/op combination"); } } template T** SparseAbsFunctionReal(int rows, int cols, const T**src) { T** dp; dp = new T*[cols]; MemBlock bufferBlock(rows*2); T* buffer = &bufferBlock; for (int p=0;p B(buffer,rows); RLEDecoder A(src[p],rows); A.update(); while (A.more()) { B.set(A.row()); B.push((T)(fabs((double)A.value()))); A.advance(); } B.end(); dp[p] = B.copyout(); } return dp; } template T** SparseAbsFunctionComplex(int rows, int cols, const T**src) { T** dp; dp = new T*[cols]; MemBlock bufferBlock(rows*2); T* buffer = &bufferBlock; for (int p=0;p B(buffer,rows); RLEDecoderComplex A(src[p],rows); A.update(); while (A.more()) { B.set(A.row()); B.push(complex_abs(A.value_real(),A.value_imag())); A.advance(); } B.end(); dp[p] = B.copyout(); } return dp; } void* SparseAbsFunction(Class dclass, int rows, int cols, const void *Ap) { switch(dclass) { case FM_INT32: return SparseAbsFunctionReal(rows,cols,(const int32**)Ap); case FM_FLOAT: return SparseAbsFunctionReal(rows,cols,(const float**)Ap); case FM_DOUBLE: return SparseAbsFunctionReal(rows,cols,(const double**)Ap); case FM_COMPLEX: return SparseAbsFunctionComplex(rows,cols,(const float**)Ap); case FM_DCOMPLEX: return SparseAbsFunctionComplex(rows,cols,(const double**)Ap); default: throw Exception("unsupported sparse matrix type in argument to abs"); } } template void* SparseMatrixMaxColumnsReal(int rows, int cols, const T** A) { T **out = (T**) new T*[cols]; MemBlock bufferBlock(3); T* buffer = &bufferBlock; for (int col=0;col Acmp(A[col], rows); Acmp.update(); while (Acmp.more()) { if (!IsNaN(Acmp.value())) { if (max_inited) maxval = (Acmp.value() > maxval) ? Acmp.value() : maxval; else { maxval = Acmp.value(); max_inited = true; } } Acmp.advance(); } RLEEncoder Bcmp(buffer,1); if (!max_inited) Bcmp.push(0); else Bcmp.push(maxval); Bcmp.end(); out[col] = Bcmp.copyout(); } return out; } template void* SparseMatrixMaxColumnsComplex(int rows, int cols, const T** A) { T **out = (T**) new T*[cols]; MemBlock bufferBlock(5); T* buffer = &bufferBlock; for (int col=0;col Acmp(A[col], rows); Acmp.update(); while (Acmp.more()) { if (!IsNaN(Acmp.value_real()) && !IsNaN(Acmp.value_imag())) { T tstval = complex_abs(Acmp.value_real(),Acmp.value_imag()); if (max_inited) { if (tstval > maxval_abs) { maxval_abs = tstval; maxval_real = Acmp.value_real(); maxval_imag = Acmp.value_imag(); } } else { max_inited = true; maxval_abs = tstval; maxval_real = Acmp.value_real(); maxval_imag = Acmp.value_imag(); } } Acmp.advance(); } RLEEncoderComplex Bcmp(buffer,1); Bcmp.push(maxval_real,maxval_imag); Bcmp.end(); out[col] = Bcmp.copyout(); } return out; } template void* SparseMatrixMaxRowsReal(int rows, int cols, const T** A) { T** out = (T**) new T*[1]; MemBlock bufferBlock(rows); T* buffer = &bufferBlock; MemBlock initBlock(rows); bool* inited = &initBlock; for (int col=0;col Acmp(A[col], rows); Acmp.update(); while (Acmp.more()) { if (!IsNaN(Acmp.value())) { if (inited[Acmp.row()]) { buffer[Acmp.row()] = (buffer[Acmp.row()] > Acmp.value()) ? buffer[Acmp.row()] : Acmp.value(); } else { buffer[Acmp.row()] = Acmp.value(); inited[Acmp.row()] = true; } } Acmp.advance(); } } MemBlock cbufBlock(rows*2); T* cbuf = &cbufBlock; out[0] = CompressRealVector(cbuf, buffer, rows); return out; } template void* SparseMatrixMaxRowsComplex(int rows, int cols, const T** A) { T** out = (T**) new T*[1]; MemBlock bufferBlock(2*rows); T* buffer = &bufferBlock; MemBlock bufferBlockMag(2*rows); T* bufferMag = &bufferBlockMag; MemBlock initBlock(rows); bool *inited = &initBlock; for (int col=0;col Acmp(A[col], rows); Acmp.update(); while (Acmp.more()) { if (!IsNaN(Acmp.value_real()) && !IsNaN(Acmp.value_imag())) { T tstval = complex_abs(Acmp.value_real(),Acmp.value_imag()); if (inited[Acmp.row()]) { if (tstval > bufferMag[Acmp.row()]) { bufferMag[Acmp.row()] = tstval; buffer[Acmp.row()*2] = Acmp.value_real(); buffer[Acmp.row()*2+1] = Acmp.value_imag(); } } else { inited[Acmp.row()] = true; bufferMag[Acmp.row()] = tstval; buffer[Acmp.row()*2] = Acmp.value_real(); buffer[Acmp.row()*2+1] = Acmp.value_imag(); } } Acmp.advance(); } } MemBlock cbufBlock(rows*4); T* cbuf = &cbufBlock; out[0] = CompressComplexVector(cbuf, buffer, rows); return out; } void* SparseMatrixMaxRows(Class dclass, int Arows, int Acols, const void *Ap) { switch(dclass) { case FM_INT32: return SparseMatrixMaxRowsReal(Arows, Acols, (const int32**) Ap); case FM_FLOAT: return SparseMatrixMaxRowsReal(Arows, Acols, (const float**) Ap); case FM_DOUBLE: return SparseMatrixMaxRowsReal(Arows, Acols, (const double**) Ap); case FM_COMPLEX: return SparseMatrixMaxRowsComplex(Arows, Acols, (const float**) Ap); case FM_DCOMPLEX: return SparseMatrixMaxRowsComplex(Arows, Acols, (const double**) Ap); default: throw Exception("unexpected type in sparse matrix max rows"); } } void* SparseMatrixMaxColumns(Class dclass, int Arows, int Acols, const void *Ap) { switch(dclass) { case FM_INT32: return SparseMatrixMaxColumnsReal(Arows, Acols, (const int32**) Ap); case FM_FLOAT: return SparseMatrixMaxColumnsReal(Arows, Acols, (const float**) Ap); case FM_DOUBLE: return SparseMatrixMaxColumnsReal(Arows, Acols, (const double**) Ap); case FM_COMPLEX: return SparseMatrixMaxColumnsComplex(Arows, Acols, (const float**) Ap); case FM_DCOMPLEX: return SparseMatrixMaxColumnsComplex(Arows, Acols, (const double**) Ap); default: throw Exception("unexpected type in sparse matrix max cols"); } } template void* SparseMatrixMinColumnsReal(int rows, int cols, const T** A) { T **out = (T**) new T*[cols]; MemBlock bufferBlock(3); T* buffer = &bufferBlock; for (int col=0;col Acmp(A[col], rows); Acmp.update(); while (Acmp.more()) { if (!IsNaN(Acmp.value())) { if (min_inited) minval = (Acmp.value() < minval) ? Acmp.value() : minval; else { minval = Acmp.value(); min_inited = true; } } Acmp.advance(); } RLEEncoder Bcmp(buffer,1); if (!min_inited) Bcmp.push(0); else Bcmp.push(minval); Bcmp.end(); out[col] = Bcmp.copyout(); } return out; } template void* SparseMatrixMinColumnsComplex(int rows, int cols, const T** A) { T **out = (T**) new T*[cols]; MemBlock bufferBlock(5); T* buffer = &bufferBlock; for (int col=0;col Acmp(A[col], rows); Acmp.update(); while (Acmp.more()) { if (!IsNaN(Acmp.value_real()) && !IsNaN(Acmp.value_imag())) { T tstval = complex_abs(Acmp.value_real(),Acmp.value_imag()); if (min_inited) { if (tstval < minval_abs) { minval_abs = tstval; minval_real = Acmp.value_real(); minval_imag = Acmp.value_imag(); } } else { min_inited = true; minval_abs = tstval; minval_real = Acmp.value_real(); minval_imag = Acmp.value_imag(); } } Acmp.advance(); } RLEEncoderComplex Bcmp(buffer,1); Bcmp.push(minval_real,minval_imag); Bcmp.end(); out[col] = Bcmp.copyout(); } return out; } template void* SparseMatrixMinRowsReal(int rows, int cols, const T** A) { T** out = (T**) new T*[1]; MemBlock bufferBlock(rows); T* buffer = &bufferBlock; MemBlock initBlock(rows); bool* inited = &initBlock; for (int col=0;col Acmp(A[col], rows); Acmp.update(); while (Acmp.more()) { if (!IsNaN(Acmp.value())) { if (inited[Acmp.row()]) { buffer[Acmp.row()] = (buffer[Acmp.row()] < Acmp.value()) ? buffer[Acmp.row()] : Acmp.value(); } else { buffer[Acmp.row()] = Acmp.value(); inited[Acmp.row()] = true; } } Acmp.advance(); } } MemBlock cbufBlock(rows*2); T* cbuf = &cbufBlock; out[0] = CompressRealVector(cbuf, buffer, rows); return out; } template void* SparseMatrixMinRowsComplex(int rows, int cols, const T** A) { T** out = (T**) new T*[1]; MemBlock bufferBlock(2*rows); T* buffer = &bufferBlock; MemBlock bufferBlockMag(2*rows); T* bufferMag = &bufferBlockMag; MemBlock initBlock(rows); bool *inited = &initBlock; for (int col=0;col Acmp(A[col], rows); Acmp.update(); while (Acmp.more()) { if (!IsNaN(Acmp.value_real()) && !IsNaN(Acmp.value_imag())) { T tstval = complex_abs(Acmp.value_real(),Acmp.value_imag()); if (inited[Acmp.row()]) { if (tstval < bufferMag[Acmp.row()]) { bufferMag[Acmp.row()] = tstval; buffer[Acmp.row()*2] = Acmp.value_real(); buffer[Acmp.row()*2+1] = Acmp.value_imag(); } } else { inited[Acmp.row()] = true; bufferMag[Acmp.row()] = tstval; buffer[Acmp.row()*2] = Acmp.value_real(); buffer[Acmp.row()*2+1] = Acmp.value_imag(); } } Acmp.advance(); } } MemBlock cbufBlock(rows*4); T* cbuf = &cbufBlock; out[0] = CompressComplexVector(cbuf, buffer, rows); return out; } void* SparseMatrixMinRows(Class dclass, int Arows, int Acols, const void *Ap) { switch(dclass) { case FM_INT32: return SparseMatrixMinRowsReal(Arows, Acols, (const int32**) Ap); case FM_FLOAT: return SparseMatrixMinRowsReal(Arows, Acols, (const float**) Ap); case FM_DOUBLE: return SparseMatrixMinRowsReal(Arows, Acols, (const double**) Ap); case FM_COMPLEX: return SparseMatrixMinRowsComplex(Arows, Acols, (const float**) Ap); case FM_DCOMPLEX: return SparseMatrixMinRowsComplex(Arows, Acols, (const double**) Ap); default: throw Exception("unexpected type in sparse matrix min rows"); } } void* SparseMatrixMinColumns(Class dclass, int Arows, int Acols, const void *Ap) { switch(dclass) { case FM_INT32: return SparseMatrixMinColumnsReal(Arows, Acols, (const int32**) Ap); case FM_FLOAT: return SparseMatrixMinColumnsReal(Arows, Acols, (const float**) Ap); case FM_DOUBLE: return SparseMatrixMinColumnsReal(Arows, Acols, (const double**) Ap); case FM_COMPLEX: return SparseMatrixMinColumnsComplex(Arows, Acols, (const float**) Ap); case FM_DCOMPLEX: return SparseMatrixMinColumnsComplex(Arows, Acols, (const double**) Ap); default: throw Exception("unexpected type in sparse matrix min cols"); } } template void* SparseGreaterThanReal(int rows, int cols, const T** Amat, const T** Bmat) { T** Cmat = (T**) new T*[cols]; MemBlock bufferBlock(rows*2); T* buffer = &bufferBlock; for (int i=0;i A(Amat[i],rows); RLEDecoder B(Bmat[i],rows); RLEEncoder C(buffer,rows); A.update(); B.update(); while (A.more() || B.more()) { if (A.row() == B.row()) { C.set(A.row()); if (A.value() > B.value()) C.push(A.value()); else C.push(B.value()); A.advance(); B.advance(); } else if (A.row() < B.row()) { C.set(A.row()); if (A.value() > 0) C.push(A.value()); A.advance(); } else { C.set(B.row()); if (B.value() > 0) C.push(B.value()); B.advance(); } } C.end(); Cmat[i] = C.copyout(); } return Cmat; } template void* SparseGreaterThanComplex(int rows, int cols, const T** Amat, const T** Bmat) { T** Cmat = (T**) new T*[cols]; MemBlock bufferBlock(rows*4); T* buffer = &bufferBlock; for (int i=0;i A(Amat[i],rows); RLEDecoderComplex B(Bmat[i],rows); RLEEncoderComplex C(buffer,rows); A.update(); B.update(); while (A.more() || B.more()) { if (A.row() == B.row()) { C.set(A.row()); if (complex_abs(A.value_real(),A.value_imag()) > complex_abs(B.value_real(),B.value_imag())) C.push(A.value_real(),A.value_imag()); else C.push(B.value_real(),B.value_imag()); A.advance(); B.advance(); } else if (A.row() < B.row()) { C.set(A.row()); C.push(A.value_real(),A.value_imag()); A.advance(); } else { C.set(B.row()); C.push(B.value_real(),B.value_imag()); B.advance(); } } C.end(); Cmat[i] = C.copyout(); } return Cmat; } template void* SparseLessThanReal(int rows, int cols, const T** Amat, const T** Bmat) { T** Cmat = (T**) new T*[cols]; MemBlock bufferBlock(rows*2); T* buffer = &bufferBlock; for (int i=0;i A(Amat[i],rows); RLEDecoder B(Bmat[i],rows); RLEEncoder C(buffer,rows); A.update(); B.update(); while (A.more() || B.more()) { if (A.row() == B.row()) { C.set(A.row()); if (A.value() < B.value()) C.push(A.value()); else C.push(B.value()); A.advance(); B.advance(); } else if (A.row() < B.row()) { C.set(A.row()); if (A.value() < 0) C.push(A.value()); A.advance(); } else { C.set(B.row()); if (B.value() < 0) C.push(B.value()); B.advance(); } } C.end(); Cmat[i] = C.copyout(); } return Cmat; } template void* SparseLessThanComplex(int rows, int cols, const T** Amat, const T** Bmat) { T** Cmat = (T**) new T*[cols]; MemBlock bufferBlock(rows*4); T* buffer = &bufferBlock; for (int i=0;i A(Amat[i],rows); RLEDecoderComplex B(Bmat[i],rows); RLEEncoderComplex C(buffer,rows); A.update(); B.update(); while (A.more() || B.more()) { if (A.row() == B.row()) { C.set(A.row()); if (complex_abs(A.value_real(),A.value_imag()) < complex_abs(B.value_real(),B.value_imag())) C.push(A.value_real(),A.value_imag()); else C.push(B.value_real(),B.value_imag()); A.advance(); B.advance(); } else if (A.row() < B.row()) A.advance(); else B.advance(); } C.end(); Cmat[i] = C.copyout(); } return Cmat; } void* SparseGreaterThan(Class dclass, int rows, int cols, const void *Ap, const void *Bp) { switch(dclass) { case FM_INT32: return SparseGreaterThanReal(rows,cols,(const int32**)Ap,(const int32**)Bp); case FM_FLOAT: return SparseGreaterThanReal(rows,cols,(const float**)Ap,(const float**)Bp); case FM_DOUBLE: return SparseGreaterThanReal(rows,cols,(const double**)Ap,(const double**)Bp); case FM_COMPLEX: return SparseGreaterThanComplex(rows,cols,(const float**)Ap,(const float**)Bp); case FM_DCOMPLEX: return SparseGreaterThanComplex(rows,cols,(const double**)Ap,(const double**)Bp); default: throw Exception("unexpected type in sparse matrix max"); } } void* SparseLessThan(Class dclass, int rows, int cols, const void *Ap, const void *Bp) { switch(dclass) { case FM_INT32: return SparseLessThanReal(rows,cols,(const int32**)Ap,(const int32**)Bp); case FM_FLOAT: return SparseLessThanReal(rows,cols,(const float**)Ap,(const float**)Bp); case FM_DOUBLE: return SparseLessThanReal(rows,cols,(const double**)Ap,(const double**)Bp); case FM_COMPLEX: return SparseLessThanComplex(rows,cols,(const float**)Ap,(const float**)Bp); case FM_DCOMPLEX: return SparseLessThanComplex(rows,cols,(const double**)Ap,(const double**)Bp); default: throw Exception("unexpected type in sparse matrix min"); } }