/* * -- SuperLU MT routine (version 1.0) -- * Univ. of California Berkeley, Xerox Palo Alto Research Center, * and Lawrence Berkeley National Lab. * August 15, 1997 * */ #include #include "pdsp_defs.h" void dCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, double *nzval, int *rowind, int *colptr, Stype_t stype, Dtype_t dtype, Mtype_t mtype) { NCformat *Astore; A->Stype = stype; A->Dtype = dtype; A->Mtype = mtype; A->nrow = m; A->ncol = n; A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) ); Astore = (NCformat *) A->Store; Astore->nnz = nnz; Astore->nzval = nzval; Astore->rowind = rowind; Astore->colptr = colptr; } void dCreate_CompCol_Permuted(SuperMatrix *A, int m, int n, int nnz, double *nzval, int *rowind, int *colbeg, int *colend, Stype_t stype, Dtype_t dtype, Mtype_t mtype) { NCPformat *Astore; A->Stype = stype; A->Dtype = dtype; A->Mtype = mtype; A->nrow = m; A->ncol = n; A->Store = (void *) SUPERLU_MALLOC( sizeof(NCPformat) ); Astore = (NCPformat *) A->Store; Astore->nnz = nnz; Astore->nzval = nzval; Astore->rowind = rowind; Astore->colbeg = colbeg; Astore->colend = colend; } /* Copy matrix A into matrix B. */ void dCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B) { NCformat *Astore, *Bstore; int ncol, nnz, i; B->Stype = A->Stype; B->Dtype = A->Dtype; B->Mtype = A->Mtype; B->nrow = A->nrow;; B->ncol = ncol = A->ncol; Astore = (NCformat *) A->Store; Bstore = (NCformat *) B->Store; Bstore->nnz = nnz = Astore->nnz; for (i = 0; i < nnz; ++i) ((double *)Bstore->nzval)[i] = ((double *)Astore->nzval)[i]; for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i]; for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i]; } int dPrint_CompCol_Matrix(SuperMatrix *A) { NCformat *Astore; register int i; double *dp; printf("\nCompCol matrix: "); printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); Astore = (NCformat *) A->Store; dp = (double *) Astore->nzval; printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz); printf("\nnzval: "); for (i = 0; i < Astore->nnz; ++i) printf("%f ", dp[i]); printf("\nrowind: "); for (i = 0; i < Astore->nnz; ++i) printf("%d ", Astore->rowind[i]); printf("\ncolptr: "); for (i = 0; i <= A->ncol; ++i) printf("%d ", Astore->colptr[i]); printf("\nend CompCol matrix.\n"); return 0; } int dPrint_Dense_Matrix(SuperMatrix *A) { DNformat *Astore; register int i; double *dp; printf("\nDense matrix: "); printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); Astore = (DNformat *) A->Store; dp = (double *) Astore->nzval; printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda); printf("\nnzval: "); for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]); printf("\nend Dense matrix.\n"); return 0; } void dCreate_Dense_Matrix(SuperMatrix *X, int m, int n, double *x, int ldx, Stype_t stype, Dtype_t dtype, Mtype_t mtype) { DNformat *Xstore; X->Stype = stype; X->Dtype = dtype; X->Mtype = mtype; X->nrow = m; X->ncol = n; X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) ); Xstore = (DNformat *) X->Store; Xstore->lda = ldx; Xstore->nzval = (double *) x; } void dCopy_Dense_Matrix(int M, int N, double *X, int ldx, double *Y, int ldy) { /* * * Purpose * ======= * * Copies a two-dimensional matrix X to another matrix Y. */ int i, j; for (j = 0; j < N; ++j) for (i = 0; i < M; ++i) Y[i + j*ldy] = X[i + j*ldx]; } void dCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, double *nzval, int *nzval_colptr, int *rowind, int *rowind_colptr, int *col_to_sup, int *sup_to_col, Stype_t stype, Dtype_t dtype, Mtype_t mtype) { SCformat *Lstore; L->Stype = stype; L->Dtype = dtype; L->Mtype = mtype; L->nrow = m; L->ncol = n; L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) ); Lstore = L->Store; Lstore->nnz = nnz; Lstore->nsuper = col_to_sup[n]; Lstore->nzval = nzval; Lstore->nzval_colptr = nzval_colptr; Lstore->rowind = rowind; Lstore->rowind_colptr = rowind_colptr; Lstore->col_to_sup = col_to_sup; Lstore->sup_to_col = sup_to_col; } void dCreate_SuperNode_Permuted(SuperMatrix *L, int m, int n, int nnz, double *nzval, int *nzval_colbeg, int *nzval_colend, int *rowind, int *rowind_colbeg, int *rowind_colend, int *col_to_sup, int *sup_to_colbeg, int *sup_to_colend, Stype_t stype, Dtype_t dtype, Mtype_t mtype) { SCPformat *Lstore; L->Stype = stype; L->Dtype = dtype; L->Mtype = mtype; L->nrow = m; L->ncol = n; L->Store = (void *) SUPERLU_MALLOC( sizeof(SCPformat) ); Lstore = L->Store; Lstore->nnz = nnz; Lstore->nsuper = col_to_sup[n]; Lstore->nzval = nzval; Lstore->nzval_colbeg = nzval_colbeg; Lstore->nzval_colend = nzval_colend; Lstore->rowind = rowind; Lstore->rowind_colbeg = rowind_colbeg; Lstore->rowind_colend = rowind_colend; Lstore->col_to_sup = col_to_sup; Lstore->sup_to_colbeg = sup_to_colbeg; Lstore->sup_to_colend = sup_to_colend; } /* * Diagnostic print of column "jcol" in the U/L factor. */ void dprint_lu_col(int pnum, char *msg, int pcol, int jcol, int w, int pivrow, int *xprune, GlobalLU_t *Glu) { int i, k, fsupc; int *xsup, *supno; int *xlsub, *xlsub_end, *lsub; double *lusup; int *xlusup, *xlusup_end; xsup = Glu->xsup; supno = Glu->supno; lsub = Glu->lsub; xlsub = Glu->xlsub; xlsub_end = Glu->xlsub_end; lusup = Glu->lusup; xlusup = Glu->xlusup; xlusup_end = Glu->xlusup_end; printf("(%d)%s fstcol %d,col %d,w %d: pivrow %d, supno %d, xprune %d\n", pnum, msg, pcol, jcol, w, pivrow, supno[jcol], xprune[jcol]); printf("(%d)\tU-col: xusub %d - %d\n", pnum, Glu->xusub[jcol], Glu->xusub_end[jcol]); for (i = Glu->xusub[jcol]; i < Glu->xusub_end[jcol]; i++) printf("(%d)\t%d\t%8e\n", pnum, Glu->usub[i], Glu->ucol[i]); fsupc = xsup[supno[jcol]]; k = xlusup[jcol]; printf("(%d)\tL-col in s-node: xlsub %d - %d, xlusup %d - %d\n", pnum, xlsub[fsupc],xlsub_end[fsupc],xlusup[jcol],xlusup_end[jcol]); for (i = xlsub[fsupc]; i < xlsub_end[fsupc]; ++i) printf("(%d)\t%d\t%.8e\n", pnum, lsub[i], lusup[k++]); fflush(stdout); } /* * Check whether vec[*] == 0. For the two vectors dense[*] and tempv[*], * this invariant should be mantained before and after calling some * numeric update routines, such as "panel_bmod" and "column_bmod". */ void dcheck_zero_vec(int pnum, char *msg, int n, double *vec) { register int i, nonzero; nonzero = FALSE; for (i = 0; i < n; ++i) { if (vec[i] != 0.0) { printf("(%d) vec[%d] = %.10e; should be zero!\n", pnum, i, vec[i]); nonzero = TRUE; } } if ( nonzero ) { printf("(%d) %s\n", pnum, msg); ABORT("Not a zero vector."); } } void dGenXtrue(int n, int nrhs, double *x, int ldx) { int i, j; for (j = 0; j < nrhs; ++j) for (i = 0; i < n; ++i) x[i + j*ldx] = 1.0;/* + (double)(i+1.)/n;*/ } /* * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's */ void dFillRHS(trans_t trans, int nrhs, double *x, int ldx, SuperMatrix *A, SuperMatrix *B) { NCformat *Astore; double *Aval; DNformat *Bstore; double *rhs; int ldc; char trans_c[1]; Astore = A->Store; Aval = (double *) Astore->nzval; Bstore = B->Store; rhs = Bstore->nzval; ldc = Bstore->lda; if ( trans == NOTRANS ) *trans_c = 'N'; else *trans_c = 'T'; sp_dgemm(trans_c, A->nrow, nrhs, A->ncol, 1.0, A, x, ldx, 0.0, rhs, ldc); } /* * Fills a double precision array with a given value. */ void dfill(double *a, int alen, double dval) { register int i; for (i = 0; i < alen; i++) a[i] = dval; } /* * Check the inf-norm of the error vector */ void dinf_norm_error(int nrhs, SuperMatrix *X, double *xtrue) { DNformat *Xstore; double err, xnorm; double *Xmat, *soln_work; int i, j; Xstore = X->Store; Xmat = Xstore->nzval; for (j = 0; j < nrhs; j++) { soln_work = &Xmat[j*Xstore->lda]; err = xnorm = 0.0; for (i = 0; i < X->nrow; i++) { err = MAX(err, fabs(soln_work[i] - xtrue[i])); xnorm = MAX(xnorm, fabs(soln_work[i])); } err = err / xnorm; printf("||X - Xtrue||/||X|| = %e\n", err); } } /* Print performance of the code. */ void dPrintPerf(SuperMatrix *L, SuperMatrix *U, superlu_memusage_t *superlu_memusage, double rpg, double rcond, double *ferr, double *berr, char *equed, Gstat_t *Gstat) { SCPformat *Lstore; NCPformat *Ustore; double *utime; flops_t *ops; utime = Gstat->utime; ops = Gstat->ops; if ( utime[FACT] != 0. ) printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT], ops[FACT]*1e-6/utime[FACT]); printf("Identify relaxed snodes = %8.2f\n", utime[RELAX]); if ( utime[SOLVE] != 0. ) printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE]); Lstore = (SCPformat *) L->Store; Ustore = (NCPformat *) U->Store; printf("\t#NZ in factor L = %d\n", Lstore->nnz); printf("\t#NZ in factor U = %d\n", Ustore->nnz); printf("\t#NZ in L+U = %d\n", Lstore->nnz + Ustore->nnz - L->ncol); printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", superlu_memusage->for_lu/1e6, superlu_memusage->total_needed/1e6, superlu_memusage->expansions); printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n"); printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n", utime[FACT], ops[FACT]*1e-6/utime[FACT], utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE], utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]); printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n"); printf("NUM:\t%e\t%e\t%e\t%e\t%s\n", rpg, rcond, ferr[0], berr[0], equed); #if 0 printf("\tTRSV (total%%)\tGEMV (total%%)\tfloat_time%%\tmax_n\tmax_m\tmin_n\tmin_m\tavg_n\tavg_m\n"); printf("BLAS:\t%.0f %.2f\t%.0f %.2f\t%.2f\t\t%d\t%d\t%d\t%d\t%.0f\t%.0f\n", ops[TRSV], ops[TRSV]/ops[FACT], ops[GEMV], ops[GEMV]/ops[FACT], utime[FLOAT]/utime[FACT], max_blas_n, max_gemv_m, min_blas_n, min_gemv_m, (float)sum_blas_n/num_blas, (float)sum_gemv_m/num_blas); printf("\tRCOND\tREFINE\tFERR\n"); printf("SOLVES:\t%d\t%d\t%d\n", no_solves[RCOND], no_solves[REFINE], no_solves[FERR]); flops_dist_for_matlab(); #endif } int print_double_vec(char *what, int n, int *ind, double *vec) { int i; printf("%s: n %d\n", what, n); for (i = 0; i < n; ++i) printf("%d\t%f\n", ind[i], vec[i]); return 0; }