#include "BSprivate.h"
/*@ BStri_mult - Multiply the matrix (A - shift*B) by a block of vectors
Input Parameters:
. A - a sparse matrix
. comm_A - the communication structure for A
. B - a sparse matrix
. comm_B - the communication structure for B
. v1 - the block of vectors to multiply by
. t1 - a block of work vectors
. t2 - a block of work vectors
. shift - the shift value in (A-shift*B)
. BS - the number of vectors in v1
. procinfo - the usual processor stuff
Output Parameters:
. v2 - the resulting block of vectors
Returns:
void
Notes:
Different code is used to multiply a single vector than is used
to multiply a block of vectors (this improves efficiency). Also
different code is used if shift=0.0. If B is NULL, then we
assume that it is the identity matrix.
@*/
void BStri_mult(BSpar_mat *A, BScomm *comm_A, BSpar_mat *B, BScomm *comm_B,
FLOAT *v1, FLOAT *v2, FLOAT *t1, FLOAT *t2, FLOAT shift, int BS,
BSprocinfo *procinfo)
{
int i, j, n;
FLOAT *t_t1, *t_t2, *t_v1, *t_v2;
if ((B == NULL) || (shift == 0.0)) {
if (BS == 1) {
MLOG_ELM(procinfo->procset);
if (procinfo->single) {
BSforward1(A,v1,v2,comm_A,procinfo); CHKERR(0);
} else {
BSforward(A,v1,v2,comm_A,procinfo); CHKERR(0);
}
MLOG_ACC(MM_FORWARD);
MLOG_ELM(procinfo->procset);
if (procinfo->single) {
BSbackward1(A,v1,v2,comm_A,procinfo); CHKERR(0);
} else {
BSbackward(A,v1,v2,comm_A,procinfo); CHKERR(0);
}
MLOG_ACC(MM_BACKWARD);
if (shift != 0.0) {
n = A->num_rows;
for (i=0;i<n;i++) {
v2[i] -= shift*(v1[i]/(fabs(A->scale_diag[i])));
}
}
} else {
MLOG_ELM(procinfo->procset);
BSb_forward(A,v1,v2,comm_A,BS,procinfo); CHKERR(0);
MLOG_ACC(BMM_FORWARD);
MLOG_ELM(procinfo->procset);
BSb_backward(A,v1,v2,comm_A,BS,procinfo); CHKERR(0);
MLOG_ACC(BMM_BACKWARD);
if (shift != 0.0) {
n = A->num_rows;
for (i=0;i<BS;i++) {
t_v2 = &(v2[i*n]);
/* is tv_1 this correct? */
t_v1 = &(v1[i*n]);
for (j=0;j<n;j++) {
t_v2[j] -= shift*(t_v1[j]/A->scale_diag[j]);
}
}
}
}
} else {
if (BS == 1) {
n = A->num_rows;
MLOG_ELM(procinfo->procset);
if (procinfo->single) {
BSforward1(A,v1,v2,comm_A,procinfo); CHKERR(0);
} else {
BSforward(A,v1,v2,comm_A,procinfo); CHKERR(0);
}
MLOG_ACC(MM_FORWARD);
MLOG_ELM(procinfo->procset);
if (procinfo->single) {
BSbackward1(A,v1,v2,comm_A,procinfo); CHKERR(0);
} else {
BSbackward(A,v1,v2,comm_A,procinfo); CHKERR(0);
}
MLOG_ACC(MM_BACKWARD);
if(A->icc_storage) {
for (i=0;i<n;i++) {
t1[i] = v1[i]/sqrt(A->scale_diag[i]);
}
} else {
for (i=0;i<n;i++) {
t1[i] = v1[i]/sqrt(fabs(A->scale_diag[i]));
}
}
BSiperm_dvec(t1,t2,A->perm); CHKERR(0);
BSperm_dvec(t2,t1,B->perm); CHKERR(0);
MLOG_ELM(procinfo->procset);
if (procinfo->single) {
BSforward1(B,t1,t2,comm_B,procinfo); CHKERR(0);
} else {
BSforward(B,t1,t2,comm_B,procinfo); CHKERR(0);
}
MLOG_ACC(OMM_FORWARD);
MLOG_ELM(procinfo->procset);
if (procinfo->single) {
BSbackward1(B,t1,t2,comm_B,procinfo); CHKERR(0);
} else {
BSbackward(B,t1,t2,comm_B,procinfo); CHKERR(0);
}
MLOG_ACC(OMM_BACKWARD);
BSiperm_dvec(t2,t1,B->perm); CHKERR(0);
BSperm_dvec(t1,t2,A->perm); CHKERR(0);
if(A->icc_storage) {
for (i=0;i<n;i++) {
v2[i] -= shift*(t2[i]/sqrt(A->scale_diag[i]));
}
} else {
for (i=0;i<n;i++) {
v2[i] -= shift*(t2[i]/sqrt(fabs(A->scale_diag[i])));
}
}
} else {
n = A->num_rows;
MLOG_ELM(procinfo->procset);
BSb_forward(A,v1,v2,comm_A,BS,procinfo); CHKERR(0);
MLOG_ACC(BMM_FORWARD);
MLOG_ELM(procinfo->procset);
BSb_backward(A,v1,v2,comm_A,BS,procinfo); CHKERR(0);
MLOG_ACC(BMM_BACKWARD);
if(A->icc_storage) {
for (i=0;i<BS;i++) {
t_t1 = &(t1[i*n]);
t_v1 = &(v1[i*n]);
for (j=0;j<n;j++) {
t_t1[j] = t_v1[j]/sqrt(A->scale_diag[j]);
}
}
} else {
for (i=0;i<BS;i++) {
t_t1 = &(t1[i*n]);
t_v1 = &(v1[i*n]);
for (j=0;j<n;j++) {
t_t1[j] = t_v1[j]/sqrt(fabs(A->scale_diag[j]));
}
}
}
for (i=0;i<BS;i++) {
t_t1 = &(t1[i*n]);
t_t2 = &(t2[i*n]);
BSiperm_dvec(t_t1,t_t2,A->perm); CHKERR(0);
BSperm_dvec(t_t2,t_t1,B->perm); CHKERR(0);
}
MLOG_ELM(procinfo->procset);
BSb_forward(B,t1,t2,comm_B,BS,procinfo); CHKERR(0);
MLOG_ACC(OBMM_FORWARD);
MLOG_ELM(procinfo->procset);
BSb_backward(B,t1,t2,comm_B,BS,procinfo); CHKERR(0);
MLOG_ACC(OBMM_BACKWARD);
for (i=0;i<BS;i++) {
t_t1 = &(t1[i*n]);
t_t2 = &(t2[i*n]);
BSiperm_dvec(t_t2,t_t1,B->perm); CHKERR(0);
BSperm_dvec(t_t1,t_t2,A->perm); CHKERR(0);
}
if(A->icc_storage) {
for (i=0;i<BS;i++) {
t_t2 = &(t2[i*n]);
t_v2 = &(v2[i*n]);
for (j=0;j<n;j++) {
t_v2[j] -= shift*(t_t2[j]/sqrt(A->scale_diag[j]));
}
}
} else {
for (i=0;i<BS;i++) {
t_t2 = &(t2[i*n]);
t_v2 = &(v2[i*n]);
for (j=0;j<n;j++) {
t_v2[j] -= shift*(t_t2[j]/sqrt(fabs(A->scale_diag[j])));
}
}
}
}
}
}
syntax highlighted by Code2HTML, v. 0.9.1