/*@ BSmy_blas.h - This file includes the macros for inline versions
of the special cases of GEMV and TRMV that BlockSolve95 uses.
System Description:
These macros can double the performance of a BlockSolve95
iteration. The problem is that the vendor supplied BLAS
include quite a bit of error checking and overloading
which seriously degrades their performance for small
block sizes.
The level of loop unrolling can be specified with
DGEMV_UNROLL_LVL (the default is 9, which is the maximum).
MY_BLAS_DTRMV_ON and MY_BLAS_DGEMV_ON turn on the
respective inline macros in the appropriate BlockSolve95
routines.
@*/
/* Feature switches: when defined, the inline DTRMV / DGEMV macros are */
/* turned on in the BlockSolve95 routines that test for them. */
#define MY_BLAS_DTRMV_ON
#define MY_BLAS_DGEMV_ON
/* Highest N for which an unrolled "case" arm is generated.  9 is only */
/* the default: a value supplied earlier (e.g. -DDGEMV_UNROLL_LVL=4) is */
/* respected instead of being redefined. */
#ifndef DGEMV_UNROLL_LVL
#define DGEMV_UNROLL_LVL 9
#endif
/* Only arms 1..9 exist in this file, so a larger level cannot be honoured. */
#if (DGEMV_UNROLL_LVL > 9)
#error "DGEMV_UNROLL_LVL must not exceed 9 (the largest unrolled case provided)"
#endif
#ifdef MY_BLAS_DGEMV_ON
/* one of our special versions of DGEMV with sparse stuff */
/* TRANS='N' */
/* ALPHA=1 */
/* INC=1 */
/* BETA=1 */
/* INCY=1 */
/*
 * MY_DGEMV1_N_1111: the "case 1:" arm that MY_DGEMV_N_1111 splices into
 * its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] += X[0] * A[i]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N and LDA are not referenced.  Arguments are parenthesized
 * so expression arguments (e.g. A given as "p+off") expand correctly;
 * M is re-evaluated at every loop test.  FLOAT must be visible where the
 * macro is expanded.  Expands to nothing when DGEMV_UNROLL_LVL < 1.
 */
#if (DGEMV_UNROLL_LVL >= 1)
#define MY_DGEMV1_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 1: { \
  FLOAT t1_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] += t1_99*(A)[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] += t1_99*(A)[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] += t1_99*(A)[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMV1_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMV2_N_1111: the "case 2:" arm that MY_DGEMV_N_1111 splices into
 * its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] += X[0]*A[i] + X[1]*A[i + LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments expand correctly; M is re-evaluated at every
 * loop test.  Expands to nothing when DGEMV_UNROLL_LVL < 2.
 */
#if (DGEMV_UNROLL_LVL >= 2)
#define MY_DGEMV2_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 2: { \
  FLOAT *col1_99, *col2_99; \
  FLOAT t1_99, t2_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] += t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMV2_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMV3_N_1111: the "case 3:" arm that MY_DGEMV_N_1111 splices into
 * its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] += X[0]*A[i] + X[1]*A[i + LDA] + X[2]*A[i + 2*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 3.
 */
#if (DGEMV_UNROLL_LVL >= 3)
#define MY_DGEMV3_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 3: { \
  FLOAT *col1_99, *col2_99, *col3_99; \
  FLOAT t1_99, t2_99, t3_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] += t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMV3_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMV4_N_1111: the "case 4:" arm that MY_DGEMV_N_1111 splices into
 * its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] += X[0]*A[i] + ... + X[3]*A[i + 3*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 4.
 */
#if (DGEMV_UNROLL_LVL >= 4)
#define MY_DGEMV4_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 4: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  t4_99 = x_99[3]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] += t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2] + \
        t4_99*col4_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMV4_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMV5_N_1111: the "case 5:" arm that MY_DGEMV_N_1111 splices into
 * its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] += X[0]*A[i] + ... + X[4]*A[i + 4*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 5.
 */
#if (DGEMV_UNROLL_LVL >= 5)
#define MY_DGEMV5_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 5: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  t4_99 = x_99[3]; t5_99 = x_99[4]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] += t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2] + \
        t4_99*col4_99[i99_2] + t5_99*col5_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMV5_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMV6_N_1111: the "case 6:" arm that MY_DGEMV_N_1111 splices into
 * its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] += X[0]*A[i] + ... + X[5]*A[i + 5*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 6.
 */
#if (DGEMV_UNROLL_LVL >= 6)
#define MY_DGEMV6_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 6: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  t4_99 = x_99[3]; t5_99 = x_99[4]; t6_99 = x_99[5]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] += t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2] + \
        t4_99*col4_99[i99_2] + t5_99*col5_99[i99_2] + t6_99*col6_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMV6_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMV7_N_1111: the "case 7:" arm that MY_DGEMV_N_1111 splices into
 * its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] += X[0]*A[i] + ... + X[6]*A[i + 6*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 7.
 */
#if (DGEMV_UNROLL_LVL >= 7)
#define MY_DGEMV7_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 7: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  t4_99 = x_99[3]; t5_99 = x_99[4]; t6_99 = x_99[5]; \
  t7_99 = x_99[6]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99] + \
        t7_99*col7_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] += t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2] + \
        t4_99*col4_99[i99_2] + t5_99*col5_99[i99_2] + t6_99*col6_99[i99_2] + \
        t7_99*col7_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99] + \
        t7_99*col7_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMV7_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMV8_N_1111: the "case 8:" arm that MY_DGEMV_N_1111 splices into
 * its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] += X[0]*A[i] + ... + X[7]*A[i + 7*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 8.
 */
#if (DGEMV_UNROLL_LVL >= 8)
#define MY_DGEMV8_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 8: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99, *col8_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99, t8_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  t4_99 = x_99[3]; t5_99 = x_99[4]; t6_99 = x_99[5]; \
  t7_99 = x_99[6]; t8_99 = x_99[7]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  col8_99 = &((A)[(LDA)*7]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99] + \
        t7_99*col7_99[i99] + t8_99*col8_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] += t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2] + \
        t4_99*col4_99[i99_2] + t5_99*col5_99[i99_2] + t6_99*col6_99[i99_2] + \
        t7_99*col7_99[i99_2] + t8_99*col8_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99] + \
        t7_99*col7_99[i99] + t8_99*col8_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMV8_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMV9_N_1111: the "case 9:" arm that MY_DGEMV_N_1111 splices into
 * its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] += X[0]*A[i] + ... + X[8]*A[i + 8*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 9.
 */
#if (DGEMV_UNROLL_LVL >= 9)
#define MY_DGEMV9_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 9: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99, *col8_99, *col9_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99, t8_99, t9_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  t4_99 = x_99[3]; t5_99 = x_99[4]; t6_99 = x_99[5]; \
  t7_99 = x_99[6]; t8_99 = x_99[7]; t9_99 = x_99[8]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  col8_99 = &((A)[(LDA)*7]); \
  col9_99 = &((A)[(LDA)*8]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99] + \
        t7_99*col7_99[i99] + t8_99*col8_99[i99] + t9_99*col9_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] += t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2] + \
        t4_99*col4_99[i99_2] + t5_99*col5_99[i99_2] + t6_99*col6_99[i99_2] + \
        t7_99*col7_99[i99_2] + t8_99*col8_99[i99_2] + t9_99*col9_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] += t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99] + \
        t7_99*col7_99[i99] + t8_99*col8_99[i99] + t9_99*col9_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMV9_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/******* main routine ****** */
/*
 * MY_DGEMV_N_1111: entry point for the ALPHA=1 untransposed kernel.
 * Expands to a brace-enclosed block that switches on N and runs the
 * matching MY_DGEMV<k>_N_1111 "case" arm.  Arms above DGEMV_UNROLL_LVL
 * expand to nothing, so for such an N (or any N with no arm) the block
 * does nothing at all.
 */
#define MY_DGEMV_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
{ \
  switch (N) { \
    MY_DGEMV1_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMV2_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMV3_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMV4_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMV5_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMV6_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMV7_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMV8_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMV9_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    default: /* no unrolled arm for this N: leave Y untouched */ \
      break; \
  } \
}
/* one of our special versions of DGEMV with sparse stuff */
/* TRANS='N' */
/* ALPHA=-1 */
/* INC=1 */
/* BETA=1 */
/* INCY=1 */
/*
 * MY_DGEMVM1_1_N_1111: the "case 1:" arm that MY_DGEMVM1_N_1111 splices
 * into its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] -= X[0] * A[i]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N and LDA are not referenced.  Arguments are parenthesized
 * so expression arguments (e.g. A given as "p+off") expand correctly;
 * M is re-evaluated at every loop test.  FLOAT must be visible where the
 * macro is expanded.  Expands to nothing when DGEMV_UNROLL_LVL < 1.
 */
#if (DGEMV_UNROLL_LVL >= 1)
#define MY_DGEMVM1_1_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 1: { \
  FLOAT t1_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*(A)[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] -= t1_99*(A)[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*(A)[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMVM1_1_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMVM1_2_N_1111: the "case 2:" arm that MY_DGEMVM1_N_1111 splices
 * into its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] -= X[0]*A[i] + X[1]*A[i + LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments expand correctly; M is re-evaluated at every
 * loop test.  Expands to nothing when DGEMV_UNROLL_LVL < 2.
 */
#if (DGEMV_UNROLL_LVL >= 2)
#define MY_DGEMVM1_2_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 2: { \
  FLOAT *col1_99, *col2_99; \
  FLOAT t1_99, t2_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] -= t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMVM1_2_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMVM1_3_N_1111: the "case 3:" arm that MY_DGEMVM1_N_1111 splices
 * into its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] -= X[0]*A[i] + X[1]*A[i + LDA] + X[2]*A[i + 2*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 3.
 */
#if (DGEMV_UNROLL_LVL >= 3)
#define MY_DGEMVM1_3_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 3: { \
  FLOAT *col1_99, *col2_99, *col3_99; \
  FLOAT t1_99, t2_99, t3_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] -= t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMVM1_3_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMVM1_4_N_1111: the "case 4:" arm that MY_DGEMVM1_N_1111 splices
 * into its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] -= X[0]*A[i] + ... + X[3]*A[i + 3*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 4.
 */
#if (DGEMV_UNROLL_LVL >= 4)
#define MY_DGEMVM1_4_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 4: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  t4_99 = x_99[3]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] -= t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2] + \
        t4_99*col4_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMVM1_4_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMVM1_5_N_1111: the "case 5:" arm that MY_DGEMVM1_N_1111 splices
 * into its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] -= X[0]*A[i] + ... + X[4]*A[i + 4*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 5.
 */
#if (DGEMV_UNROLL_LVL >= 5)
#define MY_DGEMVM1_5_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 5: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  t4_99 = x_99[3]; t5_99 = x_99[4]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] -= t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2] + \
        t4_99*col4_99[i99_2] + t5_99*col5_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMVM1_5_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMVM1_6_N_1111: the "case 6:" arm that MY_DGEMVM1_N_1111 splices
 * into its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] -= X[0]*A[i] + ... + X[5]*A[i + 5*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 6.
 */
#if (DGEMV_UNROLL_LVL >= 6)
#define MY_DGEMVM1_6_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 6: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  t4_99 = x_99[3]; t5_99 = x_99[4]; t6_99 = x_99[5]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] -= t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2] + \
        t4_99*col4_99[i99_2] + t5_99*col5_99[i99_2] + t6_99*col6_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMVM1_6_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMVM1_7_N_1111: the "case 7:" arm that MY_DGEMVM1_N_1111 splices
 * into its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] -= X[0]*A[i] + ... + X[6]*A[i + 6*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 7.
 */
#if (DGEMV_UNROLL_LVL >= 7)
#define MY_DGEMVM1_7_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 7: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  t4_99 = x_99[3]; t5_99 = x_99[4]; t6_99 = x_99[5]; \
  t7_99 = x_99[6]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99] + \
        t7_99*col7_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] -= t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2] + \
        t4_99*col4_99[i99_2] + t5_99*col5_99[i99_2] + t6_99*col6_99[i99_2] + \
        t7_99*col7_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99] + \
        t7_99*col7_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMVM1_7_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMVM1_8_N_1111: the "case 8:" arm that MY_DGEMVM1_N_1111 splices
 * into its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] -= X[0]*A[i] + ... + X[7]*A[i + 7*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 8.
 */
#if (DGEMV_UNROLL_LVL >= 8)
#define MY_DGEMVM1_8_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 8: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99, *col8_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99, t8_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  t4_99 = x_99[3]; t5_99 = x_99[4]; t6_99 = x_99[5]; \
  t7_99 = x_99[6]; t8_99 = x_99[7]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  col8_99 = &((A)[(LDA)*7]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99] + \
        t7_99*col7_99[i99] + t8_99*col8_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] -= t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2] + \
        t4_99*col4_99[i99_2] + t5_99*col5_99[i99_2] + t6_99*col6_99[i99_2] + \
        t7_99*col7_99[i99_2] + t8_99*col8_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99] + \
        t7_99*col7_99[i99] + t8_99*col8_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMVM1_8_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/*
 * MY_DGEMVM1_9_N_1111: the "case 9:" arm that MY_DGEMVM1_N_1111 splices
 * into its switch (usable only inside a switch).  For each row i in [0,M):
 *     Y[Y_SP_IND[i]] -= X[0]*A[i] + ... + X[8]*A[i + 8*LDA]
 * Two rows are processed per pass, then a cleanup loop takes an odd
 * last row.  N is not referenced.  Arguments are parenthesized so
 * expression arguments (e.g. LDA given as "n+1") expand correctly; M is
 * re-evaluated at every loop test.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 9.
 */
#if (DGEMV_UNROLL_LVL >= 9)
#define MY_DGEMVM1_9_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
case 9: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99, *col8_99, *col9_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99, t8_99, t9_99; \
  FLOAT *x_99; \
  int i99, i99_2; \
  x_99 = (X); \
  t1_99 = x_99[0]; t2_99 = x_99[1]; t3_99 = x_99[2]; \
  t4_99 = x_99[3]; t5_99 = x_99[4]; t6_99 = x_99[5]; \
  t7_99 = x_99[6]; t8_99 = x_99[7]; t9_99 = x_99[8]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  col8_99 = &((A)[(LDA)*7]); \
  col9_99 = &((A)[(LDA)*8]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99] + \
        t7_99*col7_99[i99] + t8_99*col8_99[i99] + t9_99*col9_99[i99]; \
    (Y)[(Y_SP_IND)[i99_2]] -= t1_99*col1_99[i99_2] + t2_99*col2_99[i99_2] + t3_99*col3_99[i99_2] + \
        t4_99*col4_99[i99_2] + t5_99*col5_99[i99_2] + t6_99*col6_99[i99_2] + \
        t7_99*col7_99[i99_2] + t8_99*col8_99[i99_2] + t9_99*col9_99[i99_2]; \
  } \
  for (;i99<(M);i99++) { \
    (Y)[(Y_SP_IND)[i99]] -= t1_99*col1_99[i99] + t2_99*col2_99[i99] + t3_99*col3_99[i99] + \
        t4_99*col4_99[i99] + t5_99*col5_99[i99] + t6_99*col6_99[i99] + \
        t7_99*col7_99[i99] + t8_99*col8_99[i99] + t9_99*col9_99[i99]; \
  } \
  break; \
}
#else
#define MY_DGEMVM1_9_N_1111(M,N,A,LDA,X,Y,Y_SP_IND)
#endif
/******* main routine ****** */
/*
 * MY_DGEMVM1_N_1111: entry point for the ALPHA=-1 untransposed kernel.
 * Expands to a brace-enclosed block that switches on N and runs the
 * matching MY_DGEMVM1_<k>_N_1111 "case" arm.  Arms above
 * DGEMV_UNROLL_LVL expand to nothing, so for such an N (or any N with
 * no arm) the block does nothing at all.
 */
#define MY_DGEMVM1_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
{ \
  switch (N) { \
    MY_DGEMVM1_1_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMVM1_2_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMVM1_3_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMVM1_4_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMVM1_5_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMVM1_6_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMVM1_7_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMVM1_8_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    MY_DGEMVM1_9_N_1111(M,N,A,LDA,X,Y,Y_SP_IND) \
    default: /* no unrolled arm for this N: leave Y untouched */ \
      break; \
  } \
}
/* one of our special versions of DGEMV with sparse stuff */
/* TRANS='Y' */
/* ALPHA=1 */
/* INC=1 */
/* BETA=0 */
/* INCY=1 */
/*
 * MY_DGEMV1_Y_1101: the "case 1:" arm that MY_DGEMV_Y_1101 splices into
 * its switch (usable only inside a switch).  Computes
 *     Y[0] = sum over i in [0,M) of A[i] * X[X_SP_IND[i]]
 * overwriting the previous Y[0].  Rows are consumed two per pass, then
 * a cleanup loop takes an odd last row.  N and LDA are not referenced.
 * Arguments are parenthesized so expression arguments expand correctly;
 * M is re-evaluated at every loop test.  The disabled stub now lists
 * the same parameters as the real macro.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 1.
 */
#if (DGEMV_UNROLL_LVL >= 1)
#define MY_DGEMV1_Y_1101(M,N,A,LDA,X,X_SP_IND,Y) \
case 1: { \
  FLOAT t1_99; \
  FLOAT *y_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = 0.0; \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    t1_99 += (A)[i99]*(X)[(X_SP_IND)[i99]] + (A)[i99_2]*(X)[(X_SP_IND)[i99_2]]; \
  } \
  for (;i99<(M);i99++) { \
    t1_99 += (A)[i99]*(X)[(X_SP_IND)[i99]]; \
  } \
  y_99[0] = t1_99; \
  break; \
}
#else
#define MY_DGEMV1_Y_1101(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV2_Y_1101: the "case 2:" arm that MY_DGEMV_Y_1101 splices into
 * its switch (usable only inside a switch).  For j in [0,2) computes
 *     Y[j] = sum over i in [0,M) of A[i + j*LDA] * X[X_SP_IND[i]]
 * overwriting the previous Y[j].  Rows are consumed two per pass, then
 * a cleanup loop takes an odd last row.  N is not referenced.
 * Arguments are parenthesized so expression arguments expand correctly;
 * M is re-evaluated at every loop test.  The disabled stub now lists
 * the same parameters as the real macro.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 2.
 */
#if (DGEMV_UNROLL_LVL >= 2)
#define MY_DGEMV2_Y_1101(M,N,A,LDA,X,X_SP_IND,Y) \
case 2: { \
  FLOAT *col1_99, *col2_99; \
  FLOAT t1_99, t2_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = 0.0; t2_99 = 0.0; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; y_99[1] = t2_99; \
  break; \
}
#else
#define MY_DGEMV2_Y_1101(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV3_Y_1101: the "case 3:" arm that MY_DGEMV_Y_1101 splices into
 * its switch (usable only inside a switch).  For j in [0,3) computes
 *     Y[j] = sum over i in [0,M) of A[i + j*LDA] * X[X_SP_IND[i]]
 * overwriting the previous Y[j].  Rows are consumed two per pass, then
 * a cleanup loop takes an odd last row.  N is not referenced.
 * Arguments are parenthesized so expression arguments (e.g. LDA given
 * as "n+1") expand correctly; M is re-evaluated at every loop test.
 * The disabled stub now lists the same parameters as the real macro.
 * Expands to nothing when DGEMV_UNROLL_LVL < 3.
 */
#if (DGEMV_UNROLL_LVL >= 3)
#define MY_DGEMV3_Y_1101(M,N,A,LDA,X,X_SP_IND,Y) \
case 3: { \
  FLOAT *col1_99, *col2_99, *col3_99; \
  FLOAT t1_99, t2_99, t3_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = 0.0; t2_99 = 0.0; t3_99 = 0.0; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; y_99[1] = t2_99; y_99[2] = t3_99; \
  break; \
}
#else
#define MY_DGEMV3_Y_1101(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV4_Y_1101: the "case 4:" arm that MY_DGEMV_Y_1101 splices into
 * its switch (usable only inside a switch).  For j in [0,4) computes
 *     Y[j] = sum over i in [0,M) of A[i + j*LDA] * X[X_SP_IND[i]]
 * overwriting the previous Y[j].  Rows are consumed two per pass, then
 * a cleanup loop takes an odd last row.  N is not referenced.
 * Arguments are parenthesized so expression arguments (e.g. LDA given
 * as "n+1") expand correctly; M is re-evaluated at every loop test.
 * The disabled stub now lists the same parameters as the real macro.
 * Expands to nothing when DGEMV_UNROLL_LVL < 4.
 */
#if (DGEMV_UNROLL_LVL >= 4)
#define MY_DGEMV4_Y_1101(M,N,A,LDA,X,X_SP_IND,Y) \
case 4: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = 0.0; t2_99 = 0.0; t3_99 = 0.0; \
  t4_99 = 0.0; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 += col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
    t4_99 += col4_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; y_99[1] = t2_99; y_99[2] = t3_99; \
  y_99[3] = t4_99; \
  break; \
}
#else
#define MY_DGEMV4_Y_1101(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV5_Y_1101: the "case 5:" arm that MY_DGEMV_Y_1101 splices into
 * its switch (usable only inside a switch).  For j in [0,5) computes
 *     Y[j] = sum over i in [0,M) of A[i + j*LDA] * X[X_SP_IND[i]]
 * overwriting the previous Y[j].  Rows are consumed two per pass, then
 * a cleanup loop takes an odd last row.  N is not referenced.
 * Arguments are parenthesized so expression arguments (e.g. LDA given
 * as "n+1") expand correctly; M is re-evaluated at every loop test.
 * The disabled stub now lists the same parameters as the real macro.
 * Expands to nothing when DGEMV_UNROLL_LVL < 5.
 */
#if (DGEMV_UNROLL_LVL >= 5)
#define MY_DGEMV5_Y_1101(M,N,A,LDA,X,X_SP_IND,Y) \
case 5: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = 0.0; t2_99 = 0.0; t3_99 = 0.0; \
  t4_99 = 0.0; t5_99 = 0.0; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 += col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 += col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
    t4_99 += col4_99[i99]*temp1_99; \
    t5_99 += col5_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; y_99[1] = t2_99; y_99[2] = t3_99; \
  y_99[3] = t4_99; y_99[4] = t5_99; \
  break; \
}
#else
#define MY_DGEMV5_Y_1101(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV6_Y_1101: the "case 6:" arm that MY_DGEMV_Y_1101 splices into
 * its switch (usable only inside a switch).  For j in [0,6) computes
 *     Y[j] = sum over i in [0,M) of A[i + j*LDA] * X[X_SP_IND[i]]
 * overwriting the previous Y[j].  Rows are consumed two per pass, then
 * a cleanup loop takes an odd last row.  N is not referenced.
 * Arguments are parenthesized so expression arguments (e.g. LDA given
 * as "n+1") expand correctly; M is re-evaluated at every loop test.
 * The disabled stub now lists the same parameters as the real macro.
 * Expands to nothing when DGEMV_UNROLL_LVL < 6.
 */
#if (DGEMV_UNROLL_LVL >= 6)
#define MY_DGEMV6_Y_1101(M,N,A,LDA,X,X_SP_IND,Y) \
case 6: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = 0.0; t2_99 = 0.0; t3_99 = 0.0; \
  t4_99 = 0.0; t5_99 = 0.0; t6_99 = 0.0; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 += col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 += col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
    t6_99 += col6_99[i99]*temp1_99 + col6_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
    t4_99 += col4_99[i99]*temp1_99; \
    t5_99 += col5_99[i99]*temp1_99; \
    t6_99 += col6_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; y_99[1] = t2_99; y_99[2] = t3_99; \
  y_99[3] = t4_99; y_99[4] = t5_99; y_99[5] = t6_99; \
  break; \
}
#else
#define MY_DGEMV6_Y_1101(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV7_Y_1101: the "case 7:" arm that MY_DGEMV_Y_1101 splices into
 * its switch (usable only inside a switch).  For j in [0,7) computes
 *     Y[j] = sum over i in [0,M) of A[i + j*LDA] * X[X_SP_IND[i]]
 * overwriting the previous Y[j].  Rows are consumed two per pass, then
 * a cleanup loop takes an odd last row.  N is not referenced.
 * Arguments are parenthesized so expression arguments (e.g. LDA given
 * as "n+1") expand correctly; M is re-evaluated at every loop test.
 * The disabled stub now lists the same parameters as the real macro.
 * Expands to nothing when DGEMV_UNROLL_LVL < 7.
 */
#if (DGEMV_UNROLL_LVL >= 7)
#define MY_DGEMV7_Y_1101(M,N,A,LDA,X,X_SP_IND,Y) \
case 7: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = 0.0; t2_99 = 0.0; t3_99 = 0.0; \
  t4_99 = 0.0; t5_99 = 0.0; t6_99 = 0.0; \
  t7_99 = 0.0; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 += col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 += col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
    t6_99 += col6_99[i99]*temp1_99 + col6_99[i99_2]*temp2_99; \
    t7_99 += col7_99[i99]*temp1_99 + col7_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
    t4_99 += col4_99[i99]*temp1_99; \
    t5_99 += col5_99[i99]*temp1_99; \
    t6_99 += col6_99[i99]*temp1_99; \
    t7_99 += col7_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; y_99[1] = t2_99; y_99[2] = t3_99; \
  y_99[3] = t4_99; y_99[4] = t5_99; y_99[5] = t6_99; \
  y_99[6] = t7_99; \
  break; \
}
#else
#define MY_DGEMV7_Y_1101(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV8_Y_1101: the "case 8:" arm that MY_DGEMV_Y_1101 splices into
 * its switch (usable only inside a switch).  For j in [0,8) computes
 *     Y[j] = sum over i in [0,M) of A[i + j*LDA] * X[X_SP_IND[i]]
 * overwriting the previous Y[j].  Rows are consumed two per pass, then
 * a cleanup loop takes an odd last row.  N is not referenced.
 * Arguments are parenthesized so expression arguments (e.g. LDA given
 * as "n+1") expand correctly; M is re-evaluated at every loop test.
 * The disabled stub now lists the same parameters as the real macro.
 * Expands to nothing when DGEMV_UNROLL_LVL < 8.
 */
#if (DGEMV_UNROLL_LVL >= 8)
#define MY_DGEMV8_Y_1101(M,N,A,LDA,X,X_SP_IND,Y) \
case 8: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99, *col8_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99, t8_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = 0.0; t2_99 = 0.0; t3_99 = 0.0; \
  t4_99 = 0.0; t5_99 = 0.0; t6_99 = 0.0; \
  t7_99 = 0.0; t8_99 = 0.0; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  col8_99 = &((A)[(LDA)*7]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 += col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 += col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
    t6_99 += col6_99[i99]*temp1_99 + col6_99[i99_2]*temp2_99; \
    t7_99 += col7_99[i99]*temp1_99 + col7_99[i99_2]*temp2_99; \
    t8_99 += col8_99[i99]*temp1_99 + col8_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
    t4_99 += col4_99[i99]*temp1_99; \
    t5_99 += col5_99[i99]*temp1_99; \
    t6_99 += col6_99[i99]*temp1_99; \
    t7_99 += col7_99[i99]*temp1_99; \
    t8_99 += col8_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; y_99[1] = t2_99; y_99[2] = t3_99; \
  y_99[3] = t4_99; y_99[4] = t5_99; y_99[5] = t6_99; \
  y_99[6] = t7_99; y_99[7] = t8_99; \
  break; \
}
#else
#define MY_DGEMV8_Y_1101(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV9_Y_1101: the "case 9:" arm that MY_DGEMV_Y_1101 splices into
 * its switch (usable only inside a switch).  For j in [0,9) computes
 *     Y[j] = sum over i in [0,M) of A[i + j*LDA] * X[X_SP_IND[i]]
 * overwriting the previous Y[j].  Rows are consumed two per pass, then
 * a cleanup loop takes an odd last row.  N is not referenced.
 * Arguments are parenthesized so expression arguments (e.g. LDA given
 * as "n+1") expand correctly; M is re-evaluated at every loop test.
 * The disabled stub now lists the same parameters as the real macro.
 * Expands to nothing when DGEMV_UNROLL_LVL < 9.
 */
#if (DGEMV_UNROLL_LVL >= 9)
#define MY_DGEMV9_Y_1101(M,N,A,LDA,X,X_SP_IND,Y) \
case 9: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99, *col8_99, *col9_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99, t8_99, t9_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = 0.0; t2_99 = 0.0; t3_99 = 0.0; \
  t4_99 = 0.0; t5_99 = 0.0; t6_99 = 0.0; \
  t7_99 = 0.0; t8_99 = 0.0; t9_99 = 0.0; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[LDA]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  col8_99 = &((A)[(LDA)*7]); \
  col9_99 = &((A)[(LDA)*8]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 += col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 += col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
    t6_99 += col6_99[i99]*temp1_99 + col6_99[i99_2]*temp2_99; \
    t7_99 += col7_99[i99]*temp1_99 + col7_99[i99_2]*temp2_99; \
    t8_99 += col8_99[i99]*temp1_99 + col8_99[i99_2]*temp2_99; \
    t9_99 += col9_99[i99]*temp1_99 + col9_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
    t4_99 += col4_99[i99]*temp1_99; \
    t5_99 += col5_99[i99]*temp1_99; \
    t6_99 += col6_99[i99]*temp1_99; \
    t7_99 += col7_99[i99]*temp1_99; \
    t8_99 += col8_99[i99]*temp1_99; \
    t9_99 += col9_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; y_99[1] = t2_99; y_99[2] = t3_99; \
  y_99[3] = t4_99; y_99[4] = t5_99; y_99[5] = t6_99; \
  y_99[6] = t7_99; y_99[7] = t8_99; y_99[8] = t9_99; \
  break; \
}
#else
#define MY_DGEMV9_Y_1101(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/******* main routine ****** */
/*
 * MY_DGEMV_Y_1101(M,N,A,LDA,X,X_SP_IND,Y): dispatch on the column count N
 * to the matching unrolled MY_DGEMV<N>_Y_1101 arm (each arm supplies its
 * own "case" label and "break").  An arm disabled by DGEMV_UNROLL_LVL
 * expands to nothing, so for such an N, or any N outside 1..9, the switch
 * does no work and Y is left untouched; the explicit default makes that
 * fall-through deliberate.
 */
#define MY_DGEMV_Y_1101(M,N,A,LDA,X,X_SP_IND,Y) \
{ \
  switch (N) { \
    MY_DGEMV1_Y_1101(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV2_Y_1101(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV3_Y_1101(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV4_Y_1101(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV5_Y_1101(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV6_Y_1101(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV7_Y_1101(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV8_Y_1101(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV9_Y_1101(M,N,A,LDA,X,X_SP_IND,Y); \
    default: break; \
  } \
}
/* one of our special versions of DGEMV with sparse stuff */
/* TRANS='Y' */
/* ALPHA=1 */
/* INCX=1 */
/* BETA=1 */
/* INCY=1 */
/*
 * MY_DGEMV1_Y_1111: the "case 1" arm of MY_DGEMV_Y_1111 (one column of A).
 * Adds to Y[0] the sum over i = 0..M-1 of A[i] * X[X_SP_IND[i]].
 * Rows are consumed two per pass, with a cleanup loop for a leftover row.
 * N and LDA are accepted but not used.  Arguments are parenthesized so
 * expression arguments expand correctly; they are evaluated more than once
 * and must be free of side effects.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 1.
 */
#if (DGEMV_UNROLL_LVL >= 1)
#define MY_DGEMV1_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 1: { \
  FLOAT t1_99; \
  FLOAT *y_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    t1_99 += (A)[i99]*(X)[(X_SP_IND)[i99]] + (A)[i99_2]*(X)[(X_SP_IND)[i99_2]]; \
  } \
  for (;i99<(M);i99++) { \
    t1_99 += (A)[i99]*(X)[(X_SP_IND)[i99]]; \
  } \
  y_99[0] = t1_99; \
  break; \
}
#else
#define MY_DGEMV1_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV2_Y_1111: the "case 2" arm of MY_DGEMV_Y_1111 (two columns of A).
 * For c = 0..1 it adds to Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 2.
 */
#if (DGEMV_UNROLL_LVL >= 2)
#define MY_DGEMV2_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 2: { \
  FLOAT *col1_99, *col2_99; \
  FLOAT t1_99, t2_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  break; \
}
#else
#define MY_DGEMV2_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV3_Y_1111: the "case 3" arm of MY_DGEMV_Y_1111 (three columns of A).
 * For c = 0..2 it adds to Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 3.
 */
#if (DGEMV_UNROLL_LVL >= 3)
#define MY_DGEMV3_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 3: { \
  FLOAT *col1_99, *col2_99, *col3_99; \
  FLOAT t1_99, t2_99, t3_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  break; \
}
#else
#define MY_DGEMV3_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV4_Y_1111: the "case 4" arm of MY_DGEMV_Y_1111 (four columns of A).
 * For c = 0..3 it adds to Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 4.
 */
#if (DGEMV_UNROLL_LVL >= 4)
#define MY_DGEMV4_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 4: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  t4_99 = y_99[3]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 += col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
    t4_99 += col4_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  y_99[3] = t4_99; \
  break; \
}
#else
#define MY_DGEMV4_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV5_Y_1111: the "case 5" arm of MY_DGEMV_Y_1111 (five columns of A).
 * For c = 0..4 it adds to Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 5.
 */
#if (DGEMV_UNROLL_LVL >= 5)
#define MY_DGEMV5_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 5: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  t4_99 = y_99[3]; \
  t5_99 = y_99[4]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 += col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 += col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
    t4_99 += col4_99[i99]*temp1_99; \
    t5_99 += col5_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  y_99[3] = t4_99; \
  y_99[4] = t5_99; \
  break; \
}
#else
#define MY_DGEMV5_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV6_Y_1111: the "case 6" arm of MY_DGEMV_Y_1111 (six columns of A).
 * For c = 0..5 it adds to Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 6.
 */
#if (DGEMV_UNROLL_LVL >= 6)
#define MY_DGEMV6_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 6: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  t4_99 = y_99[3]; \
  t5_99 = y_99[4]; \
  t6_99 = y_99[5]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 += col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 += col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
    t6_99 += col6_99[i99]*temp1_99 + col6_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
    t4_99 += col4_99[i99]*temp1_99; \
    t5_99 += col5_99[i99]*temp1_99; \
    t6_99 += col6_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  y_99[3] = t4_99; \
  y_99[4] = t5_99; \
  y_99[5] = t6_99; \
  break; \
}
#else
#define MY_DGEMV6_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV7_Y_1111: the "case 7" arm of MY_DGEMV_Y_1111 (seven columns of A).
 * For c = 0..6 it adds to Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 7.
 */
#if (DGEMV_UNROLL_LVL >= 7)
#define MY_DGEMV7_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 7: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  t4_99 = y_99[3]; \
  t5_99 = y_99[4]; \
  t6_99 = y_99[5]; \
  t7_99 = y_99[6]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 += col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 += col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
    t6_99 += col6_99[i99]*temp1_99 + col6_99[i99_2]*temp2_99; \
    t7_99 += col7_99[i99]*temp1_99 + col7_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
    t4_99 += col4_99[i99]*temp1_99; \
    t5_99 += col5_99[i99]*temp1_99; \
    t6_99 += col6_99[i99]*temp1_99; \
    t7_99 += col7_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  y_99[3] = t4_99; \
  y_99[4] = t5_99; \
  y_99[5] = t6_99; \
  y_99[6] = t7_99; \
  break; \
}
#else
#define MY_DGEMV7_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV8_Y_1111: the "case 8" arm of MY_DGEMV_Y_1111 (eight columns of A).
 * For c = 0..7 it adds to Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 8.
 */
#if (DGEMV_UNROLL_LVL >= 8)
#define MY_DGEMV8_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 8: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99, *col8_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99, t8_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  t4_99 = y_99[3]; \
  t5_99 = y_99[4]; \
  t6_99 = y_99[5]; \
  t7_99 = y_99[6]; \
  t8_99 = y_99[7]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  col8_99 = &((A)[(LDA)*7]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 += col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 += col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
    t6_99 += col6_99[i99]*temp1_99 + col6_99[i99_2]*temp2_99; \
    t7_99 += col7_99[i99]*temp1_99 + col7_99[i99_2]*temp2_99; \
    t8_99 += col8_99[i99]*temp1_99 + col8_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
    t4_99 += col4_99[i99]*temp1_99; \
    t5_99 += col5_99[i99]*temp1_99; \
    t6_99 += col6_99[i99]*temp1_99; \
    t7_99 += col7_99[i99]*temp1_99; \
    t8_99 += col8_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  y_99[3] = t4_99; \
  y_99[4] = t5_99; \
  y_99[5] = t6_99; \
  y_99[6] = t7_99; \
  y_99[7] = t8_99; \
  break; \
}
#else
#define MY_DGEMV8_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMV9_Y_1111: the "case 9" arm of MY_DGEMV_Y_1111 (nine columns of A).
 * For c = 0..8 it adds to Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 9.
 */
#if (DGEMV_UNROLL_LVL >= 9)
#define MY_DGEMV9_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 9: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99, *col8_99, *col9_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99, t8_99, t9_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  t4_99 = y_99[3]; \
  t5_99 = y_99[4]; \
  t6_99 = y_99[5]; \
  t7_99 = y_99[6]; \
  t8_99 = y_99[7]; \
  t9_99 = y_99[8]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  col8_99 = &((A)[(LDA)*7]); \
  col9_99 = &((A)[(LDA)*8]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 += col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 += col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 += col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 += col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 += col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
    t6_99 += col6_99[i99]*temp1_99 + col6_99[i99_2]*temp2_99; \
    t7_99 += col7_99[i99]*temp1_99 + col7_99[i99_2]*temp2_99; \
    t8_99 += col8_99[i99]*temp1_99 + col8_99[i99_2]*temp2_99; \
    t9_99 += col9_99[i99]*temp1_99 + col9_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 += col1_99[i99]*temp1_99; \
    t2_99 += col2_99[i99]*temp1_99; \
    t3_99 += col3_99[i99]*temp1_99; \
    t4_99 += col4_99[i99]*temp1_99; \
    t5_99 += col5_99[i99]*temp1_99; \
    t6_99 += col6_99[i99]*temp1_99; \
    t7_99 += col7_99[i99]*temp1_99; \
    t8_99 += col8_99[i99]*temp1_99; \
    t9_99 += col9_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  y_99[3] = t4_99; \
  y_99[4] = t5_99; \
  y_99[5] = t6_99; \
  y_99[6] = t7_99; \
  y_99[7] = t8_99; \
  y_99[8] = t9_99; \
  break; \
}
#else
#define MY_DGEMV9_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/******* main routine ****** */
/*
 * MY_DGEMV_Y_1111(M,N,A,LDA,X,X_SP_IND,Y): dispatch on the column count N
 * to the matching unrolled MY_DGEMV<N>_Y_1111 arm (each arm supplies its
 * own "case" label and "break").  An arm disabled by DGEMV_UNROLL_LVL
 * expands to nothing, so for such an N, or any N outside 1..9, the switch
 * does no work and Y is left untouched; the explicit default makes that
 * fall-through deliberate.
 */
#define MY_DGEMV_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
{ \
  switch (N) { \
    MY_DGEMV1_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV2_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV3_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV4_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV5_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV6_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV7_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV8_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMV9_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    default: break; \
  } \
}
/* one of our special versions of DGEMV with sparse stuff */
/* TRANS='Y' */
/* ALPHA=-1 */
/* INCX=1 */
/* BETA=1 */
/* INCY=1 */
/*
 * MY_DGEMVM1_1_Y_1111: the "case 1" arm of MY_DGEMVM1_Y_1111 (one column).
 * Subtracts from Y[0] the sum over i = 0..M-1 of A[i] * X[X_SP_IND[i]].
 * Rows are consumed two per pass, with a cleanup loop for a leftover row.
 * N and LDA are accepted but not used.  Arguments are parenthesized so
 * expression arguments expand correctly; they are evaluated more than once
 * and must be free of side effects.  Expands to nothing when
 * DGEMV_UNROLL_LVL < 1.
 */
#if (DGEMV_UNROLL_LVL >= 1)
#define MY_DGEMVM1_1_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 1: { \
  FLOAT t1_99; \
  FLOAT *y_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    t1_99 -= (A)[i99]*(X)[(X_SP_IND)[i99]] + (A)[i99_2]*(X)[(X_SP_IND)[i99_2]]; \
  } \
  for (;i99<(M);i99++) { \
    t1_99 -= (A)[i99]*(X)[(X_SP_IND)[i99]]; \
  } \
  y_99[0] = t1_99; \
  break; \
}
#else
#define MY_DGEMVM1_1_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMVM1_2_Y_1111: the "case 2" arm of MY_DGEMVM1_Y_1111 (two columns).
 * For c = 0..1 it subtracts from Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 2.
 */
#if (DGEMV_UNROLL_LVL >= 2)
#define MY_DGEMVM1_2_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 2: { \
  FLOAT *col1_99, *col2_99; \
  FLOAT t1_99, t2_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 -= col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 -= col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 -= col1_99[i99]*temp1_99; \
    t2_99 -= col2_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  break; \
}
#else
#define MY_DGEMVM1_2_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMVM1_3_Y_1111: the "case 3" arm of MY_DGEMVM1_Y_1111 (three columns).
 * For c = 0..2 it subtracts from Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 3.
 */
#if (DGEMV_UNROLL_LVL >= 3)
#define MY_DGEMVM1_3_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 3: { \
  FLOAT *col1_99, *col2_99, *col3_99; \
  FLOAT t1_99, t2_99, t3_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 -= col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 -= col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 -= col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 -= col1_99[i99]*temp1_99; \
    t2_99 -= col2_99[i99]*temp1_99; \
    t3_99 -= col3_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  break; \
}
#else
#define MY_DGEMVM1_3_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMVM1_4_Y_1111: the "case 4" arm of MY_DGEMVM1_Y_1111 (four columns).
 * For c = 0..3 it subtracts from Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 4.
 */
#if (DGEMV_UNROLL_LVL >= 4)
#define MY_DGEMVM1_4_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 4: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  t4_99 = y_99[3]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 -= col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 -= col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 -= col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 -= col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 -= col1_99[i99]*temp1_99; \
    t2_99 -= col2_99[i99]*temp1_99; \
    t3_99 -= col3_99[i99]*temp1_99; \
    t4_99 -= col4_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  y_99[3] = t4_99; \
  break; \
}
#else
#define MY_DGEMVM1_4_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMVM1_5_Y_1111: the "case 5" arm of MY_DGEMVM1_Y_1111 (five columns).
 * For c = 0..4 it subtracts from Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 5.
 */
#if (DGEMV_UNROLL_LVL >= 5)
#define MY_DGEMVM1_5_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 5: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  t4_99 = y_99[3]; \
  t5_99 = y_99[4]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 -= col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 -= col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 -= col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 -= col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 -= col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 -= col1_99[i99]*temp1_99; \
    t2_99 -= col2_99[i99]*temp1_99; \
    t3_99 -= col3_99[i99]*temp1_99; \
    t4_99 -= col4_99[i99]*temp1_99; \
    t5_99 -= col5_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  y_99[3] = t4_99; \
  y_99[4] = t5_99; \
  break; \
}
#else
#define MY_DGEMVM1_5_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMVM1_6_Y_1111: the "case 6" arm of MY_DGEMVM1_Y_1111 (six columns).
 * For c = 0..5 it subtracts from Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 6.
 */
#if (DGEMV_UNROLL_LVL >= 6)
#define MY_DGEMVM1_6_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 6: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  t4_99 = y_99[3]; \
  t5_99 = y_99[4]; \
  t6_99 = y_99[5]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 -= col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 -= col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 -= col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 -= col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 -= col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
    t6_99 -= col6_99[i99]*temp1_99 + col6_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 -= col1_99[i99]*temp1_99; \
    t2_99 -= col2_99[i99]*temp1_99; \
    t3_99 -= col3_99[i99]*temp1_99; \
    t4_99 -= col4_99[i99]*temp1_99; \
    t5_99 -= col5_99[i99]*temp1_99; \
    t6_99 -= col6_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  y_99[3] = t4_99; \
  y_99[4] = t5_99; \
  y_99[5] = t6_99; \
  break; \
}
#else
#define MY_DGEMVM1_6_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMVM1_7_Y_1111: the "case 7" arm of MY_DGEMVM1_Y_1111 (seven columns).
 * For c = 0..6 it subtracts from Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 7.
 */
#if (DGEMV_UNROLL_LVL >= 7)
#define MY_DGEMVM1_7_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 7: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  t4_99 = y_99[3]; \
  t5_99 = y_99[4]; \
  t6_99 = y_99[5]; \
  t7_99 = y_99[6]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 -= col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 -= col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 -= col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 -= col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 -= col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
    t6_99 -= col6_99[i99]*temp1_99 + col6_99[i99_2]*temp2_99; \
    t7_99 -= col7_99[i99]*temp1_99 + col7_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 -= col1_99[i99]*temp1_99; \
    t2_99 -= col2_99[i99]*temp1_99; \
    t3_99 -= col3_99[i99]*temp1_99; \
    t4_99 -= col4_99[i99]*temp1_99; \
    t5_99 -= col5_99[i99]*temp1_99; \
    t6_99 -= col6_99[i99]*temp1_99; \
    t7_99 -= col7_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  y_99[3] = t4_99; \
  y_99[4] = t5_99; \
  y_99[5] = t6_99; \
  y_99[6] = t7_99; \
  break; \
}
#else
#define MY_DGEMVM1_7_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMVM1_8_Y_1111: the "case 8" arm of MY_DGEMVM1_Y_1111 (eight columns).
 * For c = 0..7 it subtracts from Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 8.
 */
#if (DGEMV_UNROLL_LVL >= 8)
#define MY_DGEMVM1_8_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 8: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99, *col8_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99, t8_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  t4_99 = y_99[3]; \
  t5_99 = y_99[4]; \
  t6_99 = y_99[5]; \
  t7_99 = y_99[6]; \
  t8_99 = y_99[7]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  col8_99 = &((A)[(LDA)*7]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 -= col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 -= col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 -= col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 -= col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 -= col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
    t6_99 -= col6_99[i99]*temp1_99 + col6_99[i99_2]*temp2_99; \
    t7_99 -= col7_99[i99]*temp1_99 + col7_99[i99_2]*temp2_99; \
    t8_99 -= col8_99[i99]*temp1_99 + col8_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 -= col1_99[i99]*temp1_99; \
    t2_99 -= col2_99[i99]*temp1_99; \
    t3_99 -= col3_99[i99]*temp1_99; \
    t4_99 -= col4_99[i99]*temp1_99; \
    t5_99 -= col5_99[i99]*temp1_99; \
    t6_99 -= col6_99[i99]*temp1_99; \
    t7_99 -= col7_99[i99]*temp1_99; \
    t8_99 -= col8_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  y_99[3] = t4_99; \
  y_99[4] = t5_99; \
  y_99[5] = t6_99; \
  y_99[6] = t7_99; \
  y_99[7] = t8_99; \
  break; \
}
#else
#define MY_DGEMVM1_8_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/*
 * MY_DGEMVM1_9_Y_1111: the "case 9" arm of MY_DGEMVM1_Y_1111 (nine columns).
 * For c = 0..8 it subtracts from Y[c] the sum over i = 0..M-1 of
 * A[c*LDA + i] * X[X_SP_IND[i]].  Rows are consumed two per pass, with a
 * cleanup loop for a leftover row.  N is not used.  Arguments are
 * parenthesized so expression arguments expand correctly; they are
 * evaluated more than once and must be free of side effects.
 * Expands to nothing when DGEMV_UNROLL_LVL < 9.
 */
#if (DGEMV_UNROLL_LVL >= 9)
#define MY_DGEMVM1_9_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
case 9: { \
  FLOAT *col1_99, *col2_99, *col3_99, *col4_99, *col5_99, *col6_99, \
        *col7_99, *col8_99, *col9_99; \
  FLOAT t1_99, t2_99, t3_99, t4_99, t5_99, t6_99, t7_99, t8_99, t9_99; \
  FLOAT *y_99; \
  FLOAT temp1_99, temp2_99; \
  int i99, i99_2; \
  y_99 = (Y); \
  t1_99 = y_99[0]; \
  t2_99 = y_99[1]; \
  t3_99 = y_99[2]; \
  t4_99 = y_99[3]; \
  t5_99 = y_99[4]; \
  t6_99 = y_99[5]; \
  t7_99 = y_99[6]; \
  t8_99 = y_99[7]; \
  t9_99 = y_99[8]; \
  col1_99 = &((A)[0]); \
  col2_99 = &((A)[(LDA)]); \
  col3_99 = &((A)[(LDA)*2]); \
  col4_99 = &((A)[(LDA)*3]); \
  col5_99 = &((A)[(LDA)*4]); \
  col6_99 = &((A)[(LDA)*5]); \
  col7_99 = &((A)[(LDA)*6]); \
  col8_99 = &((A)[(LDA)*7]); \
  col9_99 = &((A)[(LDA)*8]); \
  for (i99=0;i99<(M)-1;i99+=2) { \
    i99_2 = i99 + 1; \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    temp2_99 = (X)[(X_SP_IND)[i99_2]]; \
    t1_99 -= col1_99[i99]*temp1_99 + col1_99[i99_2]*temp2_99; \
    t2_99 -= col2_99[i99]*temp1_99 + col2_99[i99_2]*temp2_99; \
    t3_99 -= col3_99[i99]*temp1_99 + col3_99[i99_2]*temp2_99; \
    t4_99 -= col4_99[i99]*temp1_99 + col4_99[i99_2]*temp2_99; \
    t5_99 -= col5_99[i99]*temp1_99 + col5_99[i99_2]*temp2_99; \
    t6_99 -= col6_99[i99]*temp1_99 + col6_99[i99_2]*temp2_99; \
    t7_99 -= col7_99[i99]*temp1_99 + col7_99[i99_2]*temp2_99; \
    t8_99 -= col8_99[i99]*temp1_99 + col8_99[i99_2]*temp2_99; \
    t9_99 -= col9_99[i99]*temp1_99 + col9_99[i99_2]*temp2_99; \
  } \
  for (;i99<(M);i99++) { \
    temp1_99 = (X)[(X_SP_IND)[i99]]; \
    t1_99 -= col1_99[i99]*temp1_99; \
    t2_99 -= col2_99[i99]*temp1_99; \
    t3_99 -= col3_99[i99]*temp1_99; \
    t4_99 -= col4_99[i99]*temp1_99; \
    t5_99 -= col5_99[i99]*temp1_99; \
    t6_99 -= col6_99[i99]*temp1_99; \
    t7_99 -= col7_99[i99]*temp1_99; \
    t8_99 -= col8_99[i99]*temp1_99; \
    t9_99 -= col9_99[i99]*temp1_99; \
  } \
  y_99[0] = t1_99; \
  y_99[1] = t2_99; \
  y_99[2] = t3_99; \
  y_99[3] = t4_99; \
  y_99[4] = t5_99; \
  y_99[5] = t6_99; \
  y_99[6] = t7_99; \
  y_99[7] = t8_99; \
  y_99[8] = t9_99; \
  break; \
}
#else
#define MY_DGEMVM1_9_Y_1111(M,N,A,LDA,X,X_SP_IND,Y)
#endif
/******* main routine ****** */
/*
 * MY_DGEMVM1_Y_1111(M,N,A,LDA,X,X_SP_IND,Y): dispatch on the column count
 * N to the matching unrolled MY_DGEMVM1_<N>_Y_1111 arm (each arm supplies
 * its own "case" label and "break").  An arm disabled by DGEMV_UNROLL_LVL
 * expands to nothing, so for such an N, or any N outside 1..9, the switch
 * does no work and Y is left untouched; the explicit default makes that
 * fall-through deliberate.
 */
#define MY_DGEMVM1_Y_1111(M,N,A,LDA,X,X_SP_IND,Y) \
{ \
  switch (N) { \
    MY_DGEMVM1_1_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMVM1_2_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMVM1_3_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMVM1_4_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMVM1_5_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMVM1_6_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMVM1_7_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMVM1_8_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    MY_DGEMVM1_9_Y_1111(M,N,A,LDA,X,X_SP_IND,Y); \
    default: break; \
  } \
}
#endif
#ifdef MY_BLAS_DTRMV_ON
/*
 * MY_DTRMV_N_U(M,A,LDA,X,WORK): X = U*X, where U is the upper triangle
 * (diagonal included) of the M-by-M matrix whose column c starts at
 * A + c*LDA.  Only rows 0..c of column c are read.  The product is
 * accumulated in WORK[0..M-1] (zeroed first) and then copied over
 * X[0..M-1].  Columns are processed three at a time, followed by a
 * one-column cleanup loop.
 * Arguments are parenthesized so expression arguments (e.g. LDA = n+1)
 * expand correctly; they are still evaluated more than once and must be
 * free of side effects.
 */
#define MY_DTRMV_N_U(M,A,LDA,X,WORK) \
{ \
  int i99, j99; \
  FLOAT *tAptr, *tAptr2, *tAptr3, tX99, tX299, tX399, *Xptr99; \
  for (i99=0;i99<(M);i99++) (WORK)[i99] = 0.0; \
  tAptr = (A); \
  tAptr2 = tAptr + (LDA); \
  tAptr3 = tAptr + 2*(LDA); \
  Xptr99 = (X); \
  for (i99=0;i99<(M)-2;i99+=3) { \
    tX99 = *(Xptr99++); \
    tX299 = *(Xptr99++); \
    tX399 = *(Xptr99++); \
    /* rows above the 3x3 diagonal block: all three columns contribute */ \
    for (j99=0;j99<=i99;j99++) { \
      (WORK)[j99] += tAptr[j99]*tX99 + tAptr2[j99]*tX299 + tAptr3[j99]*tX399; \
    } \
    /* remaining two rows of the diagonal block (j99 == i99+1, then i99+2) */ \
    (WORK)[j99] += tAptr2[j99]*tX299 + tAptr3[j99]*tX399; \
    j99++; \
    (WORK)[j99] += tAptr3[j99]*tX399; \
    tAptr += 3*(LDA); \
    tAptr2 += 3*(LDA); \
    tAptr3 += 3*(LDA); \
  } \
  /* leftover one or two columns */ \
  for (;i99<(M);i99++) { \
    tX99 = *(Xptr99++); \
    for (j99=0;j99<=i99;j99++) { \
      (WORK)[j99] += tAptr[j99]*tX99; \
    } \
    tAptr += (LDA); \
  } \
  Xptr99 = (X); \
  for (i99=0;i99<(M);i99++) Xptr99[i99] = (WORK)[i99]; \
}
/*
 * MY_DTRMV_T_U(M,A,LDA,X): X = transpose(U)*X in place, where U is the
 * upper triangle (diagonal included) of the M-by-M matrix whose column c
 * starts at A + c*LDA.  Result entry i is the sum over j = 0..i of
 * A[i*LDA + j] * X[j].  Entries are produced from i = M-1 downwards, three
 * at a time and then one at a time, so every X[j] is read before it is
 * overwritten.
 * Arguments are parenthesized so expression arguments (e.g. LDA = n+1)
 * expand correctly; they are still evaluated more than once and must be
 * free of side effects.
 */
#define MY_DTRMV_T_U(M,A,LDA,X) \
{ \
  int i99, j99; \
  FLOAT *tAptr, *tAptr2, *tAptr3, tsum99, tsum299, tsum399, *Xptr99; \
  FLOAT tX, *X2ptr99; \
  tAptr = &((A)[(LDA)*((M)-1)]); \
  tAptr2 = tAptr - (LDA); \
  tAptr3 = tAptr - 2*(LDA); \
  Xptr99 = (X); \
  X2ptr99 = &(Xptr99[(M)-1]); \
  for (i99=(M)-1;i99>1;i99-=3) { \
    tsum99 = 0.0; \
    tsum299 = 0.0; \
    tsum399 = 0.0; \
    /* rows shared by columns i99, i99-1 and i99-2 */ \
    for (j99=0;j99<=i99-2;j99++) { \
      tX = Xptr99[j99]; \
      tsum99 += tAptr[j99]*tX; \
      tsum299 += tAptr2[j99]*tX; \
      tsum399 += tAptr3[j99]*tX; \
    } \
    /* row i99-1 belongs to the two rightmost columns only */ \
    tX = Xptr99[j99]; \
    tsum99 += tAptr[j99]*tX; \
    tsum299 += tAptr2[j99]*tX; \
    j99++; \
    /* row i99 belongs to column i99 only */ \
    tX = Xptr99[j99]; \
    tsum99 += tAptr[j99]*tX; \
    *(X2ptr99--) = tsum99; \
    *(X2ptr99--) = tsum299; \
    *(X2ptr99--) = tsum399; \
    tAptr -= 3*(LDA); \
    tAptr2 -= 3*(LDA); \
    tAptr3 -= 3*(LDA); \
  } \
  /* leftover one or two leading columns */ \
  for (;i99>-1;i99--) { \
    tsum99 = 0.0; \
    for (j99=0;j99<=i99;j99++) { \
      tsum99 += tAptr[j99]*Xptr99[j99]; \
    } \
    *(X2ptr99--) = tsum99; \
    tAptr -= (LDA); \
  } \
}
/*
 * MY_DTRMV_N_L(M,A,LDA,X,B): B += L*X, where L is the lower triangle
 * (diagonal included) of the M-by-M matrix whose column c starts at
 * A + c*LDA.  Only rows c..M-1 of column c are read.  X is read, B is
 * updated in place.  Columns are processed four at a time, followed by a
 * one-column cleanup loop.
 * Arguments are parenthesized so expression arguments (e.g. LDA = n+1)
 * expand correctly; they are still evaluated more than once and must be
 * free of side effects.
 */
#define MY_DTRMV_N_L(M,A,LDA,X,B) \
{ \
  int i99, j99; \
  FLOAT tX, tX2, tX3, tX4, *tAptr99, *tA2ptr99, *tA3ptr99, *tA4ptr99, *tXptr99, *tBptr99; \
  tAptr99 = (A); \
  tA2ptr99 = tAptr99 + (LDA); \
  tA3ptr99 = tA2ptr99 + (LDA); \
  tA4ptr99 = tA3ptr99 + (LDA); \
  tXptr99 = (X); \
  tBptr99 = (B); \
  for (i99=0;i99<(M)-3;i99+=4) { \
    tX = *(tXptr99++); \
    tX2 = *(tXptr99++); \
    tX3 = *(tXptr99++); \
    tX4 = *(tXptr99++); \
    /* first three rows of the 4x4 diagonal block */ \
    tBptr99[i99] += tAptr99[i99]*tX; \
    tBptr99[i99+1] += tAptr99[i99+1]*tX + tA2ptr99[i99+1]*tX2; \
    tBptr99[i99+2] += tAptr99[i99+2]*tX + tA2ptr99[i99+2]*tX2 + tA3ptr99[i99+2]*tX3; \
    /* from row i99+3 down all four columns contribute */ \
    for (j99=i99+3;j99<(M);j99++) { \
      tBptr99[j99] += tAptr99[j99]*tX + tA2ptr99[j99]*tX2 +tA3ptr99[j99]*tX3 + tA4ptr99[j99]*tX4; \
    } \
    tAptr99 += 4*(LDA); \
    tA2ptr99 += 4*(LDA); \
    tA3ptr99 += 4*(LDA); \
    tA4ptr99 += 4*(LDA); \
  } \
  /* leftover one to three columns */ \
  for (;i99<(M);i99++) { \
    tX = *(tXptr99++); \
    for (j99=i99;j99<(M);j99++) { \
      tBptr99[j99] += tAptr99[j99]*tX; \
    } \
    tAptr99 += (LDA); \
  } \
}
/*
 * MY_DTRMV_T_L(M,A,LDA,X,B): B += transpose(L)*X, where L is the lower
 * triangle (diagonal included) of the M-by-M matrix whose column c starts
 * at A + c*LDA.  Entry i of the update is the sum over j = i..M-1 of
 * A[i*LDA + j] * X[j].  X is read, B is updated in place.  Columns are
 * processed four at a time, followed by a one-column cleanup loop.
 * Arguments are parenthesized so expression arguments (e.g. LDA = n+1)
 * expand correctly; they are still evaluated more than once and must be
 * free of side effects.
 */
#define MY_DTRMV_T_L(M,A,LDA,X,B) \
{ \
  int i99, j99; \
  FLOAT tX, tsum99, tsum299, tsum399, tsum499, *tAptr99, *tA2ptr99, *tA3ptr99, *tA4ptr99, *tXptr99, *tBptr99; \
  tAptr99 = (A); \
  tA2ptr99 = tAptr99 + (LDA); \
  tA3ptr99 = tA2ptr99 + (LDA); \
  tA4ptr99 = tA3ptr99 + (LDA); \
  tXptr99 = (X); \
  tBptr99 = (B); \
  for (i99=0;i99<(M)-3;i99+=4) { \
    /* first three rows of the 4x4 diagonal block, per column */ \
    tsum99 = tAptr99[i99]*tXptr99[i99] + tAptr99[i99+1]*tXptr99[i99+1] + tAptr99[i99+2]*tXptr99[i99+2]; \
    tsum299 = tA2ptr99[i99+1]*tXptr99[i99+1] + tA2ptr99[i99+2]*tXptr99[i99+2]; \
    tsum399 = tA3ptr99[i99+2]*tXptr99[i99+2]; \
    tsum499 = 0.0; \
    /* from row i99+3 down all four columns contribute */ \
    for (j99=i99+3;j99<(M);j99++) { \
      tX = tXptr99[j99]; \
      tsum99 += tAptr99[j99]*tX; \
      tsum299 += tA2ptr99[j99]*tX; \
      tsum399 += tA3ptr99[j99]*tX; \
      tsum499 += tA4ptr99[j99]*tX; \
    } \
    *(tBptr99++) += tsum99; \
    *(tBptr99++) += tsum299; \
    *(tBptr99++) += tsum399; \
    *(tBptr99++) += tsum499; \
    tAptr99 += 4*(LDA); \
    tA2ptr99 += 4*(LDA); \
    tA3ptr99 += 4*(LDA); \
    tA4ptr99 += 4*(LDA); \
  } \
  /* leftover one to three columns */ \
  for (;i99<(M);i99++) { \
    tsum99 = 0.0; \
    for (j99=i99;j99<(M);j99++) { \
      tsum99 += tAptr99[j99]*tXptr99[j99]; \
    } \
    *(tBptr99++) += tsum99; \
    tAptr99 += (LDA); \
  } \
}
#endif
/* syntax highlighted by Code2HTML, v. 0.9.1 */