/* * $Id: cblasy.c,v 1.1.1.1 2005/09/18 22:04:52 dhmunro Exp $ * CBLAS routines used by yorick */ /* Copyright (c) 2005, The Regents of the University of California. * All rights reserved. * This file is part of yorick (http://yorick.sourceforge.net). * Read the accompanying LICENSE file for details. */ /* Basic Linear algebra Subprograms Technical (BLAST) Forum Standard * August 21, 2001 (http://www.netlib.org/blas/blast-forum/) * Appendix B: Legacy BLAS * * These routines were converted from the original Fortran source * from www.netlib.org, with hand unrolling of the inner loops for * the level 2 and 3 routines. The unrolling helps not-very-bright * or unaggressive C compilers, but may hurt good compilers. (The * level 1 Fortran source was already unrolled.) */ #include "cblasy.h" #define ABS(x) ((x)>=0?(x):-(x)) /* ------------------------------------------------------------ level 1 */ double cblas_ddot(INT_IN n, const double *x, INT_IN incx, const double *y, INT_IN incy) { /*c c forms the dot product of two vectors. c uses unrolled loops for increments equal to one. c jack dongarra, linpack, 3/11/78. c*/ double dtemp = 0.0; long i; if (n<=0) return dtemp; if (incx!=1 || incy!=1) { /* unequal increments or equal increments not equal to 1 */ long iy, nn=n; if (incx<0) x -= (n-1)*incx; if (incy<0) y -= (n-1)*incx; for (i=iy=0 ; nn-- ; i+=incx,iy+=incy) dtemp += y[iy]*x[i]; } else { /* both increments equal to 1 */ /* unroll loop */ long m = n%5; for (i=0 ; i cutlo) break; if (adx > xmax) { /* renormalize to new largest element */ sum = 1.0 + sum * (xmax/adx)*(xmax/adx); xmax = adx; } else { /* continue with current normalization */ sum += (adx/xmax)*(adx/xmax); } nn--; if (!nn) return xmax*sqrt(sum); x += incx; } /* here, sum>=1.0, but xmax<=cutlo, so xmax*xmax might underflow */ sum *= xmax; /* remove phase 2 normalization */ sum *= xmax; /* don't want the compiler to do sum*(xmax*xmax) */ nn--; x += incx; } else { sum = 0.0; } /* phase 3, adx is first element>cutlo */ if (adx=hitest) break; sum += adx*adx; } } /* phase 4, adx is first element greater than hitest */ xmax = adx; sum /= xmax; /* again, try to avoid sum/(xmax*xmax) */ sum /= xmax; sum += 1.0; while (nn) { adx = ABS(x[0]); if (adx > xmax) { /* renormalize to new largest element */ sum = 1.0 + sum * (xmax/adx)*(xmax/adx); xmax = adx; } else { /* continue with current normalization */ sum += (adx/xmax)*(adx/xmax); } nn--; x += incx; } return xmax*sqrt(sum); } double cblas_dasum(INT_IN n, const double *x, INT_IN incx) { /*c c takes the sum of the absolute values. c jack dongarra, linpack, 3/11/78. c modified 3/93 to return if incx .le. 0. c*/ double dtemp = 0.0; long i, nn=n; if ( nn<=0 || incx<=0 ) return dtemp; if (incx!=1) { /* increment not equal to 1 */ for (i=0 ; nn-- ; i+=incx) dtemp += ABS(x[i]); } else { /* increment equal to 1 */ /* unroll loop */ long m = n%6; for (i=0 ; idmax) { dmax = ABS(x[ix]); iamax = i; } } else { /* increment equal to 1 */ dmax = ABS(x[0]); for (i=1 ; --nn ; i++) if (ABS(x[i])>dmax) { dmax = ABS(x[i]); iamax = i; } } return iamax; } void cblas_dswap(INT_IN n, double *x, INT_IN incx, double *y, INT_IN incy) { /*c c interchanges two vectors. c uses unrolled loops for increments equal one. c jack dongarra, linpack, 3/11/78. c*/ double dtemp; long i; if (n<=0) return; if (incx!=1 || incy!=1) { /* unequal increments or equal increments not equal to 1 */ long iy, nn=n; if (incx<0) x -= (n-1)*incx; if (incy<0) y -= (n-1)*incx; for (i=iy=0 ; nn-- ; i+=incx,iy+=incy) { dtemp = x[i]; x[i] = y[iy]; y[iy] = dtemp; } } else { /* both increments equal to 1 */ /* unroll loop */ long m = n%3; for (i=0 ; i=0 ; j--,a-=lda) { if ( x[j]!=0.0 ) { temp = x[j]; /* unroll inner loop */ count = (n-1-j)&3; for (i=n-1 ; count-- ; i--) x[i] += temp*a[i]; for ( ; i>j ; i-=4) { x[i ] += temp*a[i]; x[i-1] += temp*a[i-1]; x[i-2] += temp*a[i-2]; x[i-3] += temp*a[i-3]; } if ( nounit ) x[j] *= a[j]; } } } else { for (j=n-1,jx=j*incx,a+=j*lda ; j>=0 ; j--,jx-=incx,a-=lda) { if ( x[jx]!=0.0 ) { temp = x[jx]; /* unroll inner loop */ count = (n-1-j)&3; for (i=n-1,ix=i*incx ; count-- ; i--,ix-=incx) x[ix] += temp*a[i]; for ( ; i>j ; i-=4,ix-=4*incx) { x[ix ] += temp*a[i]; x[ix- incx] += temp*a[i-1]; x[ix-2*incx] += temp*a[i-2]; x[ix-3*incx] += temp*a[i-3]; } if ( nounit ) x[jx] *= a[j]; } } } } } else { /* Form x := A'*x. */ long count; if ( uplo==CblasUpper ) { if ( incx==1 ) { for (j=n-1,a+=j*lda ; j>=0 ; j--,a-=lda) { temp = x[j]; if ( nounit ) temp*= a[j]; if (j) { /* unroll inner loop */ count = (j-1)&3; for (i=j-1 ; count-- ; i--) temp += a[i]*x[i]; for ( ; i>0 ; i-=4) temp += a[i]*x[i] + a[i-1]*x[i-1] + a[i-2]*x[i-2] + a[i-3]*x[i-3]; } x[j] = temp; } } else { for (j=n-1,jx=j*incx,a+=j*lda ; j>=0 ; j--,jx-=incx,a-=lda) { temp = x[jx]; if ( nounit ) temp *= a[j]; if (j) { /* unroll inner loop */ count = (j-1)&3; for (i=j-1,ix=i*incx ; count-- ; i--,ix-=incx) temp += a[i]*x[ix]; for ( ; i>0 ; i-=4,ix-=4*incx) temp += a[i]*x[ix] + a[i-1]*x[i-incx] + a[i-2]*x[i-2*incx] + a[i-3]*x[i-3*incx]; } x[jx] = temp; } } } else { if ( incx==1 ) { for (j=0 ; j=0 ; j--,a-=lda) { if ( x[j]!=0.0 ) { if ( nounit ) x[j] /= a[j]; if (!j) break; temp = x[j]; /* unroll inner loop */ count = (j-1)&3; for (i=j-1 ; count-- ; i--) x[i] -= temp*a[i]; for ( ; i>0 ; i-=4) { x[i ] -= temp*a[i]; x[i-1] -= temp*a[i-1]; x[i-2] -= temp*a[i-2]; x[i-3] -= temp*a[i-3]; } } } } else { for (j=n-1,jx=j*incx,a+=j*lda ; j>=0 ; j--,jx-=incx,a-=lda) { if ( x[jx]!=0.0 ) { if ( nounit ) x[jx] /= a[j]; if (!j) break; temp = x[jx]; /* unroll inner loop */ count = (j-1)&3; for (i=j-1,ix=i*incx ; count-- ; i--,ix-=incx) x[ix] -= temp*a[i]; for ( ; i>0 ; i-=4,ix-=4*incx) { x[ix ] -= temp*a[i]; x[ix- incx] -= temp*a[i-1]; x[ix-2*incx] -= temp*a[i-2]; x[ix-3*incx] -= temp*a[i-3]; } } } } } else { long count; if ( incx==1 ) { for (j=0 ; j=0 ; j--,a-=lda) { temp = x[j]; /* unroll inner loop */ count = (n-1-j)&3; for (i=n-1 ; count-- ; i--) temp -= a[i]*x[i]; for ( ; i>j ; i-=4) temp -= a[i]*x[i] + a[i-1]*x[i-1] + a[i-2]*x[i-2] + a[i-3]*x[i-3]; if ( nounit ) temp /= a[j]; x[j] = temp; } } else { for (j=n-1,jx=j*incx,a+=j*lda ; j>=0 ; j--,jx-=incx,a-=lda) { temp = x[jx]; /* unroll inner loop */ count = (n-1-j)&3; for (i=n-1,ix=i*incx ; count-- ; i--,ix-=incx) temp -= a[i]*x[ix]; for ( ; i>j ; i-=4,ix-=4*incx) temp -= a[i]*x[ix] + a[i-1]*x[ix-incx] + a[i-2]*x[ix-2*incx] + a[i-3]*x[ix-3*incx]; if ( nounit ) temp /= a[j]; x[jx] = temp; } } } } /* End of DTRSV */ } void cblas_dger(const enum CBLAS_ORDER order, INT_IN m, INT_IN n, const double alpha, const double *x, INT_IN incx, const double *y, INT_IN incy, double *a, INT_IN lda) { /* * Purpose * ======= * * DGER performs the rank 1 operation * * A := alpha*x*y' + A, * * where alpha is a scalar, x is an m element vector, y is an n element * vector and A is an m by n matrix. * * Parameters * ========== * * ORDER - enum CBLAS_ORDER (ignored) * * M - INTEGER. * On entry, M specifies the number of rows of the matrix A. * M must be at least zero. * Unchanged on exit. * * N - INTEGER. * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * Unchanged on exit. * * ALPHA - DOUBLE PRECISION. * On entry, ALPHA specifies the scalar alpha. * Unchanged on exit. * * X - DOUBLE PRECISION array of dimension at least * ( 1 + ( m - 1 )*abs( INCX ) ). * Before entry, the incremented array X must contain the m * element vector x. * Unchanged on exit. * * INCX - INTEGER. * On entry, INCX specifies the increment for the elements of * X. INCX must not be zero. * Unchanged on exit. * * Y - DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). * Before entry, the incremented array Y must contain the n * element vector y. * Unchanged on exit. * * INCY - INTEGER. * On entry, INCY specifies the increment for the elements of * Y. INCY must not be zero. * Unchanged on exit. * * A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). * Before entry, the leading m by n part of the array A must * contain the matrix of coefficients. On exit, A is * overwritten by the updated matrix. * * LDA - INTEGER. * On entry, LDA specifies the first dimension of A as declared * in the calling (sub) program. LDA must be at least * max( 1, m ). * Unchanged on exit. */ /* * Level 2 Blas routine. * * -- Written on 22-October-1986. * Jack Dongarra, Argonne National Lab. * Jeremy Du Croz, Nag Central Office. * Sven Hammarling, Nag Central Office. * Richard Hanson, Sandia National Labs. */ double temp; long i, info, j, m3, mm = m, nn = n, inc_x = incx, inc_y = incy; /* Test the input parameters. */ info = 0; if ( m<0 ) { info = 1; } else if ( n<0 ) { info = 2; } else if ( incx==0 ) { info = 5; } else if ( incy==0 ) { info = 7; } else if ( !lda || lda=0 ; k--,ak-=lda) { if ( b[k]!=0.0 ) { temp = alpha*b[k]; b[k] = temp; if ( nounit ) b[k] *= ak[k]; /* hand unroll inner loop */ count = (m-1-k)&3; for (i=k+1 ; count-- ; i++) b[i] += temp*ak[i]; for ( ; i=0 ; i--,a-=lda) { temp = b[i]; if ( nounit ) temp *= ai[i]; /* hand unroll inner loop */ for (k=0 ; k<(i&3) ; k++) temp += ai[k]*b[k]; for ( ; k=0 ; j--,a-=lda,bj-=ldb) { temp = alpha; if ( nounit ) temp *= a[j]; for (i=0 ; i=0 ; k--,a-=lda,bk-=ldb) { for (j=k+1,bj=b+j*ldb ; j=0 ; k--,ak-=lda) { if ( b[k]!=0.0 ) { if ( nounit ) b[k] /= ak[k]; /* unroll inner loop */ for (i=0 ; i<(k&3) ; i++) b[i] -= b[k]*ak[i]; for ( ; i=0 ; i--,ai-=lda) { temp = alpha*b[i]; /* unroll inner loop */ count = (m-1-i)&3; for (k=i+1 ; count-- ; k++) temp -= ai[k]*b[k]; for ( ; k=0 ; j--,a-=lda,bj-=ldb) { if ( alpha!=1.0 ) for (i=0 ; i=0 ; k--,a-=lda,bk-=ldb) { if ( nounit ) { temp = 1.0/a[k]; for (i=0 ; i