/*
* $Id: dgecon.c,v 1.1.1.1 2005/09/18 22:04:39 dhmunro Exp $
* LAPACK routines to estimate the condition number of a matrix.
*/
/* Copyright (c) 2005, The Regents of the University of California.
* All rights reserved.
* This file is part of yorick (http://yorick.sourceforge.net).
* Read the accompanying LICENSE file for details.
*/
#include "dg.h"
/*---blas routines---*/
/* ddot dcopy dasum idamax daxpy dscal dtrsv */
/*---converted nutty string switches to single characters (lower case)---*/
#define lsame(x,y) ((x)==(y))
/*-----Fortran intrinsics converted-----*/
extern double pow(double,double); /* performance never an issue here! */
#define abs(x) ((x)>=0?(x):-(x))
extern double sqrt(double);
extern double log10(double);
#define mod(x,y) ((x)%(y))
#define dble(x) ((double)x)
#define nint(x) ((long)((x)>=0?(x)+.5:(x)-.5))
#define int(x) ((long)x)
#define sign(x,y) ((((x)<0)!=((y)<0))?-(x):(x))
#define min(x,y) ((x)<(y)?(x):(y))
#define max(x,y) ((x)>(y)?(x):(y))
/*-----end of Fortran intrinsics-----*/
void dgecon( char norm, long n, double a[], long lda, double anorm,
double *rcond, double work[], long iwork[],long *info )
{
/**
* -- LAPACK routine (version 1.1) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* February 29, 1992
*
* .. Scalar Arguments ..*/
/** ..
* .. Array Arguments ..*/
#undef iwork_1
#define iwork_1(a1) iwork[a1-1]
#undef work_1
#define work_1(a1) work[a1-1]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
/** ..
*
* Purpose
* =======
*
* DGECON estimates the reciprocal of the condition number of a general
* real matrix A, in either the 1-norm or the infinity-norm, using
* the LU factorization computed by DGETRF.
*
* An estimate is obtained for norm(inv(A)), and the reciprocal of the
* condition number is computed as
* RCOND = 1 / ( norm(A) * norm(inv(A)) ).
*
* Arguments
* =========
*
* NORM (input) CHARACTER*1
* Specifies whether the 1-norm condition number or the
* infinity-norm condition number is required:
* = '1' or 'O': 1-norm;
* = 'I': Infinity-norm.
*
* N (input) INTEGER
* The order of the matrix A. N >= 0.
*
* A (input) DOUBLE PRECISION array, dimension (LDA,N)
* The factors L and U from the factorization A = P*L*U
* as computed by DGETRF.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,N).
*
* ANORM (input) DOUBLE PRECISION
* If NORM = '1' or 'O', the 1-norm of the original matrix A.
* If NORM = 'I', the infinity-norm of the original matrix A.
Note: 1-norm of matrix A is abs(A)(sum,max)
infinity-norm of A is abs(A)(max,sum)
*
* RCOND (output) DOUBLE PRECISION
* The reciprocal of the condition number of the matrix A,
* computed as RCOND = 1/(norm(A) * norm(inv(A))).
*
* WORK_1 (workspace) DOUBLE PRECISION array, dimension (4*N)
*
* IWORK_1 (workspace) INTEGER array, dimension (N)
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -i, the i-th argument had an illegal value
*
* =====================================================================
*
* .. Parameters ..*/
#undef one
#define one 1.0e+0
#undef zero
#define zero 0.0e+0
/** ..
* .. Local Scalars ..*/
int onenrm;
char normin;
long ix, kase, kase1;
double ainvnm, scale, sl, smlnum, su;
/** ..
* .. Intrinsic Functions ..*/
/* intrinsic abs, max;*/
/** ..
* .. Executable Statements ..
*
* Test the input parameters.
**/
/*-----implicit-declarations-----*/
/*-----end-of-declarations-----*/
*info = 0;
onenrm = norm=='1' || lsame( norm, 'o' );
if( !onenrm && !lsame( norm, 'i' ) ) {
*info = -1;
} else if( n<0 ) {
*info = -2;
} else if( lda<max( 1, n ) ) {
*info = -4;
} else if( anorm<zero ) {
*info = -5;
}
if( *info!=0 ) {
xerbla( "dgecon", -*info );
return;
}
/**
* Quick return if possible
**/
*rcond = zero;
if( n==0 ) {
*rcond = one;
return;
} else if( anorm==zero ) {
return;
}
smlnum = dlamch( 's'/*afe minimum*/ );
/**
* Estimate the norm of inv(A).
**/
ainvnm = zero;
normin = 'n';
if( onenrm ) {
kase1 = 1;
} else {
kase1 = 2;
}
kase = 0;
L_10:
dlacon( n, &work_1( n+1 ), work, iwork, &ainvnm, &kase );
if( kase!=0 ) {
if( kase==kase1 ) {
/**
* Multiply by inv(L).
**/
dlatrs( 'l'/*ower*/, 'n'/*o transpose*/, 'u'/*nit*/, normin, n, a,
lda, work, &sl, &work_1( 2*n+1 ), info );
/**
* Multiply by inv(U).
**/
dlatrs( 'u'/*pper*/, 'n'/*o transpose*/, 'n'/*on-unit*/, normin, n,
a, lda, work, &su, &work_1( 3*n+1 ), info );
} else {
/**
* Multiply by inv(U').
**/
dlatrs( 'u'/*pper*/, 't'/*ranspose*/, 'n'/*on-unit*/, normin, n, a,
lda, work, &su, &work_1( 3*n+1 ), info );
/**
* Multiply by inv(L').
**/
dlatrs( 'l'/*ower*/, 't'/*ranspose*/, 'u'/*nit*/, normin, n, a,
lda, work, &sl, &work_1( 2*n+1 ), info );
}
/**
* Divide X by 1/(SL*SU) if doing so will not cause overflow.
**/
scale = sl*su;
normin = 'y';
if( scale!=one ) {
ix = 1+cblas_idamax( n, work, 1 );
if( scale<abs( work_1( ix ) )*smlnum || scale==zero )
goto L_20;
drscl( n, scale, work, 1 );
}
goto L_10;
}
/**
* Compute the estimate of the reciprocal condition number.
**/
if( ainvnm!=zero )
*rcond = ( one / ainvnm ) / anorm;
L_20:
return;
/**
* End of DGECON
**/
}
void drscl( long n, double sa, double sx[], long incx )
{
/**
* -- LAPACK auxiliary routine (version 1.1) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* October 31, 1992
*
* .. Scalar Arguments ..*/
/** ..
* .. Array Arguments ..*/
#undef sx_1
#define sx_1(a1) sx[a1-1]
/** ..
*
* Purpose
* =======
*
* DRSCL multiplies an n-element real vector x by the real scalar 1/a.
* This is done without overflow or underflow as long as
* the final result x/a does not overflow or underflow.
*
* Arguments
* =========
*
* N (input) INTEGER
* The number of components of the vector x.
*
* SA (input) DOUBLE PRECISION
* The scalar a which is used to divide each component of x.
* SA must be >= 0, or the subroutine will divide by zero.
*
* SX_1 (input/output) DOUBLE PRECISION array, dimension
* (1+(N-1)*abs(INCX))
* The n-element vector x.
*
* INCX (input) INTEGER
* The increment between successive values of the vector SX.
* > 0: SX_1(1) = X(1) and SX_1(1+(i-1)*INCX) = x(i), 1< i<= n
* < 0: SX_1(1) = X(n) and SX_1(1+(i-1)*INCX) = x(n-i+1), 1< i<= n
*
* =====================================================================
*
* .. Parameters ..*/
#undef one
#define one 1.0e+0
#undef zero
#define zero 0.0e+0
/** ..
* .. Local Scalars ..*/
int done;
double bignum, cden, cden1, cnum, cnum1, mul, smlnum;
/** ..
* .. Intrinsic Functions ..*/
/* intrinsic abs;*/
/** ..
* .. Executable Statements ..
*
* Quick return if possible
**/
/*-----implicit-declarations-----*/
/*-----end-of-declarations-----*/
if( n<=0 )
return;
/**
* Get machine parameters
**/
smlnum = dlamch( 's' );
bignum = one / smlnum;
dlabad( &smlnum, &bignum );
/**
* Initialize the denominator to SA and the numerator to 1.
**/
cden = sa;
cnum = one;
L_10:
cden1 = cden*smlnum;
cnum1 = cnum / bignum;
if( abs( cden1 )>abs( cnum ) && cnum!=zero ) {
/**
* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM.
**/
mul = smlnum;
done = 0;
cden = cden1;
} else if( abs( cnum1 )>abs( cden ) ) {
/**
* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM.
**/
mul = bignum;
done = 0;
cnum = cnum1;
} else {
/**
* Multiply X by CNUM / CDEN and return.
**/
mul = cnum / cden;
done = 1;
}
/**
* Scale the vector X by MUL
**/
cblas_dscal( n, mul, sx, incx );
if( !done )
goto L_10;
return;
/**
* End of DRSCL
**/
}
void dlabad( double *small, double *large )
{
/**
* -- LAPACK auxiliary routine (version 1.1) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* October 31, 1992
*
* .. Scalar Arguments ..*/
/** ..
*
* Purpose
* =======
*
* DLABAD takes as input the values computed by SLAMCH for underflow and
* overflow, and returns the square root of each of these values if the
* log of LARGE is sufficiently large. This subroutine is intended to
* identify machines with a large exponent range, such as the Crays, and
* redefine the underflow and overflow limits to be the square roots of
* the values computed by DLAMCH. This subroutine is needed because
* DLAMCH does not compensate for poor arithmetic in the upper half of
* the exponent range, as is found on a Cray.
*
* Arguments
* =========
*
* SMALL (input/output) DOUBLE PRECISION
* On entry, the underflow threshold as computed by DLAMCH.
* On exit, if LOG10(LARGE) is sufficiently large, the square
* root of SMALL, otherwise unchanged.
*
* LARGE (input/output) DOUBLE PRECISION
* On entry, the overflow threshold as computed by DLAMCH.
* On exit, if LOG10(LARGE) is sufficiently large, the square
* root of LARGE, otherwise unchanged.
*
* =====================================================================
*
* .. Intrinsic Functions ..*/
/* intrinsic log10, sqrt;*/
/** ..
* .. Executable Statements ..
*
* If it looks like we're on a Cray, take the square root of
* SMALL and LARGE to avoid overflow and underflow problems.
**/
/*-----implicit-declarations-----*/
/*-----end-of-declarations-----*/
if( log10( *large )>2000.e0 ) {
*small = sqrt( *small );
*large = sqrt( *large );
}
return;
/**
* End of DLABAD
**/
}
void dlatrs( char uplo, char trans, char diag, char normin, long n,
double a[], long lda, double x[], double *scale,
double cnorm[], long *info )
{
/**
* -- LAPACK auxiliary routine (version 1.1) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* June 30, 1992
*
* .. Scalar Arguments ..*/
/** ..
* .. Array Arguments ..*/
#undef x_1
#define x_1(a1) x[a1-1]
#undef cnorm_1
#define cnorm_1(a1) cnorm[a1-1]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
/** ..
*
* Purpose
* =======
*
* DLATRS solves one of the triangular systems
*
* A *x = s*b or A'*x = s*b
*
* with scaling to prevent overflow. Here A is an upper or lower
* triangular matrix, A' denotes the transpose of A, x and b are
* n-element vectors, and s is a scaling factor, usually less than
* or equal to 1, chosen so that the components of x will be less than
* the overflow threshold. If the unscaled problem will not cause
* overflow, the Level 2 BLAS routine DTRSV is called. If the matrix A
* is singular (A_2(j,j) = 0 for some j), then s is set to 0 and a
* non-trivial solution to A*x = 0 is returned.
*
* Arguments
* =========
*
* UPLO (input) CHARACTER*1
* Specifies whether the matrix A is upper or lower triangular.
* = 'U': Upper triangular
* = 'L': Lower triangular
*
* TRANS (input) CHARACTER*1
* Specifies the operation applied to A.
* = 'N': Solve A * x = s*b (No transpose)
* = 'T': Solve A'* x = s*b (Transpose)
* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose)
*
* DIAG (input) CHARACTER*1
* Specifies whether or not the matrix A is unit triangular.
* = 'N': Non-unit triangular
* = 'U': Unit triangular
*
* NORMIN (input) CHARACTER*1
* Specifies whether CNORM has been set or not.
* = 'Y': CNORM contains the column norms on entry
* = 'N': CNORM is not set on entry. On exit, the norms will
* be computed and stored in CNORM.
*
* N (input) INTEGER
* The order of the matrix A. N >= 0.
*
* A_2 (input) DOUBLE PRECISION array, dimension (LDA,N)
* The triangular matrix A. If UPLO = 'U', the leading n by n
* upper triangular part of the array A contains the upper
* triangular matrix, and the strictly lower triangular part of
* A is not referenced. If UPLO = 'L', the leading n by n lower
* triangular part of the array A contains the lower triangular
* matrix, and the strictly upper triangular part of A is not
* referenced. If DIAG = 'U', the diagonal elements of A are
* also not referenced and are assumed to be 1.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max (1,N).
*
* X_1 (input/output) DOUBLE PRECISION array, dimension (N)
* On entry, the right hand side b of the triangular system.
* On exit, X is overwritten by the solution vector x.
*
* SCALE (output) DOUBLE PRECISION
* The scaling factor s for the triangular system
* A * x = s*b or A'* x = s*b.
* If SCALE = 0, the matrix A is singular or badly scaled, and
* the vector x is an exact or approximate solution to A*x = 0.
*
* CNORM_1 (input or output) DOUBLE PRECISION array, dimension (N)
*
* If NORMIN = 'Y', CNORM is an input variable and CNORM_1(j)
* contains the norm of the off-diagonal part of the j-th column
* of A. If TRANS = 'N', CNORM_1(j) must be greater than or equal
* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM_1(j)
* must be greater than or equal to the 1-norm.
*
* If NORMIN = 'N', CNORM is an output variable and CNORM_1(j)
* returns the 1-norm of the offdiagonal part of the j-th column
* of A.
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -k, the k-th argument had an illegal value
*
* Further Details
* ======= =======
*
* A rough bound on x is computed; if that is less than overflow, DTRSV
* is called, otherwise, specific code is used which checks for possible
* overflow or divide-by-zero at every operation.
*
* A columnwise scheme is used for solving A*x = b. The basic algorithm
* if A is lower triangular is
*
* x[1:n] := b[1:n]
* for j = 1, ..., n
* x_1(j) := x_1(j) / A_2(j,j)
* x[j+1:n] := x[j+1:n] - x_1(j) * A[j+1:n,j]
* end
*
* Define bounds on the components of x after j iterations of the loop:
* M(j) = bound on x[1:j]
* G(j) = bound on x[j+1:n]
* Initially, let M(0) = 0 and G(0) = max{x_1(i), i=1,...,n}.
*
* Then for iteration j+1 we have
* M(j+1) <= G(j) / | A_2(j+1,j+1) |
* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
* <= G(j) ( 1 + CNORM_1(j+1) / | A_2(j+1,j+1) | )
*
* where CNORM_1(j+1) is greater than or equal to the infinity-norm of
* column j+1 of A, not counting the diagonal. Hence
*
* G(j) <= G(0) product ( 1 + CNORM_1(i) / | A_2(i,i) | )
* 1<=i<=j
* and
*
* |x_1(j)| <= ( G(0) / |A_2(j,j)| ) product ( 1 + CNORM_1(i) / |A_2(i,i)| )
* 1<=i< j
*
* Since |x_1(j)| <= M(j), we use the Level 2 BLAS routine DTRSV if the
* reciprocal of the largest M(j), j=1,..,n, is larger than
* max(underflow, 1/overflow).
*
* The bound on x_1(j) is also used to determine when a step in the
* columnwise method can be performed without fear of overflow. If
* the computed bound is greater than a large constant, x is scaled to
* prevent overflow, but if the bound overflows, x is set to 0, x_1(j) to
* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
*
* Similarly, a row-wise scheme is used to solve A'*x = b. The basic
* algorithm for A upper triangular is
*
* for j = 1, ..., n
* x_1(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A_2(j,j)
* end
*
* We simultaneously compute two bounds
* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
* M(j) = bound on x_1(i), 1<=i<=j
*
* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
* Then the bound on x_1(j) is
*
* M(j) <= M(j-1) * ( 1 + CNORM_1(j) ) / | A_2(j,j) |
*
* <= M(0) * product ( ( 1 + CNORM_1(i) ) / |A_2(i,i)| )
* 1<=i<=j
*
* and we can safely call DTRSV if 1/M(n) and 1/G(n) are both greater
* than max(underflow, 1/overflow).
*
* =====================================================================
*
* .. Parameters ..*/
#undef zero
#define zero 0.0e+0
#undef half
#define half 0.5e+0
#undef one
#define one 1.0e+0
/** ..
* .. Local Scalars ..*/
int notran, nounit, upper;
long i, imax, j, jfirst, jinc, jlast;
double bignum, grow, rec, smlnum, sumj, tjj, tjjs=0.0,
tmax, tscal, uscal, xbnd, xj, xmax;
/** ..
* .. Intrinsic Functions ..*/
/* intrinsic abs, max, min;*/
/** ..
* .. Executable Statements ..
**/
/*-----implicit-declarations-----*/
/*-----end-of-declarations-----*/
*info = 0;
upper = lsame( uplo, 'u' );
notran = lsame( trans, 'n' );
nounit = lsame( diag, 'n' );
/**
* Test the input parameters.
**/
if( !upper && !lsame( uplo, 'l' ) ) {
*info = -1;
} else if( !notran && !lsame( trans, 't' ) && !
lsame( trans, 'c' ) ) {
*info = -2;
} else if( !nounit && !lsame( diag, 'u' ) ) {
*info = -3;
} else if( !lsame( normin, 'y' ) && !
lsame( normin, 'n' ) ) {
*info = -4;
} else if( n<0 ) {
*info = -5;
} else if( lda<max( 1, n ) ) {
*info = -7;
}
if( *info!=0 ) {
xerbla( "dlatrs", -*info );
return;
}
/**
* Quick return if possible
**/
if( n==0 )
return;
/**
* Determine machine dependent parameters to control overflow.
**/
smlnum = dlamch( 's'/*afe minimum*/ ) / dlamch( 'p'/*recision*/ );
bignum = one / smlnum;
*scale = one;
if( lsame( normin, 'n' ) ) {
/**
* Compute the 1-norm of each column, not including the diagonal.
**/
if( upper ) {
/**
* A is upper triangular.
**/
for (j=1 ; j<=n ; j+=1) {
cnorm_1( j ) = cblas_dasum( j-1, &a_2( 1, j ), 1 );
}
} else {
/**
* A is lower triangular.
**/
for (j=1 ; j<=n - 1 ; j+=1) {
cnorm_1( j ) = cblas_dasum( n-j, &a_2( j+1, j ), 1 );
}
cnorm_1( n ) = zero;
}
}
/**
* Scale the column norms by TSCAL if the maximum entry in CNORM is
* greater than BIGNUM.
**/
imax = 1+cblas_idamax( n, cnorm, 1 );
tmax = cnorm_1( imax );
if( tmax<=bignum ) {
tscal = one;
} else {
tscal = one / ( smlnum*tmax );
cblas_dscal( n, tscal, cnorm, 1 );
}
/**
* Compute a bound on the computed solution vector to see if the
* Level 2 BLAS routine DTRSV can be used.
**/
j = 1+cblas_idamax( n, x, 1 );
xmax = abs( x_1( j ) );
xbnd = xmax;
if( notran ) {
/**
* Compute the growth in A * *x = b.
**/
if( upper ) {
jfirst = n;
jlast = 1;
jinc = -1;
} else {
jfirst = 1;
jlast = n;
jinc = 1;
}
if( tscal!=one ) {
grow = zero;
goto L_50;
}
if( nounit ) {
/**
* A is non-unit triangular.
*
* Compute GROW = 1/G(j) and XBND = 1/M(j).
* Initially, G(0) = max{x_1(i), i=1,...,n}.
**/
grow = one / max( xbnd, smlnum );
xbnd = grow;
for (j=jfirst ; jinc>0?j<=jlast:j>=jlast ; j+=jinc) {
/**
* Exit the loop if the growth factor is too small.
**/
if( grow<=smlnum )
goto L_50;
/**
* M(j) = G(j-1) / abs(A_2(j,j))
**/
tjj = abs( a_2( j, j ) );
xbnd = min( xbnd, min( one, tjj )*grow );
if( tjj+cnorm_1( j )>=smlnum ) {
/**
* G(j) = G(j-1)*( 1 + CNORM_1(j) / abs(A_2(j,j)) )
**/
grow = grow*( tjj / ( tjj+cnorm_1( j ) ) );
} else {
/**
* G(j) could overflow, set GROW to 0.
**/
grow = zero;
}
}
grow = xbnd;
} else {
/**
* A is unit triangular.
*
* Compute GROW = 1/G(j), where G(0) = max{x_1(i), i=1,...,n}.
**/
grow = min( one, one / max( xbnd, smlnum ) );
for (j=jfirst ; jinc>0?j<=jlast:j>=jlast ; j+=jinc) {
/**
* Exit the loop if the growth factor is too small.
**/
if( grow<=smlnum )
goto L_50;
/**
* G(j) = G(j-1)*( 1 + CNORM_1(j) )
**/
grow = grow*( one / ( one+cnorm_1( j ) ) );
}
}
L_50:;
} else {
/**
* Compute the growth in A' * *x = b.
**/
if( upper ) {
jfirst = 1;
jlast = n;
jinc = 1;
} else {
jfirst = n;
jlast = 1;
jinc = -1;
}
if( tscal!=one ) {
grow = zero;
goto L_80;
}
if( nounit ) {
/**
* A is non-unit triangular.
*
* Compute GROW = 1/G(j) and XBND = 1/M(j).
* Initially, M(0) = max{x_1(i), i=1,...,n}.
**/
grow = one / max( xbnd, smlnum );
xbnd = grow;
for (j=jfirst ; jinc>0?j<=jlast:j>=jlast ; j+=jinc) {
/**
* Exit the loop if the growth factor is too small.
**/
if( grow<=smlnum )
goto L_80;
/**
* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM_1(j) ) )
**/
xj = one + cnorm_1( j );
grow = min( grow, xbnd / xj );
/**
* M(j) = M(j-1)*( 1 + CNORM_1(j) ) / abs(A_2(j,j))
**/
tjj = abs( a_2( j, j ) );
if( xj>tjj )
xbnd = xbnd*( tjj / xj );
}
grow = min( grow, xbnd );
} else {
/**
* A is unit triangular.
*
* Compute GROW = 1/G(j), where G(0) = max{x_1(i), i=1,...,n}.
**/
grow = min( one, one / max( xbnd, smlnum ) );
for (j=jfirst ; jinc>0?j<=jlast:j>=jlast ; j+=jinc) {
/**
* Exit the loop if the growth factor is too small.
**/
if( grow<=smlnum )
goto L_80;
/**
* G(j) = ( 1 + CNORM_1(j) )*G(j-1)
**/
xj = one + cnorm_1( j );
grow = grow / xj;
}
}
L_80:;
}
if( ( grow*tscal )>smlnum ) {
/**
* Use the Level 2 BLAS solve if the reciprocal of the bound on
* elements of X is not too small.
**/
cblas_dtrsv(CblasColMajor, upper? CblasUpper : CblasLower,
notran? CblasNoTrans : CblasTrans,
nounit? CblasNonUnit : CblasUnit, n, a, lda, x, 1 );
} else {
/**
* Use a Level 1 BLAS solve, scaling intermediate results.
**/
if( xmax>bignum ) {
/**
* Scale X so that its components are less than or equal to
* BIGNUM in absolute value.
**/
*scale = bignum / xmax;
cblas_dscal( n, *scale, x, 1 );
xmax = bignum;
}
if( notran ) {
/**
* Solve A * *x = b
**/
for (j=jfirst ; jinc>0?j<=jlast:j>=jlast ; j+=jinc) {
/**
* Compute x_1(j) = b(j) / A_2(j,j), scaling x if necessary.
**/
xj = abs( x_1( j ) );
if( nounit ) {
tjjs = a_2( j, j )*tscal;
} else {
tjjs = tscal;
if( tscal==one )
goto L_100;
}
tjj = abs( tjjs );
if( tjj>smlnum ) {
/**
* abs(A_2(j,j)) > SMLNUM:
**/
if( tjj<one ) {
if( xj>tjj*bignum ) {
/**
* Scale x by 1/b(j).
**/
rec = one / xj;
cblas_dscal( n, rec, x, 1 );
*scale = *scale*rec;
xmax = xmax*rec;
}
}
x_1( j ) = x_1( j ) / tjjs;
xj = abs( x_1( j ) );
} else if( tjj>zero ) {
/**
* 0 < abs(A_2(j,j)) <= SMLNUM:
**/
if( xj>tjj*bignum ) {
/**
* Scale x by (1/abs(x_1(j)))*abs(A_2(j,j))*BIGNUM
* to avoid overflow when dividing by A_2(j,j).
**/
rec = ( tjj*bignum ) / xj;
if( cnorm_1( j )>one ) {
/**
* Scale by 1/CNORM_1(j) to avoid overflow when
* multiplying x_1(j) times column j.
**/
rec = rec / cnorm_1( j );
}
cblas_dscal( n, rec, x, 1 );
*scale = *scale*rec;
xmax = xmax*rec;
}
x_1( j ) = x_1( j ) / tjjs;
xj = abs( x_1( j ) );
} else {
/**
* A_2(j,j) = 0: Set x_1(1:n) = 0, x_1(j) = 1, and
* *scale = 0, and compute a solution to A**x = 0.
**/
for (i=1 ; i<=n ; i+=1) {
x_1( i ) = zero;
}
x_1( j ) = one;
xj = one;
*scale = zero;
xmax = zero;
}
L_100:
/**
* Scale x if necessary to avoid overflow when adding a
* multiple of column j of A.
**/
if( xj>one ) {
rec = one / xj;
if( cnorm_1( j )>( bignum-xmax )*rec ) {
/**
* Scale x by 1/(2*abs(x_1(j))).
**/
rec = rec*half;
cblas_dscal( n, rec, x, 1 );
*scale = *scale*rec;
}
} else if( xj*cnorm_1( j )>( bignum-xmax ) ) {
/**
* Scale x by 1/2.
**/
cblas_dscal( n, half, x, 1 );
*scale = *scale*half;
}
if( upper ) {
if( j>1 ) {
/**
* Compute the update
* x_1(1:j-1) := x_1(1:j-1) - x_1(j) * A_2(1:j-1,j)
**/
cblas_daxpy( j-1, -x_1( j )*tscal, &a_2( 1, j ), 1, x,
1 );
i = 1+cblas_idamax( j-1, x, 1 );
xmax = abs( x_1( i ) );
}
} else {
if( j<n ) {
/**
* Compute the update
* x_1(j+1:n) := x_1(j+1:n) - x_1(j) * A_2(j+1:n,j)
**/
cblas_daxpy( n-j, -x_1( j )*tscal, &a_2( j+1, j ), 1,
&x_1( j+1 ), 1 );
i = j + 1+cblas_idamax( n-j, &x_1( j+1 ), 1 );
xmax = abs( x_1( i ) );
}
}
}
} else {
/**
* Solve A' * *x = b
**/
for (j=jfirst ; jinc>0?j<=jlast:j>=jlast ; j+=jinc) {
/**
* Compute x_1(j) = b(j) - sum A_2(k,j)*x_1(k).
* k<>j
**/
xj = abs( x_1( j ) );
uscal = tscal;
rec = one / max( xmax, one );
if( cnorm_1( j )>( bignum-xj )*rec ) {
/**
* If x_1(j) could overflow, scale x by 1/(2*XMAX).
**/
rec = rec*half;
if( nounit ) {
tjjs = a_2( j, j )*tscal;
} else {
tjjs = tscal;
}
tjj = abs( tjjs );
if( tjj>one ) {
/**
* Divide by A_2(j,j) when scaling x if A_2(j,j) > 1.
**/
rec = min( one, rec*tjj );
uscal = uscal / tjjs;
}
if( rec<one ) {
cblas_dscal( n, rec, x, 1 );
*scale = *scale*rec;
xmax = xmax*rec;
}
}
sumj = zero;
if( uscal==one ) {
/**
* If the scaling needed for A in the dot product is 1,
* call CBLAS_DDOT to perform the dot product.
**/
if( upper ) {
sumj = cblas_ddot( j-1, &a_2( 1, j ), 1, x, 1 );
} else if( j<n ) {
sumj = cblas_ddot( n-j, &a_2( j+1, j ), 1, &x_1( j+1 ), 1 );
}
} else {
/**
* Otherwise, use in-line code for the dot product.
**/
if( upper ) {
for (i=1 ; i<=j - 1 ; i+=1) {
sumj = sumj + ( a_2( i, j )*uscal )*x_1( i );
}
} else if( j<n ) {
for (i=j + 1 ; i<=n ; i+=1) {
sumj = sumj + ( a_2( i, j )*uscal )*x_1( i );
}
}
}
if( uscal==tscal ) {
/**
* Compute x_1(j) := ( x_1(j) - sumj ) / A_2(j,j) if 1/A_2(j,j)
* was not used to scale the dotproduct.
**/
x_1( j ) = x_1( j ) - sumj;
xj = abs( x_1( j ) );
if( nounit ) {
tjjs = a_2( j, j )*tscal;
} else {
tjjs = tscal;
if( tscal==one )
goto L_150;
}
/**
* Compute x_1(j) = x_1(j) / A_2(j,j), scaling if necessary.
**/
tjj = abs( tjjs );
if( tjj>smlnum ) {
/**
* abs(A_2(j,j)) > SMLNUM:
**/
if( tjj<one ) {
if( xj>tjj*bignum ) {
/**
* Scale X by 1/abs(x_1(j)).
**/
rec = one / xj;
cblas_dscal( n, rec, x, 1 );
*scale = *scale*rec;
xmax = xmax*rec;
}
}
x_1( j ) = x_1( j ) / tjjs;
} else if( tjj>zero ) {
/**
* 0 < abs(A_2(j,j)) <= SMLNUM:
**/
if( xj>tjj*bignum ) {
/**
* Scale x by (1/abs(x_1(j)))*abs(A_2(j,j))*BIGNUM.
**/
rec = ( tjj*bignum ) / xj;
cblas_dscal( n, rec, x, 1 );
*scale = *scale*rec;
xmax = xmax*rec;
}
x_1( j ) = x_1( j ) / tjjs;
} else {
/**
* A_2(j,j) = 0: Set x_1(1:n) = 0, x_1(j) = 1, and
* *scale = 0, and compute a solution to A'**x = 0.
**/
for (i=1 ; i<=n ; i+=1) {
x_1( i ) = zero;
}
x_1( j ) = one;
*scale = zero;
xmax = zero;
}
L_150:;
} else {
/**
* Compute x_1(j) := x_1(j) / A_2(j,j) - sumj if the dot
* product has already been divided by 1/A_2(j,j).
**/
x_1( j ) = x_1( j ) / tjjs - sumj;
}
xmax = max( xmax, abs( x_1( j ) ) );
}
}
*scale = *scale / tscal;
}
/**
* Scale the column norms by 1/TSCAL for return.
**/
if( tscal!=one ) {
cblas_dscal( n, one / tscal, cnorm, 1 );
}
return;
/**
* End of DLATRS
**/
}
double dlamch( char cmach )
{
/**
* -- LAPACK auxiliary routine (version 1.1) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* October 31, 1992
*
* .. Scalar Arguments ..*/
double dlamch_R;
/** ..
*
* Purpose
* =======
*
* DLAMCH determines double precision machine parameters.
*
* Arguments
* =========
*
* CMACH (input) CHARACTER*1
* Specifies the value to be returned by DLAMCH:
* = 'E' or 'e', DLAMCH := eps
* = 'S' or 's , DLAMCH := sfmin
* = 'B' or 'b', DLAMCH := base
* = 'P' or 'p', DLAMCH := eps*base
* = 'N' or 'n', DLAMCH := t
* = 'R' or 'r', DLAMCH := rnd
* = 'M' or 'm', DLAMCH := emin
* = 'U' or 'u', DLAMCH := rmin
* = 'L' or 'l', DLAMCH := emax
* = 'O' or 'o', DLAMCH := rmax
*
* where
*
* eps = relative machine precision
* sfmin = safe minimum, such that 1/sfmin does not overflow
* base = base of the machine
* prec = eps*base
* t = number of (base) digits in the mantissa
* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise
* emin = minimum exponent before (gradual) underflow
* rmin = underflow threshold - base**(emin-1)
* emax = largest exponent before overflow
* rmax = overflow threshold - (base**emax)*(1-eps)
*
* =====================================================================
*
* .. Parameters ..*/
#undef one
#define one 1.0e+0
#undef zero
#define zero 0.0e+0
/** ..
* .. Local Scalars ..*/
int lrnd;
long beta, imax, imin, it;
double rmach=0.0, small;
/** ..
* .. Save statement ..*/
/*-----implicit-declarations-----*/
/*-----end-of-declarations-----*/
static int first= 1;
static double eps, sfmin, base, t, rnd, emin, rmin, emax, rmax, prec;
/** ..
* .. Executable Statements ..
**/
if( first ) {
first = 0;
dlamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax );
base = beta;
t = it;
if( lrnd ) {
rnd = one;
eps = ( pow(base,(double)( 1-it )) ) / 2;
} else {
rnd = zero;
eps = pow(base,(double)( 1-it ));
}
prec = eps*base;
emin = imin;
emax = imax;
sfmin = rmin;
small = one / rmax;
if( small>=sfmin ) {
/**
* Use SMALL plus a bit, to avoid the possibility of rounding
* causing overflow when computing 1/sfmin.
**/
sfmin = small*( one+eps );
}
}
if( lsame( cmach, 'e' ) ) {
rmach = eps;
} else if( lsame( cmach, 's' ) ) {
rmach = sfmin;
} else if( lsame( cmach, 'b' ) ) {
rmach = base;
} else if( lsame( cmach, 'p' ) ) {
rmach = prec;
} else if( lsame( cmach, 'n' ) ) {
rmach = t;
} else if( lsame( cmach, 'r' ) ) {
rmach = rnd;
} else if( lsame( cmach, 'm' ) ) {
rmach = emin;
} else if( lsame( cmach, 'u' ) ) {
rmach = rmin;
} else if( lsame( cmach, 'l' ) ) {
rmach = emax;
} else if( lsame( cmach, 'o' ) ) {
rmach = rmax;
}
dlamch_R = rmach;
return dlamch_R;
/**
* End of DLAMCH
**/
}
/**
************************************************************************
**/
void dlamc1( long *beta, long *t, int *rnd, int *ieee1 )
{
/**
* -- LAPACK auxiliary routine (version 1.1) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* October 31, 1992
*
* .. Scalar Arguments ..*/
/** ..
*
* Purpose
* =======
*
* DLAMC1 determines the machine parameters given by BETA, T, RND, and
* IEEE1.
*
* Arguments
* =========
*
* BETA (output) INTEGER
* The base of the machine.
*
* T (output) INTEGER
* The number of ( BETA ) digits in the mantissa.
*
* RND (output) LOGICAL
* Specifies whether proper rounding ( RND = .TRUE. ) or
* chopping ( RND = .FALSE. ) occurs in addition. This may not
* be a reliable guide to the way in which the machine performs
* its arithmetic.
*
* IEEE1 (output) LOGICAL
* Specifies whether rounding appears to be done in the IEEE
* 'round to nearest' style.
*
* Further Details
* ===============
*
* The routine is based on the routine ENVRON by Malcolm and
* incorporates suggestions by Gentleman and Marovich. See
*
* Malcolm M. A. (1972) Algorithms to reveal properties of
* floating-point arithmetic. Comms. of the ACM, 15, 949-951.
*
* Gentleman W. M. and Marovich S. B. (1974) More on algorithms
* that reveal properties of floating point arithmetic units.
* Comms. of the ACM, 17, 276-277.
*
* =====================================================================
*
* .. Local Scalars ..*/
#undef one
double a, b, c, f, one, qtr, savec, t1, t2;
/*-----implicit-declarations-----*/
/*-----end-of-declarations-----*/
static int first= 1;
static int lieee1, lrnd;
static long lbeta, lt;
/** ..
* .. Executable Statements ..
**/
if( first ) {
first = 0;
one = 1;
/**
* LBETA, LIEEE1, LT and LRND are the local values of BETA,
* IEEE1, T and RND.
*
* Throughout this routine we use the function DLAMC3 to ensure
* that relevant values are stored and not held in registers, or
* are not affected by optimizers.
*
* Compute a = 2.0**m with the smallest positive integer m such
* that
*
* fl( a + 1.0 ) = a.
**/
a = 1;
c = 1;
/**
*+ WHILE( C.EQ.ONE )LOOP*/
L_10:
if( c==one ) {
a = 2*a;
c = dlamc3( a, one );
c = dlamc3( c, -a );
goto L_10;
}
/**+ END WHILE
*
* Now compute b = 2.0**m with the smallest positive integer m
* such that
*
* fl( a + b ) .gt. a.
**/
b = 1;
c = dlamc3( a, b );
/**
*+ WHILE( C.EQ.A )LOOP*/
L_20:
if( c==a ) {
b = 2*b;
c = dlamc3( a, b );
goto L_20;
}
/**+ END WHILE
*
* Now compute the base. a and c are neighbouring floating point
* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so
* their difference is beta. Adding 0.25 to c is to ensure that it
* is truncated to beta and not ( beta - 1 ).
**/
qtr = one / 4;
savec = c;
c = dlamc3( c, -a );
lbeta = (long)(c + qtr);
/**
* Now determine whether rounding or chopping occurs, by adding a
* bit less than beta/2 and a bit more than beta/2 to a.
**/
b = lbeta;
f = dlamc3( b / 2, -b / 100 );
c = dlamc3( f, a );
if( c==a ) {
lrnd = 1;
} else {
lrnd = 0;
}
f = dlamc3( b / 2, b / 100 );
c = dlamc3( f, a );
if( ( lrnd ) && ( c==a ) )
lrnd = 0;
/**
* Try and decide whether rounding is done in the IEEE 'round to
* nearest' style. B/2 is half a unit in the last place of the two
* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit
* zero, and SAVEC is odd. Thus adding B/2 to A should not change
* A, but adding B/2 to SAVEC should change SAVEC.
**/
t1 = dlamc3( b / 2, a );
t2 = dlamc3( b / 2, savec );
lieee1 = ( t1==a ) && ( t2>savec ) && lrnd;
/**
* Now find the mantissa, t. It should be the integer part of
* log to the base beta of a, however it is safer to determine t
* by powering. So we find t as the smallest positive integer for
* which
*
* fl( beta**t + 1.0 ) = 1.0.
**/
lt = 0;
a = 1;
c = 1;
/**
*+ WHILE( C.EQ.ONE )LOOP*/
L_30:
if( c==one ) {
lt = lt + 1;
a = a*lbeta;
c = dlamc3( a, one );
c = dlamc3( c, -a );
goto L_30;
}
/**+ END WHILE
**/
}
*beta = lbeta;
*t = lt;
*rnd = lrnd;
*ieee1 = lieee1;
return;
/**
* End of DLAMC1
**/
}
/**
************************************************************************
**/
void dlamc2( long *beta, long *t, int *rnd, double *eps,
long *emin, double *rmin, long *emax, double *rmax )
{
/**
* -- LAPACK auxiliary routine (version 1.1) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* October 31, 1992
*
* .. Scalar Arguments ..*/
/** ..
*
* Purpose
* =======
*
* DLAMC2 determines the machine parameters specified in its argument
* list.
*
* Arguments
* =========
*
* BETA (output) INTEGER
* The base of the machine.
*
* T (output) INTEGER
* The number of ( BETA ) digits in the mantissa.
*
* RND (output) LOGICAL
* Specifies whether proper rounding ( RND = .TRUE. ) or
* chopping ( RND = .FALSE. ) occurs in addition. This may not
* be a reliable guide to the way in which the machine performs
* its arithmetic.
*
* EPS (output) DOUBLE PRECISION
* The smallest positive number such that
*
* fl( 1.0 - EPS ) .LT. 1.0,
*
* where fl denotes the computed value.
*
* EMIN (output) INTEGER
* The minimum exponent before (gradual) underflow occurs.
*
* RMIN (output) DOUBLE PRECISION
* The smallest normalized number for the machine, given by
* BASE**( EMIN - 1 ), where BASE is the floating point value
* of BETA.
*
* EMAX (output) INTEGER
* The maximum exponent before overflow occurs.
*
* RMAX (output) DOUBLE PRECISION
* The largest positive number for the machine, given by
* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point
* value of BETA.
*
* Further Details
* ===============
*
* The computation of EPS is based on a routine PARANOIA by
* W. Kahan of the University of California at Berkeley.
*
* =====================================================================
*
* .. Local Scalars ..*/
#undef one
#undef zero
#undef half
int ieee, lieee1;
long gnmin, gpmin, i, ngnmin, ngpmin;
double a, b, c, half, one, rbase, sixth, small, third, two, zero;
/** ..
* .. Intrinsic Functions ..*/
/* intrinsic abs, max, min;*/
/*-----implicit-declarations-----*/
/*-----end-of-declarations-----*/
static int first= 1;
/* static int iwarn= 0; */
static int lrnd;
static long lbeta, lemax, lemin, lt;
static double lrmax, lrmin, leps;
/** ..
* .. Executable Statements ..
**/
if( first ) {
first = 0;
zero = 0;
one = 1;
two = 2;
/**
* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of
* BETA, T, RND, EPS, EMIN and RMIN.
*
* Throughout this routine we use the function DLAMC3 to ensure
* that relevant values are stored and not held in registers, or
* are not affected by optimizers.
*
* DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1.
**/
dlamc1( &lbeta, <, &lrnd, &lieee1 );
/**
* Start to find EPS.
**/
b = lbeta;
a = pow(b,(double)( -lt ));
leps = a;
/**
* Try some tricks to see whether or not this is the correct EPS.
**/
b = two / 3;
half = one / 2;
sixth = dlamc3( b, -half );
third = dlamc3( sixth, sixth );
b = dlamc3( third, -half );
b = dlamc3( b, sixth );
b = abs( b );
if( b<leps )
b = leps;
leps = 1;
/**
*+ WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP*/
L_10:
if( ( leps>b ) && ( b>zero ) ) {
leps = b;
c = dlamc3( half*leps, ( two*two*two*two*two )*( leps*leps ) );
c = dlamc3( half, -c );
b = dlamc3( half, c );
c = dlamc3( half, -b );
b = dlamc3( half, c );
goto L_10;
}
/**+ END WHILE
**/
if( a<leps )
leps = a;
/**
* Computation of EPS complete.
*
* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)).
* Keep dividing A by BETA until (gradual) underflow occurs. This
* is detected when we cannot recover the previous A.
**/
rbase = one / lbeta;
small = one;
for (i=1 ; i<=3 ; i+=1) {
small = dlamc3( small*rbase, zero );
}
a = dlamc3( one, small );
dlamc4( &ngpmin, one, lbeta );
dlamc4( &ngnmin, -one, lbeta );
dlamc4( &gpmin, a, lbeta );
dlamc4( &gnmin, -a, lbeta );
ieee = 0;
if( ( ngpmin==ngnmin ) && ( gpmin==gnmin ) ) {
if( ngpmin==gpmin ) {
lemin = ngpmin;
/** ( Non twos-complement machines, no gradual underflow;
* e.g., VAX )*/
} else if( ( gpmin-ngpmin )==3 ) {
lemin = ngpmin - 1 + lt;
ieee = 1;
/** ( Non twos-complement machines, with gradual underflow;
* e.g., IEEE standard followers )*/
} else {
lemin = min( ngpmin, gpmin );
/** ( A guess; no known machine )*/
/* iwarn = 1; */
}
} else if( ( ngpmin==gpmin ) && ( ngnmin==gnmin ) ) {
if( abs( ngpmin-ngnmin )==1 ) {
lemin = max( ngpmin, ngnmin );
/** ( Twos-complement machines, no gradual underflow;
* e.g., CYBER 205 )*/
} else {
lemin = min( ngpmin, ngnmin );
/** ( A guess; no known machine )*/
/* iwarn = 1; */
}
} else if( ( abs( ngpmin-ngnmin )==1 ) &&
( gpmin==gnmin ) ) {
if( ( gpmin-min( ngpmin, ngnmin ) )==3 ) {
lemin = max( ngpmin, ngnmin ) - 1 + lt;
/** ( Twos-complement machines with gradual underflow;
* no known machine )*/
} else {
lemin = min( ngpmin, ngnmin );
/** ( A guess; no known machine )*/
/* iwarn = 1; */
}
} else {
lemin = min( min(ngpmin, ngnmin), min(gpmin, gnmin) );
/** ( A guess; no known machine )*/
/* iwarn = 1; */
}
/*****
** Comment out this if block if EMIN is ok
* IF( IWARN ) THEN
* FIRST = .TRUE.
* WRITE( 6, FMT = 9999 )LEMIN
* END IF
****
*
* Assume IEEE arithmetic if we found denormalised numbers above,
* or if arithmetic seems to round in the IEEE style, determined
* in routine DLAMC1. A true IEEE machine should have both things
* true; however, faulty machines may have one or the other.
**/
ieee = ieee || lieee1;
/**
* Compute RMIN by successive division by BETA. We could compute
* RMIN as BASE**( EMIN - 1 ), but some machines underflow during
* this computation.
**/
lrmin = 1;
for (i=1 ; i<=1 - lemin ; i+=1) {
lrmin = dlamc3( lrmin*rbase, zero );
}
/**
* Finally, call DLAMC5 to compute EMAX and RMAX.
**/
dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax );
}
*beta = lbeta;
*t = lt;
*rnd = lrnd;
*eps = leps;
*emin = lemin;
*rmin = lrmin;
*emax = lemax;
*rmax = lrmax;
return;
/**
* 9999 FORMAT( / / ' WARNING. The value EMIN may be incorrect:-',
* $ ' EMIN = ', I8, /
* $ ' If, after inspection, the value EMIN looks',
* $ ' acceptable please comment out ',
* $ / ' the IF block as marked within the code of routine',
* $ ' DLAMC2,', / ' otherwise supply EMIN explicitly.', / )
*
* End of DLAMC2
**/
}
/**
************************************************************************
**/
void dlamc4( long *emin, double start, long base )
{
/**
* -- LAPACK auxiliary routine (version 1.1) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* October 31, 1992
*
* .. Scalar Arguments ..*/
/** ..
*
* Purpose
* =======
*
* DLAMC4 is a service routine for DLAMC2.
*
* Arguments
* =========
*
* EMIN (output) EMIN
* The minimum exponent before (gradual) underflow, computed by
* setting A = START and dividing by BASE until the previous A
* can not be recovered.
*
* START (input) DOUBLE PRECISION
* The starting point for determining EMIN.
*
* BASE (input) INTEGER
* The base of the machine.
*
* =====================================================================
*
* .. Local Scalars ..*/
#undef one
#undef zero
long i;
double a, b1, b2, c1, c2, e1, e2, one, rbase, zero;
/** ..
* .. Executable Statements ..
**/
/*-----implicit-declarations-----*/
/*-----end-of-declarations-----*/
a = start;
one = 1;
rbase = one / base;
zero = 0;
*emin = 1;
b1 = dlamc3( a*rbase, zero );
c1 = a;
c2 = a;
e1 = a;
e2 = a;
/**+ WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND.
* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP*/
L_10:
if( ( c1==a ) && ( c2==a ) && ( e1==a ) &&
( e2==a ) ) {
*emin = *emin - 1;
a = b1;
b1 = dlamc3( a / base, zero );
c1 = dlamc3( b1*base, zero );
e1 = zero;
for (i=1 ; i<=base ; i+=1) {
e1 = e1 + b1;
}
b2 = dlamc3( a*rbase, zero );
c2 = dlamc3( b2 / rbase, zero );
e2 = zero;
for (i=1 ; i<=base ; i+=1) {
e2 = e2 + b2;
}
goto L_10;
}
/**+ END WHILE
**/
return;
/**
* End of DLAMC4
**/
}
/**
************************************************************************
**/
void dlamc5( long beta, long p, long emin, int ieee,
long *emax, double *rmax )
{
/**
* -- LAPACK auxiliary routine (version 1.1) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* October 31, 1992
*
* .. Scalar Arguments ..*/
/** ..
*
* Purpose
* =======
*
* DLAMC5 attempts to compute RMAX, the largest machine floating-point
* number, without overflow. It assumes that EMAX + abs(EMIN) sum
* approximately to a power of 2. It will fail on machines where this
* assumption does not hold, for example, the Cyber 205 (EMIN = -28625,
* EMAX = 28718). It will also fail if the value supplied for EMIN is
* too large (i.e. too close to zero), probably with overflow.
*
* Arguments
* =========
*
* BETA (input) INTEGER
* The base of floating-point arithmetic.
*
* P (input) INTEGER
* The number of base BETA digits in the mantissa of a
* floating-point value.
*
* EMIN (input) INTEGER
* The minimum exponent before (gradual) underflow.
*
* IEEE (input) LOGICAL
* A logical flag specifying whether or not the arithmetic
* system is thought to comply with the IEEE standard.
*
* EMAX (output) INTEGER
* The largest exponent before overflow
*
* RMAX (output) DOUBLE PRECISION
* The largest machine floating-point number.
*
* =====================================================================
*
* .. Parameters ..*/
#undef zero
#define zero 0.0e0
#undef one
#define one 1.0e0
/** ..
* .. Local Scalars ..*/
long exbits, expsum, i, lexp, nbits, try, uexp;
double oldy=0.0, recbas, y, z;
/** ..
* .. Intrinsic Functions ..*/
/* intrinsic mod;*/
/** ..
* .. Executable Statements ..
*
* First compute LEXP and UEXP, two powers of 2 that bound
* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum
* approximately to the bound that is closest to abs(EMIN).
* (EMAX is the exponent of the required number RMAX).
**/
/*-----implicit-declarations-----*/
/*-----end-of-declarations-----*/
lexp = 1;
exbits = 1;
L_10:
try = lexp*2;
if( try<=( -emin ) ) {
lexp = try;
exbits = exbits + 1;
goto L_10;
}
if( lexp==-emin ) {
uexp = lexp;
} else {
uexp = try;
exbits = exbits + 1;
}
/**
* Now -LEXP is less than or equal to EMIN, and -UEXP is greater
* than or equal to EMIN. EXBITS is the number of bits needed to
* store the exponent.
**/
if( ( uexp+emin )>( -lexp-emin ) ) {
expsum = 2*lexp;
} else {
expsum = 2*uexp;
}
/**
* EXPSUM is the exponent range, approximately equal to
* EMAX - EMIN + 1 .
**/
*emax = expsum + emin - 1;
nbits = 1 + exbits + p;
/**
* NBITS is the total number of bits needed to store a
* floating-point number.
**/
if( ( mod( nbits, 2 )==1 ) && ( beta==2 ) ) {
/**
* Either there are an odd number of bits used to store a
* floating-point number, which is unlikely, or some bits are
* not used in the representation of numbers, which is possible,
* (e.g. Cray machines) or the mantissa has an implicit bit,
* (e.g. IEEE machines, Dec Vax machines), which is perhaps the
* most likely. We have to assume the last alternative.
* If this is true, then we need to reduce EMAX by one because
* there must be some way of representing zero in an implicit-bit
* system. On machines like Cray, we are reducing EMAX by one
* unnecessarily.
**/
*emax = *emax - 1;
}
if( ieee ) {
/**
* Assume we are on an IEEE machine which reserves one exponent
* for infinity and NaN.
**/
*emax = *emax - 1;
}
/**
* Now create RMAX, the largest machine number, which should
* be equal to (1.0 - BETA**(-P)) * BETA**EMAX .
*
* First compute 1.0 - BETA**(-P), being careful that the
* result is less than 1.0 .
**/
recbas = one / beta;
z = beta - one;
y = zero;
for (i=1 ; i<=p ; i+=1) {
z = z*recbas;
if( y<one )
oldy = y;
y = dlamc3( y, z );
}
if( y>=one )
y = oldy;
/**
* Now multiply by BETA**EMAX to get RMAX.
**/
for (i=1 ; i<=*emax ; i+=1) {
y = dlamc3( y*beta, zero );
}
*rmax = y;
return;
/**
* End of DLAMC5
**/
}
void dlacon( long n, double v[], double x[], long isgn[],
double *est, long *kase )
{
/**
* -- LAPACK auxiliary routine (version 1.1) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* February 29, 1992
*
* .. Scalar Arguments ..*/
/** ..
* .. Array Arguments ..*/
#undef isgn_1
#define isgn_1(a1) isgn[a1-1]
#undef x_1
#define x_1(a1) x[a1-1]
#undef v_1
#define v_1(a1) v[a1-1]
/** ..
*
* Purpose
* =======
*
* DLACON estimates the 1-norm of a square, real matrix A.
* Reverse communication is used for evaluating matrix-vector products.
*
* Arguments
* =========
*
* N (input) INTEGER
* The order of the matrix. N >= 1.
*
* V (workspace) DOUBLE PRECISION array, dimension (N)
* On the final return, V = A*W, where EST = norm(V)/norm(W)
* (W is not returned).
*
* X (input/output) DOUBLE PRECISION array, dimension (N)
* On an intermediate return, X should be overwritten by
* A * X, if KASE=1,
* A' * X, if KASE=2,
* and DLACON must be re-called with all the other parameters
* unchanged.
*
* ISGN (workspace) INTEGER array, dimension (N)
*
* EST (output) DOUBLE PRECISION
* An estimate (a lower bound) for norm(A).
*
* KASE (input/output) INTEGER
* On the initial call to DLACON, KASE should be 0.
* On an intermediate return, KASE will be 1 or 2, indicating
* whether X should be overwritten by A * X or A' * X.
* On the final return from DLACON, KASE will again be 0.
*
* Further Details
* ======= =======
*
* Contributed by Nick Higham, University of Manchester.
* Originally named SONEST, dated March 16, 1988.
*
* Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of
* a real or complex matrix, with applications to condition estimation",
* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.
*
* =====================================================================
*
* .. Parameters ..*/
#undef itmax
#define itmax 5
#undef zero
#define zero 0.0e+0
#undef one
#define one 1.0e+0
#undef two
#define two 2.0e+0
/** ..
* .. Local Scalars ..*/
static long i, iter, j, jlast, jump;
static double altsgn, estold, temp;
/** ..
* .. Intrinsic Functions ..*/
/* intrinsic abs, dble, nint, sign;*/
/*-----implicit-declarations-----*/
/*-----end-of-declarations-----*/
/** ..
* .. Executable Statements ..
**/
if( *kase==0 ) {
for (i=1 ; i<=n ; i+=1) {
x_1( i ) = one / dble( n );
}
*kase = 1;
jump = 1;
return;
}
switch (jump) {
case 1: goto L_20;
case 2: goto L_40;
case 3: goto L_70;
case 4: goto L_110;
case 5: goto L_140;
}
/**
* ................ ENTRY (JUMP = 1)
* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X.
**/
L_20:
if( n==1 ) {
v_1( 1 ) = x_1( 1 );
*est = abs( v_1( 1 ) );
/** ... QUIT*/
goto L_150;
}
*est = cblas_dasum( n, x, 1 );
for (i=1 ; i<=n ; i+=1) {
x_1( i ) = sign( one, x_1( i ) );
isgn_1( i ) = nint( x_1( i ) );
}
*kase = 2;
jump = 2;
return;
/**
* ................ ENTRY (JUMP = 2)
* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X.
**/
L_40:
j = 1+cblas_idamax( n, x, 1 );
iter = 2;
/**
* MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
**/
L_50:
for (i=1 ; i<=n ; i+=1) {
x_1( i ) = zero;
}
x_1( j ) = one;
*kase = 1;
jump = 3;
return;
/**
* ................ ENTRY (JUMP = 3)
* X HAS BEEN OVERWRITTEN BY A*X.
**/
L_70:
cblas_dcopy( n, x, 1, v, 1 );
estold = *est;
*est = cblas_dasum( n, v, 1 );
for (i=1 ; i<=n ; i+=1) {
if( nint( sign( one, x_1( i ) ) )!=isgn_1( i ) )
goto L_90;
}
/** REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED.*/
goto L_120;
L_90:
/** TEST FOR CYCLING.*/
if( *est<=estold )
goto L_120;
for (i=1 ; i<=n ; i+=1) {
x_1( i ) = sign( one, x_1( i ) );
isgn_1( i ) = nint( x_1( i ) );
}
*kase = 2;
jump = 4;
return;
/**
* ................ ENTRY (JUMP = 4)
* X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X.
**/
L_110:
jlast = j;
j = 1+cblas_idamax( n, x, 1 );
if( ( x_1( jlast )!=abs( x_1( j ) ) ) && ( iter<itmax ) ) {
iter = iter + 1;
goto L_50;
}
/**
* ITERATION COMPLETE. FINAL STAGE.
**/
L_120:
altsgn = one;
for (i=1 ; i<=n ; i+=1) {
x_1( i ) = altsgn*( one+dble( i-1 ) / dble( n-1 ) );
altsgn = -altsgn;
}
*kase = 1;
jump = 5;
return;
/**
* ................ ENTRY (JUMP = 5)
* X HAS BEEN OVERWRITTEN BY A*X.
**/
L_140:
temp = two*( cblas_dasum( n, x, 1 ) / dble( 3*n ) );
if( temp>*est ) {
cblas_dcopy( n, x, 1, v, 1 );
*est = temp;
}
L_150:
*kase = 0;
return;
/**
* End of DLACON
**/
}
/* Defeat optimizers by splitting dlamc3 across files and forcing the
result to pass through a memory location.
Actually, it would be more correct to go after the comparison
operation than the sum, but this perturbs the existing code less. */
extern void dlamc3_worker( double a, double b );
extern double dlamc3_sum;
void dlamc3_worker( double a, double b )
{
dlamc3_sum= a+b;
}
syntax highlighted by Code2HTML, v. 0.9.1