/* * $Id: dgesvd.c,v 1.1.1.1 2005/09/18 22:04:48 dhmunro Exp $ * LAPACK routine to return the SVD of a matrix (m>=n branch only). */ /* Copyright (c) 2005, The Regents of the University of California. * All rights reserved. * This file is part of yorick (http://yorick.sourceforge.net). * Read the accompanying LICENSE file for details. */ #include "dg.h" /* Compilers have a hard time with this ridiculously large driver routine. Therefore, I split it into a separate m>=n and m(y)?(x):(y)) /*-----end of Fortran intrinsics-----*/ void dgesvd( char jobu, char jobvt, long m, long n, double a[], long lda, double s[], double u[], long ldu, double vt[], long ldvt, double work[], long lwork, long *info ) { /** * -- LAPACK driver routine (version 1.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * March 31, 1993 * * .. Scalar Arguments ..*/ /** .. * .. Array Arguments ..*/ #undef work_1 #define work_1(a1) work[a1-1] #undef vt_2 #define vt_2(a1,a2) vt[a1-1+ldvt*(a2-1)] #undef u_2 #define u_2(a1,a2) u[a1-1+ldu*(a2-1)] #undef s_1 #define s_1(a1) s[a1-1] #undef a_2 #define a_2(a1,a2) a[a1-1+lda*(a2-1)] /** .. * * Purpose * ======= * * DGESVD computes the singular value decomposition (SVD) of a real * M-by-N matrix A, optionally computing the left and/or right singular * vectors. The SVD is written * * A = U * SIGMA * transpose(V) * * where SIGMA is an M-by-N matrix which is zero except for its * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA * are the singular values of A; they are real and non-negative, and * are returned in descending order. The first min(m,n) columns of * U and V are the left and right singular vectors of A. * * Note that the routine returns V**T, not V. * * Arguments * ========= * * JOBU (input) CHARACTER*1 * Specifies options for computing all or part of the matrix U: * = 'A': all M columns of U are returned in array U: * = 'S': the first min(m,n) columns of U (the left singular * vectors) are returned in the array U; * = 'O': the first min(m,n) columns of U (the left singular * vectors) are overwritten on the array A; * = 'N': no columns of U (no left singular vectors) are * computed. * * JOBVT (input) CHARACTER*1 * Specifies options for computing all or part of the matrix * V**T: * = 'A': all N rows of V**T are returned in the array VT; * = 'S': the first min(m,n) rows of V**T (the right singular * vectors) are returned in the array VT; * = 'O': the first min(m,n) rows of V**T (the right singular * vectors) are overwritten on the array A; * = 'N': no rows of V**T (no right singular vectors) are * computed. * * JOBVT and JOBU cannot both be 'O'. * * M (input) INTEGER * The number of rows of the input matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the input matrix A. N >= 0. * * A (input/output) DOUBLE PRECISION array, dimension (LDA,N) * On entry, the M-by-N matrix A. * On exit, * if JOBU = 'O', A is overwritten with the first min(m,n) * columns of U (the left singular vectors, * stored columnwise); * if JOBVT = 'O', A is overwritten with the first min(m,n) * rows of V**T (the right singular vectors, * stored rowwise); * if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A * are destroyed. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,M). * * S (output) DOUBLE PRECISION array, dimension (min(M,N)) * The singular values of A, sorted so that S(i) >= S(i+1). * * U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) * (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. * If JOBU = 'A', U contains the M-by-M orthogonal matrix U; * if JOBU = 'S', U contains the first min(m,n) columns of U * (the left singular vectors, stored columnwise); * if JOBU = 'N' or 'O', U is not referenced. * * LDU (input) INTEGER * The leading dimension of the array U. LDU >= 1; if * JOBU = 'S' or 'A', LDU >= M. * * VT (output) DOUBLE PRECISION array, dimension (LDVT,N) * If JOBVT = 'A', VT contains the N-by-N orthogonal matrix * V**T; * if JOBVT = 'S', VT contains the first min(m,n) rows of * V**T (the right singular vectors, stored rowwise); * if JOBVT = 'N' or 'O', VT is not referenced. * * LDVT (input) INTEGER * The leading dimension of the array VT. LDVT >= 1; if * JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). * * WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) * On exit, if INFO = 0, WORK(1) returns the optimal LWORK; * if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged * superdiagonal elements of an upper bidiagonal matrix B * whose diagonal is in S (not necessarily sorted). B * satisfies A = U * B * VT, so it has the same singular values * as A, and singular vectors related by U and VT. * * LWORK (input) INTEGER * The dimension of the array WORK. LWORK >= 1. * LWORK >= MAX(3*MIN(M,N)+MAX(M,N),5*MIN(M,N)-4). * For good performance, LWORK should generally be larger. * * INFO (output) INTEGER * = 0: successful exit. * < 0: if INFO = -i, the i-th argument had an illegal value. * > 0: if DBDSQR did not converge, INFO specifies how many * superdiagonals of an intermediate bidiagonal form B * did not converge to zero. See the description of WORK * above for details. * * ===================================================================== * * .. Parameters ..*/ #undef zero #define zero 0.0e0 #undef one #define one 1.0e0 /** .. * .. Local Scalars ..*/ int wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo, wntvs; long chunk, i, ie=0, ierr, ir, iscl, itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku, maxwrk=0, minmn, minwrk, mnthr, ncu=0, ncvt=0, nru=0, wrkbl=0; double anrm, bignum, eps, smlnum; char job_u_vt[3]; /** .. * .. Local Arrays ..*/ double dum[1]; #undef dum_1 #define dum_1(a1) [a1-1] /** .. * .. Intrinsic Functions ..*/ /* intrinsic max, min, sqrt;*/ /** .. * .. Executable Statements .. * * Test the input arguments **/ /*-----implicit-declarations-----*/ /*-----end-of-declarations-----*/ if (m=1 && m>0 && n>0 ) { if( m>=mnthr ) { if( wntun ) { /** * Path 1 (M much larger than N, JOBU='N') **/ maxwrk = n + n*ilaenv( 1, "dgeqrf", " ", m, n, -1, -1 ); maxwrk = max( maxwrk, 3*n+2*n* ilaenv( 1, "dgebrd", " ", n, n, -1, -1 ) ); if( wntvo || wntvas ) maxwrk = max( maxwrk, 3*n+( n-1 )* ilaenv( 1, "dorgbr", "p", n, n, n, -1 ) ); maxwrk = max( maxwrk, 5*n-4 ); minwrk = max( 4*n, 5*n-4 ); } else if( wntuo && wntvn ) { /** * Path 2 (M much larger than N, JOBU='O', JOBVT='N') **/ wrkbl = n + n*ilaenv( 1, "dgeqrf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, n+n*ilaenv( 1, "dorgqr", " ", m, n, n, -1 ) ); wrkbl = max( wrkbl, 3*n+2*n* ilaenv( 1, "dgebrd", " ", n, n, -1, -1 ) ); wrkbl = max( wrkbl, 3*n+n* ilaenv( 1, "dorgbr", "q", n, n, n, -1 ) ); wrkbl = max( wrkbl, 5*n-4 ); maxwrk = max( n*n+wrkbl, n*n+m*n+n ); minwrk = max( 3*n+m, 5*n-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntuo && wntvas ) { /** * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or * 'A') **/ wrkbl = n + n*ilaenv( 1, "dgeqrf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, n+n*ilaenv( 1, "dorgqr", " ", m, n, n, -1 ) ); wrkbl = max( wrkbl, 3*n+2*n* ilaenv( 1, "dgebrd", " ", n, n, -1, -1 ) ); wrkbl = max( wrkbl, 3*n+n* ilaenv( 1, "dorgbr", "q", n, n, n, -1 ) ); wrkbl = max( wrkbl, 3*n+( n-1 )* ilaenv( 1, "dorgbr", "p", n, n, n, -1 ) ); wrkbl = max( wrkbl, 5*n-4 ); maxwrk = max( n*n+wrkbl, n*n+m*n+n ); minwrk = max( 3*n+m, 5*n-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntus && wntvn ) { /** * Path 4 (M much larger than N, JOBU='S', JOBVT='N') **/ wrkbl = n + n*ilaenv( 1, "dgeqrf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, n+n*ilaenv( 1, "dorgqr", " ", m, n, n, -1 ) ); wrkbl = max( wrkbl, 3*n+2*n* ilaenv( 1, "dgebrd", " ", n, n, -1, -1 ) ); wrkbl = max( wrkbl, 3*n+n* ilaenv( 1, "dorgbr", "q", n, n, n, -1 ) ); wrkbl = max( wrkbl, 5*n-4 ); maxwrk = n*n + wrkbl; minwrk = max( 3*n+m, 5*n-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntus && wntvo ) { /** * Path 5 (M much larger than N, JOBU='S', JOBVT='O') **/ wrkbl = n + n*ilaenv( 1, "dgeqrf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, n+n*ilaenv( 1, "dorgqr", " ", m, n, n, -1 ) ); wrkbl = max( wrkbl, 3*n+2*n* ilaenv( 1, "dgebrd", " ", n, n, -1, -1 ) ); wrkbl = max( wrkbl, 3*n+n* ilaenv( 1, "dorgbr", "q", n, n, n, -1 ) ); wrkbl = max( wrkbl, 3*n+( n-1 )* ilaenv( 1, "dorgbr", "p", n, n, n, -1 ) ); wrkbl = max( wrkbl, 5*n-4 ); maxwrk = 2*n*n + wrkbl; minwrk = max( 3*n+m, 5*n-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntus && wntvas ) { /** * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or * 'A') **/ wrkbl = n + n*ilaenv( 1, "dgeqrf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, n+n*ilaenv( 1, "dorgqr", " ", m, n, n, -1 ) ); wrkbl = max( wrkbl, 3*n+2*n* ilaenv( 1, "dgebrd", " ", n, n, -1, -1 ) ); wrkbl = max( wrkbl, 3*n+n* ilaenv( 1, "dorgbr", "q", n, n, n, -1 ) ); wrkbl = max( wrkbl, 3*n+( n-1 )* ilaenv( 1, "dorgbr", "p", n, n, n, -1 ) ); wrkbl = max( wrkbl, 5*n-4 ); maxwrk = n*n + wrkbl; minwrk = max( 3*n+m, 5*n-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntua && wntvn ) { /** * Path 7 (M much larger than N, JOBU='A', JOBVT='N') **/ wrkbl = n + n*ilaenv( 1, "dgeqrf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, n+m*ilaenv( 1, "dorgqr", " ", m, m, n, -1 ) ); wrkbl = max( wrkbl, 3*n+2*n* ilaenv( 1, "dgebrd", " ", n, n, -1, -1 ) ); wrkbl = max( wrkbl, 3*n+n* ilaenv( 1, "dorgbr", "q", n, n, n, -1 ) ); wrkbl = max( wrkbl, 5*n-4 ); maxwrk = n*n + wrkbl; minwrk = max( 3*n+m, 5*n-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntua && wntvo ) { /** * Path 8 (M much larger than N, JOBU='A', JOBVT='O') **/ wrkbl = n + n*ilaenv( 1, "dgeqrf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, n+m*ilaenv( 1, "dorgqr", " ", m, m, n, -1 ) ); wrkbl = max( wrkbl, 3*n+2*n* ilaenv( 1, "dgebrd", " ", n, n, -1, -1 ) ); wrkbl = max( wrkbl, 3*n+n* ilaenv( 1, "dorgbr", "q", n, n, n, -1 ) ); wrkbl = max( wrkbl, 3*n+( n-1 )* ilaenv( 1, "dorgbr", "p", n, n, n, -1 ) ); wrkbl = max( wrkbl, 5*n-4 ); maxwrk = 2*n*n + wrkbl; minwrk = max( 3*n+m, 5*n-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntua && wntvas ) { /** * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or * 'A') **/ wrkbl = n + n*ilaenv( 1, "dgeqrf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, n+m*ilaenv( 1, "dorgqr", " ", m, m, n, -1 ) ); wrkbl = max( wrkbl, 3*n+2*n* ilaenv( 1, "dgebrd", " ", n, n, -1, -1 ) ); wrkbl = max( wrkbl, 3*n+n* ilaenv( 1, "dorgbr", "q", n, n, n, -1 ) ); wrkbl = max( wrkbl, 3*n+( n-1 )* ilaenv( 1, "dorgbr", "p", n, n, n, -1 ) ); wrkbl = max( wrkbl, 5*n-4 ); maxwrk = n*n + wrkbl; minwrk = max( 3*n+m, 5*n-4 ); minwrk = min( minwrk, maxwrk ); } } else { /** * Path 10 (M at least N, but not much larger) **/ maxwrk = 3*n + ( m+n )*ilaenv( 1, "dgebrd", " ", m, n, -1, -1 ); if( wntus || wntuo ) maxwrk = max( maxwrk, 3*n+n* ilaenv( 1, "dorgbr", "q", m, n, n, -1 ) ); if( wntua ) maxwrk = max( maxwrk, 3*n+m* ilaenv( 1, "dorgbr", "q", m, m, n, -1 ) ); if( !wntvn ) maxwrk = max( maxwrk, 3*n+( n-1 )* ilaenv( 1, "dorgbr", "p", n, n, n, -1 ) ); maxwrk = max( maxwrk, 5*n-4 ); minwrk = max( 3*n+m, 5*n-4 ); } work_1( 1 ) = maxwrk; } if( lwork=1 ) work_1( 1 ) = one; return; } /** * Get machine constants **/ eps = dlamch( 'p' ); smlnum = sqrt( dlamch( 's' ) ) / eps; bignum = one / smlnum; /** * Scale A if max entry outside range [SMLNUM,BIGNUM] **/ anrm = dlange( 'm', m, n, a, lda, dum ); iscl = 0; if( anrm>zero && anrmbignum ) { iscl = 1; dlascl( 'g', 0, 0, anrm, bignum, m, n, a, lda, &ierr ); } /** * A has at least as many rows as columns. If A has sufficiently * more rows than columns, first reduce using the QR * decomposition (if sufficient workspace available) **/ if( m>=mnthr ) { if( wntun ) { /** * Path 1 (M much larger than N, JOBU='N') * No left singular vectors to be computed **/ itau = 1; iwork = itau + n; /** * Compute A=Q*R * (Workspace: need 2*N, prefer N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Zero out below R **/ dlaset( 'l', n-1, n-1, zero, zero, &a_2( 2, 1 ), lda ); ie = 1; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize R in A * (Workspace: need 4*N, prefer 3*N+2*N*NB) **/ dgebrd( n, n, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); ncvt = 0; if( wntvo || wntvas ) { /** * If right singular vectors desired, generate P'. * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) **/ dorgbr( 'p', n, n, n, a, lda, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); ncvt = n; } iwork = ie + n; /** * Perform bidiagonal QR iteration, computing right * singular vectors of A in A if desired * (Workspace: need 5*N-4) **/ dbdsqr( 'u', n, ncvt, 0, 0, s, &work_1( ie ), a, lda, dum, 1, dum, 1, &work_1( iwork ), info ); /** * If right singular vectors desired in VT, copy them there **/ if( wntvas ) dlacpy( 'f', n, n, a, lda, vt, ldvt ); } else if( wntuo && wntvn ) { /** * Path 2 (M much larger than N, JOBU='O', JOBVT='N') * N left singular vectors to be overwritten on A and * no right singular vectors to be computed **/ if( lwork>=n*n+max( 4*n, 5*n-4 ) ) { /** * Sufficient workspace for a fast algorithm **/ ir = 1; if( lwork>=max( wrkbl, lda*n+n )+lda*n ) { /** * WORK(IU) is LDA by N, WORK(IR) is LDA by N **/ ldwrku = lda; ldwrkr = lda; } else if( lwork>=max( wrkbl, lda*n+n )+n*n ) { /** * WORK(IU) is LDA by N, WORK(IR) is N by N **/ ldwrku = lda; ldwrkr = n; } else { /** * WORK(IU) is LDWRKU by N, WORK(IR) is N by N **/ ldwrku = ( lwork-n*n-n ) / n; ldwrkr = n; } itau = ir + ldwrkr*n; iwork = itau + n; /** * Compute A=Q*R * (Workspace: need N*N+2*N, prefer N*N+N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy R to WORK(IR) and zero out below it **/ dlacpy( 'u', n, n, a, lda, &work_1( ir ), ldwrkr ); dlaset( 'l', n-1, n-1, zero, zero, &work_1( ir+1 ), ldwrkr ); /** * Generate Q in A * (Workspace: need N*N+2*N, prefer N*N+N+N*NB) **/ dorgqr( m, n, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize R in WORK(IR) * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) **/ dgebrd( n, n, &work_1( ir ), ldwrkr, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left vectors bidiagonalizing R * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) **/ dorgbr( 'q', n, n, n, &work_1( ir ), ldwrkr, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IR) * (Workspace: need N*N+5*N-4) **/ dbdsqr( 'u', n, 0, n, 0, s, &work_1( ie ), dum, 1, &work_1( ir ), ldwrkr, dum, 1, &work_1( iwork ), info ); iu = ie + n; /** * Multiply Q in A by left singular vectors of R in * WORK(IR), storing result in WORK(IU) and copying to A * (Workspace: need N*N+2*N, prefer N*N+M*N+N) **/ for (i=1 ; ldwrku>0?i<=m:i>=m ; i+=ldwrku) { chunk = min( m-i+1, ldwrku ); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, chunk, n, n, one, &a_2( i, 1 ), lda, &work_1( ir ), ldwrkr, zero, &work_1( iu ), ldwrku ); dlacpy( 'f', chunk, n, &work_1( iu ), ldwrku, &a_2( i, 1 ), lda ); } } else { /** * Insufficient workspace for a fast algorithm **/ ie = 1; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize A * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) **/ dgebrd( m, n, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left vectors bidiagonalizing A * (Workspace: need 4*N, prefer 3*N+N*NB) **/ dorgbr( 'q', m, n, n, a, lda, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of A in A * (Workspace: need 5*N-4) **/ dbdsqr( 'u', n, 0, m, 0, s, &work_1( ie ), dum, 1, a, lda, dum, 1, &work_1( iwork ), info ); } } else if( wntuo && wntvas ) { /** * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') * N left singular vectors to be overwritten on A and * N right singular vectors to be computed in VT **/ if( lwork>=n*n+max( 4*n, 5*n-4 ) ) { /** * Sufficient workspace for a fast algorithm **/ ir = 1; if( lwork>=max( wrkbl, lda*n+n )+lda*n ) { /** * WORK(IU) is LDA by N and WORK(IR) is LDA by N **/ ldwrku = lda; ldwrkr = lda; } else if( lwork>=max( wrkbl, lda*n+n )+n*n ) { /** * WORK(IU) is LDA by N and WORK(IR) is N by N **/ ldwrku = lda; ldwrkr = n; } else { /** * WORK(IU) is LDWRKU by N and WORK(IR) is N by N **/ ldwrku = ( lwork-n*n-n ) / n; ldwrkr = n; } itau = ir + ldwrkr*n; iwork = itau + n; /** * Compute A=Q*R * (Workspace: need N*N+2*N, prefer N*N+N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy R to VT, zeroing out below it **/ dlacpy( 'u', n, n, a, lda, vt, ldvt ); dlaset( 'l', n-1, n-1, zero, zero, &vt_2( 2, 1 ), ldvt ); /** * Generate Q in A * (Workspace: need N*N+2*N, prefer N*N+N+N*NB) **/ dorgqr( m, n, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize R in VT, copying result to WORK(IR) * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) **/ dgebrd( n, n, vt, ldvt, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', n, n, vt, ldvt, &work_1( ir ), ldwrkr ); /** * Generate left vectors bidiagonalizing R in WORK(IR) * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) **/ dorgbr( 'q', n, n, n, &work_1( ir ), ldwrkr, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right vectors bidiagonalizing R in VT * (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) **/ dorgbr( 'p', n, n, n, vt, ldvt, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IR) and computing right * singular vectors of R in VT * (Workspace: need N*N+5*N-4) **/ dbdsqr( 'u', n, n, n, 0, s, &work_1( ie ), vt, ldvt, &work_1( ir ), ldwrkr, dum, 1, &work_1( iwork ), info ); iu = ie + n; /** * Multiply Q in A by left singular vectors of R in * WORK(IR), storing result in WORK(IU) and copying to A * (Workspace: need N*N+2*N, prefer N*N+M*N+N) **/ for (i=1 ; ldwrku>0?i<=m:i>=m ; i+=ldwrku) { chunk = min( m-i+1, ldwrku ); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, chunk, n, n, one, &a_2( i, 1 ), lda, &work_1( ir ), ldwrkr, zero, &work_1( iu ), ldwrku ); dlacpy( 'f', chunk, n, &work_1( iu ), ldwrku, &a_2( i, 1 ), lda ); } } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + n; /** * Compute A=Q*R * (Workspace: need 2*N, prefer N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy R to VT, zeroing out below it **/ dlacpy( 'u', n, n, a, lda, vt, ldvt ); dlaset( 'l', n-1, n-1, zero, zero, &vt_2( 2, 1 ), ldvt ); /** * Generate Q in A * (Workspace: need 2*N, prefer N+N*NB) **/ dorgqr( m, n, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize R in VT * (Workspace: need 4*N, prefer 3*N+2*N*NB) **/ dgebrd( n, n, vt, ldvt, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply Q in A by left vectors bidiagonalizing R * (Workspace: need 3*N+M, prefer 3*N+M*NB) **/ dormbr( 'q', 'r', 'n', m, n, n, vt, ldvt, &work_1( itauq ), a, lda, &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right vectors bidiagonalizing R in VT * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) **/ dorgbr( 'p', n, n, n, vt, ldvt, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of A in A and computing right * singular vectors of A in VT * (Workspace: need 5*N-4) **/ dbdsqr( 'u', n, n, m, 0, s, &work_1( ie ), vt, ldvt, a, lda, dum, 1, &work_1( iwork ), info ); } } else if( wntus ) { if( wntvn ) { /** * Path 4 (M much larger than N, JOBU='S', JOBVT='N') * N left singular vectors to be computed in U and * no right singular vectors to be computed **/ if( lwork>=n*n+max( 4*n, 5*n-4 ) ) { /** * Sufficient workspace for a fast algorithm **/ ir = 1; if( lwork>=wrkbl+lda*n ) { /** * WORK(IR) is LDA by N **/ ldwrkr = lda; } else { /** * WORK(IR) is N by N **/ ldwrkr = n; } itau = ir + ldwrkr*n; iwork = itau + n; /** * Compute A=Q*R * (Workspace: need N*N+2*N, prefer N*N+N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy R to WORK(IR), zeroing out below it **/ dlacpy( 'u', n, n, a, lda, &work_1( ir ), ldwrkr ); dlaset( 'l', n-1, n-1, zero, zero, &work_1( ir+1 ), ldwrkr ); /** * Generate Q in A * (Workspace: need N*N+2*N, prefer N*N+N+N*NB) **/ dorgqr( m, n, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize R in WORK(IR) * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) **/ dgebrd( n, n, &work_1( ir ), ldwrkr, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left vectors bidiagonalizing R in WORK(IR) * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) **/ dorgbr( 'q', n, n, n, &work_1( ir ), ldwrkr, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IR) * (Workspace: need N*N+5*N-4) **/ dbdsqr( 'u', n, 0, n, 0, s, &work_1( ie ), dum, 1, &work_1( ir ), ldwrkr, dum, 1, &work_1( iwork ), info ); /** * Multiply Q in A by left singular vectors of R in * WORK(IR), storing result in U * (Workspace: need N*N) **/ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, n, one, a, lda, &work_1( ir ), ldwrkr, zero, u, ldu ); } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + n; /** * Compute A=Q*R, copying result to U * (Workspace: need 2*N, prefer N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', m, n, a, lda, u, ldu ); /** * Generate Q in U * (Workspace: need 2*N, prefer N+N*NB) **/ dorgqr( m, n, n, u, ldu, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Zero out below R in A **/ dlaset( 'l', n-1, n-1, zero, zero, &a_2( 2, 1 ), lda ); /** * Bidiagonalize R in A * (Workspace: need 4*N, prefer 3*N+2*N*NB) **/ dgebrd( n, n, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply Q in U by left vectors bidiagonalizing R * (Workspace: need 3*N+M, prefer 3*N+M*NB) **/ dormbr( 'q', 'r', 'n', m, n, n, a, lda, &work_1( itauq ), u, ldu, &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of A in U * (Workspace: need 5*N-4) **/ dbdsqr( 'u', n, 0, m, 0, s, &work_1( ie ), dum, 1, u, ldu, dum, 1, &work_1( iwork ), info ); } } else if( wntvo ) { /** * Path 5 (M much larger than N, JOBU='S', JOBVT='O') * N left singular vectors to be computed in U and * N right singular vectors to be overwritten on A **/ if( lwork>=2*n*n+max( 4*n, 5*n-4 ) ) { /** * Sufficient workspace for a fast algorithm **/ iu = 1; if( lwork>=wrkbl+2*lda*n ) { /** * WORK(IU) is LDA by N and WORK(IR) is LDA by N **/ ldwrku = lda; ir = iu + ldwrku*n; ldwrkr = lda; } else if( lwork>=wrkbl+( lda+n )*n ) { /** * WORK(IU) is LDA by N and WORK(IR) is N by N **/ ldwrku = lda; ir = iu + ldwrku*n; ldwrkr = n; } else { /** * WORK(IU) is N by N and WORK(IR) is N by N **/ ldwrku = n; ir = iu + ldwrku*n; ldwrkr = n; } itau = ir + ldwrkr*n; iwork = itau + n; /** * Compute A=Q*R * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy R to WORK(IU), zeroing out below it **/ dlacpy( 'u', n, n, a, lda, &work_1( iu ), ldwrku ); dlaset( 'l', n-1, n-1, zero, zero, &work_1( iu+1 ), ldwrku ); /** * Generate Q in A * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) **/ dorgqr( m, n, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize R in WORK(IU), copying result to * WORK(IR) * (Workspace: need 2*N*N+4*N, * prefer 2*N*N+3*N+2*N*NB) **/ dgebrd( n, n, &work_1( iu ), ldwrku, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'u', n, n, &work_1( iu ), ldwrku, &work_1( ir ), ldwrkr ); /** * Generate left bidiagonalizing vectors in WORK(IU) * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) **/ dorgbr( 'q', n, n, n, &work_1( iu ), ldwrku, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right bidiagonalizing vectors in WORK(IR) * (Workspace: need 2*N*N+4*N-1, * prefer 2*N*N+3*N+(N-1)*NB) **/ dorgbr( 'p', n, n, n, &work_1( ir ), ldwrkr, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IU) and computing * right singular vectors of R in WORK(IR) * (Workspace: need 2*N*N+5*N-4) **/ dbdsqr( 'u', n, n, n, 0, s, &work_1( ie ), &work_1( ir ), ldwrkr, &work_1( iu ), ldwrku, dum, 1, &work_1( iwork ), info ); /** * Multiply Q in A by left singular vectors of R in * WORK(IU), storing result in U * (Workspace: need N*N) **/ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, n, one, a, lda, &work_1( iu ), ldwrku, zero, u, ldu ); /** * Copy right singular vectors of R to A * (Workspace: need N*N) **/ dlacpy( 'f', n, n, &work_1( ir ), ldwrkr, a, lda ); } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + n; /** * Compute A=Q*R, copying result to U * (Workspace: need 2*N, prefer N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', m, n, a, lda, u, ldu ); /** * Generate Q in U * (Workspace: need 2*N, prefer N+N*NB) **/ dorgqr( m, n, n, u, ldu, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Zero out below R in A **/ dlaset( 'l', n-1, n-1, zero, zero, &a_2( 2, 1 ), lda ); /** * Bidiagonalize R in A * (Workspace: need 4*N, prefer 3*N+2*N*NB) **/ dgebrd( n, n, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply Q in U by left vectors bidiagonalizing R * (Workspace: need 3*N+M, prefer 3*N+M*NB) **/ dormbr( 'q', 'r', 'n', m, n, n, a, lda, &work_1( itauq ), u, ldu, &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right vectors bidiagonalizing R in A * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) **/ dorgbr( 'p', n, n, n, a, lda, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in A * (Workspace: need 5*N-4) **/ dbdsqr( 'u', n, n, m, 0, s, &work_1( ie ), a, lda, u, ldu, dum, 1, &work_1( iwork ), info ); } } else if( wntvas ) { /** * Path 6 (M much larger than N, JOBU='S', JOBVT='S' * or 'A') * N left singular vectors to be computed in U and * N right singular vectors to be computed in VT **/ if( lwork>=n*n+max( 4*n, 5*n-4 ) ) { /** * Sufficient workspace for a fast algorithm **/ iu = 1; if( lwork>=wrkbl+lda*n ) { /** * WORK(IU) is LDA by N **/ ldwrku = lda; } else { /** * WORK(IU) is N by N **/ ldwrku = n; } itau = iu + ldwrku*n; iwork = itau + n; /** * Compute A=Q*R * (Workspace: need N*N+2*N, prefer N*N+N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy R to WORK(IU), zeroing out below it **/ dlacpy( 'u', n, n, a, lda, &work_1( iu ), ldwrku ); dlaset( 'l', n-1, n-1, zero, zero, &work_1( iu+1 ), ldwrku ); /** * Generate Q in A * (Workspace: need N*N+2*N, prefer N*N+N+N*NB) **/ dorgqr( m, n, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize R in WORK(IU), copying result to VT * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) **/ dgebrd( n, n, &work_1( iu ), ldwrku, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'u', n, n, &work_1( iu ), ldwrku, vt, ldvt ); /** * Generate left bidiagonalizing vectors in WORK(IU) * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) **/ dorgbr( 'q', n, n, n, &work_1( iu ), ldwrku, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right bidiagonalizing vectors in VT * (Workspace: need N*N+4*N-1, * prefer N*N+3*N+(N-1)*NB) **/ dorgbr( 'p', n, n, n, vt, ldvt, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IU) and computing * right singular vectors of R in VT * (Workspace: need N*N+5*N-4) **/ dbdsqr( 'u', n, n, n, 0, s, &work_1( ie ), vt, ldvt, &work_1( iu ), ldwrku, dum, 1, &work_1( iwork ), info ); /** * Multiply Q in A by left singular vectors of R in * WORK(IU), storing result in U * (Workspace: need N*N) **/ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, n, one, a, lda, &work_1( iu ), ldwrku, zero, u, ldu ); } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + n; /** * Compute A=Q*R, copying result to U * (Workspace: need 2*N, prefer N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', m, n, a, lda, u, ldu ); /** * Generate Q in U * (Workspace: need 2*N, prefer N+N*NB) **/ dorgqr( m, n, n, u, ldu, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy R to VT, zeroing out below it **/ dlacpy( 'u', n, n, a, lda, vt, ldvt ); dlaset( 'l', n-1, n-1, zero, zero, &vt_2( 2, 1 ), ldvt ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize R in VT * (Workspace: need 4*N, prefer 3*N+2*N*NB) **/ dgebrd( n, n, vt, ldvt, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply Q in U by left bidiagonalizing vectors * in VT * (Workspace: need 3*N+M, prefer 3*N+M*NB) **/ dormbr( 'q', 'r', 'n', m, n, n, vt, ldvt, &work_1( itauq ), u, ldu, &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right bidiagonalizing vectors in VT * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) **/ dorgbr( 'p', n, n, n, vt, ldvt, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in VT * (Workspace: need 5*N-4) **/ dbdsqr( 'u', n, n, m, 0, s, &work_1( ie ), vt, ldvt, u, ldu, dum, 1, &work_1( iwork ), info ); } } } else if( wntua ) { if( wntvn ) { /** * Path 7 (M much larger than N, JOBU='A', JOBVT='N') * M left singular vectors to be computed in U and * no right singular vectors to be computed **/ if( lwork>=n*n+max( n+m, max(4*n, 5*n-4) ) ) { /** * Sufficient workspace for a fast algorithm **/ ir = 1; if( lwork>=wrkbl+lda*n ) { /** * WORK(IR) is LDA by N **/ ldwrkr = lda; } else { /** * WORK(IR) is N by N **/ ldwrkr = n; } itau = ir + ldwrkr*n; iwork = itau + n; /** * Compute A=Q*R, copying result to U * (Workspace: need N*N+2*N, prefer N*N+N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', m, n, a, lda, u, ldu ); /** * Copy R to WORK(IR), zeroing out below it **/ dlacpy( 'u', n, n, a, lda, &work_1( ir ), ldwrkr ); dlaset( 'l', n-1, n-1, zero, zero, &work_1( ir+1 ), ldwrkr ); /** * Generate Q in U * (Workspace: need N*N+N+M, prefer N*N+N+M*NB) **/ dorgqr( m, m, n, u, ldu, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize R in WORK(IR) * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) **/ dgebrd( n, n, &work_1( ir ), ldwrkr, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left bidiagonalizing vectors in WORK(IR) * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) **/ dorgbr( 'q', n, n, n, &work_1( ir ), ldwrkr, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IR) * (Workspace: need N*N+5*N-4) **/ dbdsqr( 'u', n, 0, n, 0, s, &work_1( ie ), dum, 1, &work_1( ir ), ldwrkr, dum, 1, &work_1( iwork ), info ); /** * Multiply Q in U by left singular vectors of R in * WORK(IR), storing result in A * (Workspace: need N*N) **/ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, n, one, u, ldu, &work_1( ir ), ldwrkr, zero, a, lda ); /** * Copy left singular vectors of A from A to U **/ dlacpy( 'f', m, n, a, lda, u, ldu ); } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + n; /** * Compute A=Q*R, copying result to U * (Workspace: need 2*N, prefer N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', m, n, a, lda, u, ldu ); /** * Generate Q in U * (Workspace: need N+M, prefer N+M*NB) **/ dorgqr( m, m, n, u, ldu, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Zero out below R in A **/ dlaset( 'l', n-1, n-1, zero, zero, &a_2( 2, 1 ), lda ); /** * Bidiagonalize R in A * (Workspace: need 4*N, prefer 3*N+2*N*NB) **/ dgebrd( n, n, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply Q in U by left bidiagonalizing vectors * in A * (Workspace: need 3*N+M, prefer 3*N+M*NB) **/ dormbr( 'q', 'r', 'n', m, n, n, a, lda, &work_1( itauq ), u, ldu, &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of A in U * (Workspace: need 5*N-4) **/ dbdsqr( 'u', n, 0, m, 0, s, &work_1( ie ), dum, 1, u, ldu, dum, 1, &work_1( iwork ), info ); } } else if( wntvo ) { /** * Path 8 (M much larger than N, JOBU='A', JOBVT='O') * M left singular vectors to be computed in U and * N right singular vectors to be overwritten on A **/ if( lwork>=2*n*n+max( n+m, max(4*n, 5*n-4) ) ) { /** * Sufficient workspace for a fast algorithm **/ iu = 1; if( lwork>=wrkbl+2*lda*n ) { /** * WORK(IU) is LDA by N and WORK(IR) is LDA by N **/ ldwrku = lda; ir = iu + ldwrku*n; ldwrkr = lda; } else if( lwork>=wrkbl+( lda+n )*n ) { /** * WORK(IU) is LDA by N and WORK(IR) is N by N **/ ldwrku = lda; ir = iu + ldwrku*n; ldwrkr = n; } else { /** * WORK(IU) is N by N and WORK(IR) is N by N **/ ldwrku = n; ir = iu + ldwrku*n; ldwrkr = n; } itau = ir + ldwrkr*n; iwork = itau + n; /** * Compute A=Q*R, copying result to U * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', m, n, a, lda, u, ldu ); /** * Generate Q in U * (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) **/ dorgqr( m, m, n, u, ldu, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy R to WORK(IU), zeroing out below it **/ dlacpy( 'u', n, n, a, lda, &work_1( iu ), ldwrku ); dlaset( 'l', n-1, n-1, zero, zero, &work_1( iu+1 ), ldwrku ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize R in WORK(IU), copying result to * WORK(IR) * (Workspace: need 2*N*N+4*N, * prefer 2*N*N+3*N+2*N*NB) **/ dgebrd( n, n, &work_1( iu ), ldwrku, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'u', n, n, &work_1( iu ), ldwrku, &work_1( ir ), ldwrkr ); /** * Generate left bidiagonalizing vectors in WORK(IU) * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) **/ dorgbr( 'q', n, n, n, &work_1( iu ), ldwrku, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right bidiagonalizing vectors in WORK(IR) * (Workspace: need 2*N*N+4*N-1, * prefer 2*N*N+3*N+(N-1)*NB) **/ dorgbr( 'p', n, n, n, &work_1( ir ), ldwrkr, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IU) and computing * right singular vectors of R in WORK(IR) * (Workspace: need 2*N*N+5*N-4) **/ dbdsqr( 'u', n, n, n, 0, s, &work_1( ie ), &work_1( ir ), ldwrkr, &work_1( iu ), ldwrku, dum, 1, &work_1( iwork ), info ); /** * Multiply Q in U by left singular vectors of R in * WORK(IU), storing result in A * (Workspace: need N*N) **/ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, n, one, u, ldu, &work_1( iu ), ldwrku, zero, a, lda ); /** * Copy left singular vectors of A from A to U **/ dlacpy( 'f', m, n, a, lda, u, ldu ); /** * Copy right singular vectors of R from WORK(IR) to A **/ dlacpy( 'f', n, n, &work_1( ir ), ldwrkr, a, lda ); } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + n; /** * Compute A=Q*R, copying result to U * (Workspace: need 2*N, prefer N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', m, n, a, lda, u, ldu ); /** * Generate Q in U * (Workspace: need N+M, prefer N+M*NB) **/ dorgqr( m, m, n, u, ldu, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Zero out below R in A **/ dlaset( 'l', n-1, n-1, zero, zero, &a_2( 2, 1 ), lda ); /** * Bidiagonalize R in A * (Workspace: need 4*N, prefer 3*N+2*N*NB) **/ dgebrd( n, n, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply Q in U by left bidiagonalizing vectors * in A * (Workspace: need 3*N+M, prefer 3*N+M*NB) **/ dormbr( 'q', 'r', 'n', m, n, n, a, lda, &work_1( itauq ), u, ldu, &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right bidiagonalizing vectors in A * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) **/ dorgbr( 'p', n, n, n, a, lda, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in A * (Workspace: need 5*N-4) **/ dbdsqr( 'u', n, n, m, 0, s, &work_1( ie ), a, lda, u, ldu, dum, 1, &work_1( iwork ), info ); } } else if( wntvas ) { /** * Path 9 (M much larger than N, JOBU='A', JOBVT='S' * or 'A') * M left singular vectors to be computed in U and * N right singular vectors to be computed in VT **/ if( lwork>=n*n+max( n+m, max(4*n, 5*n-4) ) ) { /** * Sufficient workspace for a fast algorithm **/ iu = 1; if( lwork>=wrkbl+lda*n ) { /** * WORK(IU) is LDA by N **/ ldwrku = lda; } else { /** * WORK(IU) is N by N **/ ldwrku = n; } itau = iu + ldwrku*n; iwork = itau + n; /** * Compute A=Q*R, copying result to U * (Workspace: need N*N+2*N, prefer N*N+N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', m, n, a, lda, u, ldu ); /** * Generate Q in U * (Workspace: need N*N+N+M, prefer N*N+N+M*NB) **/ dorgqr( m, m, n, u, ldu, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy R to WORK(IU), zeroing out below it **/ dlacpy( 'u', n, n, a, lda, &work_1( iu ), ldwrku ); dlaset( 'l', n-1, n-1, zero, zero, &work_1( iu+1 ), ldwrku ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize R in WORK(IU), copying result to VT * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) **/ dgebrd( n, n, &work_1( iu ), ldwrku, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'u', n, n, &work_1( iu ), ldwrku, vt, ldvt ); /** * Generate left bidiagonalizing vectors in WORK(IU) * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) **/ dorgbr( 'q', n, n, n, &work_1( iu ), ldwrku, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right bidiagonalizing vectors in VT * (Workspace: need N*N+4*N-1, * prefer N*N+3*N+(N-1)*NB) **/ dorgbr( 'p', n, n, n, vt, ldvt, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of R in WORK(IU) and computing * right singular vectors of R in VT * (Workspace: need N*N+5*N-4) **/ dbdsqr( 'u', n, n, n, 0, s, &work_1( ie ), vt, ldvt, &work_1( iu ), ldwrku, dum, 1, &work_1( iwork ), info ); /** * Multiply Q in U by left singular vectors of R in * WORK(IU), storing result in A * (Workspace: need N*N) **/ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, n, one, u, ldu, &work_1( iu ), ldwrku, zero, a, lda ); /** * Copy left singular vectors of A from A to U **/ dlacpy( 'f', m, n, a, lda, u, ldu ); } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + n; /** * Compute A=Q*R, copying result to U * (Workspace: need 2*N, prefer N+N*NB) **/ dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', m, n, a, lda, u, ldu ); /** * Generate Q in U * (Workspace: need N+M, prefer N+M*NB) **/ dorgqr( m, m, n, u, ldu, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy R from A to VT, zeroing out below it **/ dlacpy( 'u', n, n, a, lda, vt, ldvt ); dlaset( 'l', n-1, n-1, zero, zero, &vt_2( 2, 1 ), ldvt ); ie = itau; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize R in VT * (Workspace: need 4*N, prefer 3*N+2*N*NB) **/ dgebrd( n, n, vt, ldvt, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply Q in U by left bidiagonalizing vectors * in VT * (Workspace: need 3*N+M, prefer 3*N+M*NB) **/ dormbr( 'q', 'r', 'n', m, n, n, vt, ldvt, &work_1( itauq ), u, ldu, &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right bidiagonalizing vectors in VT * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) **/ dorgbr( 'p', n, n, n, vt, ldvt, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + n; /** * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in VT * (Workspace: need 5*N-4) **/ dbdsqr( 'u', n, n, m, 0, s, &work_1( ie ), vt, ldvt, u, ldu, dum, 1, &work_1( iwork ), info ); } } } } else { /** * M .LT. MNTHR * * Path 10 (M at least N, but not much larger) * Reduce to bidiagonal form without QR decomposition **/ ie = 1; itauq = ie + n; itaup = itauq + n; iwork = itaup + n; /** * Bidiagonalize A * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) **/ dgebrd( m, n, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); if( wntuas ) { /** * If left singular vectors desired in U, copy result to U * and generate left bidiagonalizing vectors in U * (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) **/ dlacpy( 'l', m, n, a, lda, u, ldu ); if( wntus ) ncu = n; if( wntua ) ncu = m; dorgbr( 'q', m, ncu, n, u, ldu, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); } if( wntvas ) { /** * If right singular vectors desired in VT, copy result to * VT and generate right bidiagonalizing vectors in VT * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) **/ dlacpy( 'u', n, n, a, lda, vt, ldvt ); dorgbr( 'p', n, n, n, vt, ldvt, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); } if( wntuo ) { /** * If left singular vectors desired in A, generate left * bidiagonalizing vectors in A * (Workspace: need 4*N, prefer 3*N+N*NB) **/ dorgbr( 'q', m, n, n, a, lda, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); } if( wntvo ) { /** * If right singular vectors desired in A, generate right * bidiagonalizing vectors in A * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) **/ dorgbr( 'p', n, n, n, a, lda, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); } iwork = ie + n; if( wntuas || wntuo ) nru = m; if( wntun ) nru = 0; if( wntvas || wntvo ) ncvt = n; if( wntvn ) ncvt = 0; if( ( !wntuo ) && ( !wntvo ) ) { /** * Perform bidiagonal QR iteration, if desired, computing * left singular vectors in U and computing right singular * vectors in VT * (Workspace: need 5*N-4) **/ dbdsqr( 'u', n, ncvt, nru, 0, s, &work_1( ie ), vt, ldvt, u, ldu, dum, 1, &work_1( iwork ), info ); } else if( ( !wntuo ) && wntvo ) { /** * Perform bidiagonal QR iteration, if desired, computing * left singular vectors in U and computing right singular * vectors in A * (Workspace: need 5*N-4) **/ dbdsqr( 'u', n, ncvt, nru, 0, s, &work_1( ie ), a, lda, u, ldu, dum, 1, &work_1( iwork ), info ); } else { /** * Perform bidiagonal QR iteration, if desired, computing * left singular vectors in A and computing right singular * vectors in VT * (Workspace: need 5*N-4) **/ dbdsqr( 'u', n, ncvt, nru, 0, s, &work_1( ie ), vt, ldvt, a, lda, dum, 1, &work_1( iwork ), info ); } } /** * If DBDSQR failed to converge, copy unconverged superdiagonals * to WORK( 2:MINMN ) **/ if( *info!=0 ) { if( ie>2 ) { for (i=1 ; i<=minmn - 1 ; i+=1) { work_1( i+1 ) = work_1( i+ie-1 ); } } if( ie<2 ) { for (i=minmn - 1 ; i>=1 ; i+=-1) { work_1( i+1 ) = work_1( i+ie-1 ); } } } /** * Undo scaling if necessary **/ if( iscl==1 ) { if( anrm>bignum ) dlascl( 'g', 0, 0, bignum, anrm, minmn, 1, s, minmn, &ierr ); if( *info!=0 && anrm>bignum ) dlascl( 'g', 0, 0, bignum, anrm, minmn-1, 1, &work_1( 2 ), minmn, &ierr ); if( anrm