/* * $Id: dgesv2.c,v 1.1.1.1 2005/09/18 22:04:46 dhmunro Exp $ * LAPACK routine to return the SVD of a matrix, m(y)?(x):(y)) /*-----end of Fortran intrinsics-----*/ void dgesv2( char jobu, char jobvt, long m, long n, double a[], long lda, double s[], double u[], long ldu, double vt[], long ldvt, double work[], long lwork, long *info ) { /** * -- LAPACK driver routine (version 1.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * March 31, 1993 * * This is m=1 && m>0 && n>0 ) { if( n>=mnthr ) { if( wntvn ) { /** * Path 1t(N much larger than M, JOBVT='N') **/ maxwrk = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 ); maxwrk = max( maxwrk, 3*m+2*m* ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) ); if( wntuo || wntuas ) maxwrk = max( maxwrk, 3*m+m* ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) ); maxwrk = max( maxwrk, 5*m-4 ); minwrk = max( 4*m, 5*m-4 ); } else if( wntvo && wntun ) { /** * Path 2t(N much larger than M, JOBU='N', JOBVT='O') **/ wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m, n, m, -1 ) ); wrkbl = max( wrkbl, 3*m+2*m* ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) ); wrkbl = max( wrkbl, 3*m+( m-1 )* ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) ); wrkbl = max( wrkbl, 5*m-4 ); maxwrk = max( m*m+wrkbl, m*m+m*n+m ); minwrk = max( 3*m+n, 5*m-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntvo && wntuas ) { /** * Path 3t(N much larger than M, JOBU='S' or 'A', * JOBVT='O') **/ wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m, n, m, -1 ) ); wrkbl = max( wrkbl, 3*m+2*m* ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) ); wrkbl = max( wrkbl, 3*m+( m-1 )* ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) ); wrkbl = max( wrkbl, 3*m+m* ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) ); wrkbl = max( wrkbl, 5*m-4 ); maxwrk = max( m*m+wrkbl, m*m+m*n+m ); minwrk = max( 3*m+n, 5*m-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntvs && wntun ) { /** * Path 4t(N much larger than M, JOBU='N', JOBVT='S') **/ wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m, n, m, -1 ) ); wrkbl = max( wrkbl, 3*m+2*m* ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) ); wrkbl = max( wrkbl, 3*m+( m-1 )* ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) ); wrkbl = max( wrkbl, 5*m-4 ); maxwrk = m*m + wrkbl; minwrk = max( 3*m+n, 5*m-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntvs && wntuo ) { /** * Path 5t(N much larger than M, JOBU='O', JOBVT='S') **/ wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m, n, m, -1 ) ); wrkbl = max( wrkbl, 3*m+2*m* ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) ); wrkbl = max( wrkbl, 3*m+( m-1 )* ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) ); wrkbl = max( wrkbl, 3*m+m* ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) ); wrkbl = max( wrkbl, 5*m-4 ); maxwrk = 2*m*m + wrkbl; minwrk = max( 3*m+n, 5*m-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntvs && wntuas ) { /** * Path 6t(N much larger than M, JOBU='S' or 'A', * JOBVT='S') **/ wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m, n, m, -1 ) ); wrkbl = max( wrkbl, 3*m+2*m* ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) ); wrkbl = max( wrkbl, 3*m+( m-1 )* ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) ); wrkbl = max( wrkbl, 3*m+m* ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) ); wrkbl = max( wrkbl, 5*m-4 ); maxwrk = m*m + wrkbl; minwrk = max( 3*m+n, 5*m-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntva && wntun ) { /** * Path 7t(N much larger than M, JOBU='N', JOBVT='A') **/ wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, m+n*ilaenv( 1, "dorglq", " ", n, n, m, -1 ) ); wrkbl = max( wrkbl, 3*m+2*m* ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) ); wrkbl = max( wrkbl, 3*m+( m-1 )* ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) ); wrkbl = max( wrkbl, 5*m-4 ); maxwrk = m*m + wrkbl; minwrk = max( 3*m+n, 5*m-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntva && wntuo ) { /** * Path 8t(N much larger than M, JOBU='O', JOBVT='A') **/ wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, m+n*ilaenv( 1, "dorglq", " ", n, n, m, -1 ) ); wrkbl = max( wrkbl, 3*m+2*m* ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) ); wrkbl = max( wrkbl, 3*m+( m-1 )* ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) ); wrkbl = max( wrkbl, 3*m+m* ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) ); wrkbl = max( wrkbl, 5*m-4 ); maxwrk = 2*m*m + wrkbl; minwrk = max( 3*m+n, 5*m-4 ); minwrk = min( minwrk, maxwrk ); } else if( wntva && wntuas ) { /** * Path 9t(N much larger than M, JOBU='S' or 'A', * JOBVT='A') **/ wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 ); wrkbl = max( wrkbl, m+n*ilaenv( 1, "dorglq", " ", n, n, m, -1 ) ); wrkbl = max( wrkbl, 3*m+2*m* ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) ); wrkbl = max( wrkbl, 3*m+( m-1 )* ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) ); wrkbl = max( wrkbl, 3*m+m* ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) ); wrkbl = max( wrkbl, 5*m-4 ); maxwrk = m*m + wrkbl; minwrk = max( 3*m+n, 5*m-4 ); minwrk = min( minwrk, maxwrk ); } } else { /** * Path 10t(N greater than M, but not much larger) **/ maxwrk = 3*m + ( m+n )*ilaenv( 1, "dgebrd", " ", m, n, -1, -1 ); if( wntvs || wntvo ) maxwrk = max( maxwrk, 3*m+m* ilaenv( 1, "dorgbr", "p", m, n, m, -1 ) ); if( wntva ) maxwrk = max( maxwrk, 3*m+n* ilaenv( 1, "dorgbr", "p", n, n, m, -1 ) ); if( !wntun ) maxwrk = max( maxwrk, 3*m+( m-1 )* ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) ); maxwrk = max( maxwrk, 5*m-4 ); minwrk = max( 3*m+n, 5*m-4 ); } work_1( 1 ) = maxwrk; } if( lwork=1 ) work_1( 1 ) = one; return; } /** * Get machine constants **/ eps = dlamch( 'p' ); smlnum = sqrt( dlamch( 's' ) ) / eps; bignum = one / smlnum; /** * Scale A if max entry outside range [SMLNUM,BIGNUM] **/ anrm = dlange( 'm', m, n, a, lda, dum ); iscl = 0; if( anrm>zero && anrmbignum ) { iscl = 1; dlascl( 'g', 0, 0, anrm, bignum, m, n, a, lda, &ierr ); } /** * A has more columns than rows. If A has sufficiently more * columns than rows, first reduce using the LQ decomposition (if * sufficient workspace available) **/ if( n>=mnthr ) { if( wntvn ) { /** * Path 1t(N much larger than M, JOBVT='N') * No right singular vectors to be computed **/ itau = 1; iwork = itau + m; /** * Compute A=L*Q * (Workspace: need 2*M, prefer M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Zero out above L **/ dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ), lda ); ie = 1; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize L in A * (Workspace: need 4*M, prefer 3*M+2*M*NB) **/ dgebrd( m, m, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); if( wntuo || wntuas ) { /** * If left singular vectors desired, generate Q * (Workspace: need 4*M, prefer 3*M+M*NB) **/ dorgbr( 'q', m, m, m, a, lda, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); } iwork = ie + m; nru = 0; if( wntuo || wntuas ) nru = m; /** * Perform bidiagonal QR iteration, computing left singular * vectors of A in A if desired * (Workspace: need 5*M-4) **/ dbdsqr( 'u', m, 0, nru, 0, s, &work_1( ie ), dum, 1, a, lda, dum, 1, &work_1( iwork ), info ); /** * If left singular vectors desired in U, copy them there **/ if( wntuas ) dlacpy( 'f', m, m, a, lda, u, ldu ); } else if( wntvo && wntun ) { /** * Path 2t(N much larger than M, JOBU='N', JOBVT='O') * M right singular vectors to be overwritten on A and * no left singular vectors to be computed **/ if( lwork>=m*m+max( 4*m, 5*m-4 ) ) { /** * Sufficient workspace for a fast algorithm **/ ir = 1; if( lwork>=max( wrkbl, lda*n+m )+lda*m ) { /** * WORK(IU) is LDA by N and WORK(IR) is LDA by M **/ ldwrku = lda; chunk = n; ldwrkr = lda; } else if( lwork>=max( wrkbl, lda*n+m )+m*m ) { /** * WORK(IU) is LDA by N and WORK(IR) is M by M **/ ldwrku = lda; chunk = n; ldwrkr = m; } else { /** * WORK(IU) is M by CHUNK and WORK(IR) is M by M **/ ldwrku = m; chunk = ( lwork-m*m-m ) / m; ldwrkr = m; } itau = ir + ldwrkr*m; iwork = itau + m; /** * Compute A=L*Q * (Workspace: need M*M+2*M, prefer M*M+M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy L to WORK(IR) and zero out above it **/ dlacpy( 'l', m, m, a, lda, &work_1( ir ), ldwrkr ); dlaset( 'u', m-1, m-1, zero, zero, &work_1( ir+ldwrkr ), ldwrkr ); /** * Generate Q in A * (Workspace: need M*M+2*M, prefer M*M+M+M*NB) **/ dorglq( m, n, m, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize L in WORK(IR) * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) **/ dgebrd( m, m, &work_1( ir ), ldwrkr, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right vectors bidiagonalizing L * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) **/ dorgbr( 'p', m, m, m, &work_1( ir ), ldwrkr, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing right * singular vectors of L in WORK(IR) * (Workspace: need M*M+5*M-4) **/ dbdsqr( 'u', m, m, 0, 0, s, &work_1( ie ), &work_1( ir ), ldwrkr, dum, 1, dum, 1, &work_1( iwork ), info ); iu = ie + m; /** * Multiply right singular vectors of L in WORK(IR) by Q * in A, storing result in WORK(IU) and copying to A * (Workspace: need M*M+2*M, prefer M*M+M*N+M) **/ for (i=1 ; chunk>0?i<=n:i>=n ; i+=chunk) { blk = min( n-i+1, chunk ); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, blk, m, one, &work_1( ir ), ldwrkr, &a_2( 1, i ), lda, zero, &work_1( iu ), ldwrku ); dlacpy( 'f', m, blk, &work_1( iu ), ldwrku, &a_2( 1, i ), lda ); } } else { /** * Insufficient workspace for a fast algorithm **/ ie = 1; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize A * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) **/ dgebrd( m, n, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right vectors bidiagonalizing A * (Workspace: need 4*M, prefer 3*M+M*NB) **/ dorgbr( 'p', m, n, m, a, lda, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing right * singular vectors of A in A * (Workspace: need 5*M-4) **/ dbdsqr( 'l', m, n, 0, 0, s, &work_1( ie ), a, lda, dum, 1, dum, 1, &work_1( iwork ), info ); } } else if( wntvo && wntuas ) { /** * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') * M right singular vectors to be overwritten on A and * M left singular vectors to be computed in U **/ if( lwork>=m*m+max( 4*m, 5*m-4 ) ) { /** * Sufficient workspace for a fast algorithm **/ ir = 1; if( lwork>=max( wrkbl, lda*n+m )+lda*m ) { /** * WORK(IU) is LDA by N and WORK(IR) is LDA by M **/ ldwrku = lda; chunk = n; ldwrkr = lda; } else if( lwork>=max( wrkbl, lda*n+m )+m*m ) { /** * WORK(IU) is LDA by N and WORK(IR) is M by M **/ ldwrku = lda; chunk = n; ldwrkr = m; } else { /** * WORK(IU) is M by CHUNK and WORK(IR) is M by M **/ ldwrku = m; chunk = ( lwork-m*m-m ) / m; ldwrkr = m; } itau = ir + ldwrkr*m; iwork = itau + m; /** * Compute A=L*Q * (Workspace: need M*M+2*M, prefer M*M+M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy L to U, zeroing about above it **/ dlacpy( 'l', m, m, a, lda, u, ldu ); dlaset( 'u', m-1, m-1, zero, zero, &u_2( 1, 2 ), ldu ); /** * Generate Q in A * (Workspace: need M*M+2*M, prefer M*M+M+M*NB) **/ dorglq( m, n, m, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize L in U, copying result to WORK(IR) * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) **/ dgebrd( m, m, u, ldu, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'u', m, m, u, ldu, &work_1( ir ), ldwrkr ); /** * Generate right vectors bidiagonalizing L in WORK(IR) * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) **/ dorgbr( 'p', m, m, m, &work_1( ir ), ldwrkr, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left vectors bidiagonalizing L in U * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) **/ dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing left * singular vectors of L in U, and computing right * singular vectors of L in WORK(IR) * (Workspace: need M*M+5*M-4) **/ dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ), &work_1( ir ), ldwrkr, u, ldu, dum, 1, &work_1( iwork ), info ); iu = ie + m; /** * Multiply right singular vectors of L in WORK(IR) by Q * in A, storing result in WORK(IU) and copying to A * (Workspace: need M*M+2*M, prefer M*M+M*N+M)) **/ for (i=1 ; chunk>0?i<=n:i>=n ; i+=chunk) { blk = min( n-i+1, chunk ); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, blk, m, one, &work_1( ir ), ldwrkr, &a_2( 1, i ), lda, zero, &work_1( iu ), ldwrku ); dlacpy( 'f', m, blk, &work_1( iu ), ldwrku, &a_2( 1, i ), lda ); } } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + m; /** * Compute A=L*Q * (Workspace: need 2*M, prefer M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy L to U, zeroing out above it **/ dlacpy( 'l', m, m, a, lda, u, ldu ); dlaset( 'u', m-1, m-1, zero, zero, &u_2( 1, 2 ), ldu ); /** * Generate Q in A * (Workspace: need 2*M, prefer M+M*NB) **/ dorglq( m, n, m, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize L in U * (Workspace: need 4*M, prefer 3*M+2*M*NB) **/ dgebrd( m, m, u, ldu, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply right vectors bidiagonalizing L by Q in A * (Workspace: need 3*M+N, prefer 3*M+N*NB) **/ dormbr( 'p', 'l', 't', m, n, m, u, ldu, &work_1( itaup ), a, lda, &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left vectors bidiagonalizing L in U * (Workspace: need 4*M, prefer 3*M+M*NB) **/ dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in A * (Workspace: need 5*M-4) **/ dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), a, lda, u, ldu, dum, 1, &work_1( iwork ), info ); } } else if( wntvs ) { if( wntun ) { /** * Path 4t(N much larger than M, JOBU='N', JOBVT='S') * M right singular vectors to be computed in VT and * no left singular vectors to be computed **/ if( lwork>=m*m+max( 4*m, 5*m-4 ) ) { /** * Sufficient workspace for a fast algorithm **/ ir = 1; if( lwork>=wrkbl+lda*m ) { /** * WORK(IR) is LDA by M **/ ldwrkr = lda; } else { /** * WORK(IR) is M by M **/ ldwrkr = m; } itau = ir + ldwrkr*m; iwork = itau + m; /** * Compute A=L*Q * (Workspace: need M*M+2*M, prefer M*M+M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy L to WORK(IR), zeroing out above it **/ dlacpy( 'l', m, m, a, lda, &work_1( ir ), ldwrkr ); dlaset( 'u', m-1, m-1, zero, zero, &work_1( ir+ldwrkr ), ldwrkr ); /** * Generate Q in A * (Workspace: need M*M+2*M, prefer M*M+M+M*NB) **/ dorglq( m, n, m, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize L in WORK(IR) * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) **/ dgebrd( m, m, &work_1( ir ), ldwrkr, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right vectors bidiagonalizing L in * WORK(IR) * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) **/ dorgbr( 'p', m, m, m, &work_1( ir ), ldwrkr, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing right * singular vectors of L in WORK(IR) * (Workspace: need M*M+5*M-4) **/ dbdsqr( 'u', m, m, 0, 0, s, &work_1( ie ), &work_1( ir ), ldwrkr, dum, 1, dum, 1, &work_1( iwork ), info ); /** * Multiply right singular vectors of L in WORK(IR) by * Q in A, storing result in VT * (Workspace: need M*M) **/ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m, one, &work_1( ir ), ldwrkr, a, lda, zero, vt, ldvt ); } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + m; /** * Compute A=L*Q * (Workspace: need 2*M, prefer M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy result to VT **/ dlacpy( 'u', m, n, a, lda, vt, ldvt ); /** * Generate Q in VT * (Workspace: need 2*M, prefer M+M*NB) **/ dorglq( m, n, m, vt, ldvt, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Zero out above L in A **/ dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ), lda ); /** * Bidiagonalize L in A * (Workspace: need 4*M, prefer 3*M+2*M*NB) **/ dgebrd( m, m, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply right vectors bidiagonalizing L by Q in VT * (Workspace: need 3*M+N, prefer 3*M+N*NB) **/ dormbr( 'p', 'l', 't', m, n, m, a, lda, &work_1( itaup ), vt, ldvt, &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing right * singular vectors of A in VT * (Workspace: need 5*M-4) **/ dbdsqr( 'u', m, n, 0, 0, s, &work_1( ie ), vt, ldvt, dum, 1, dum, 1, &work_1( iwork ), info ); } } else if( wntuo ) { /** * Path 5t(N much larger than M, JOBU='O', JOBVT='S') * M right singular vectors to be computed in VT and * M left singular vectors to be overwritten on A **/ if( lwork>=2*m*m+max( 4*m, 5*m-4 ) ) { /** * Sufficient workspace for a fast algorithm **/ iu = 1; if( lwork>=wrkbl+2*lda*m ) { /** * WORK(IU) is LDA by M and WORK(IR) is LDA by M **/ ldwrku = lda; ir = iu + ldwrku*m; ldwrkr = lda; } else if( lwork>=wrkbl+( lda+m )*m ) { /** * WORK(IU) is LDA by M and WORK(IR) is M by M **/ ldwrku = lda; ir = iu + ldwrku*m; ldwrkr = m; } else { /** * WORK(IU) is M by M and WORK(IR) is M by M **/ ldwrku = m; ir = iu + ldwrku*m; ldwrkr = m; } itau = ir + ldwrkr*m; iwork = itau + m; /** * Compute A=L*Q * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy L to WORK(IU), zeroing out below it **/ dlacpy( 'l', m, m, a, lda, &work_1( iu ), ldwrku ); dlaset( 'u', m-1, m-1, zero, zero, &work_1( iu+ldwrku ), ldwrku ); /** * Generate Q in A * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) **/ dorglq( m, n, m, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize L in WORK(IU), copying result to * WORK(IR) * (Workspace: need 2*M*M+4*M, * prefer 2*M*M+3*M+2*M*NB) **/ dgebrd( m, m, &work_1( iu ), ldwrku, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', m, m, &work_1( iu ), ldwrku, &work_1( ir ), ldwrkr ); /** * Generate right bidiagonalizing vectors in WORK(IU) * (Workspace: need 2*M*M+4*M-1, * prefer 2*M*M+3*M+(M-1)*NB) **/ dorgbr( 'p', m, m, m, &work_1( iu ), ldwrku, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left bidiagonalizing vectors in WORK(IR) * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) **/ dorgbr( 'q', m, m, m, &work_1( ir ), ldwrkr, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing left * singular vectors of L in WORK(IR) and computing * right singular vectors of L in WORK(IU) * (Workspace: need 2*M*M+5*M-4) **/ dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ), &work_1( iu ), ldwrku, &work_1( ir ), ldwrkr, dum, 1, &work_1( iwork ), info ); /** * Multiply right singular vectors of L in WORK(IU) by * Q in A, storing result in VT * (Workspace: need M*M) **/ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m, one, &work_1( iu ), ldwrku, a, lda, zero, vt, ldvt ); /** * Copy left singular vectors of L to A * (Workspace: need M*M) **/ dlacpy( 'f', m, m, &work_1( ir ), ldwrkr, a, lda ); } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + m; /** * Compute A=L*Q, copying result to VT * (Workspace: need 2*M, prefer M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'u', m, n, a, lda, vt, ldvt ); /** * Generate Q in VT * (Workspace: need 2*M, prefer M+M*NB) **/ dorglq( m, n, m, vt, ldvt, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Zero out above L in A **/ dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ), lda ); /** * Bidiagonalize L in A * (Workspace: need 4*M, prefer 3*M+2*M*NB) **/ dgebrd( m, m, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply right vectors bidiagonalizing L by Q in VT * (Workspace: need 3*M+N, prefer 3*M+N*NB) **/ dormbr( 'p', 'l', 't', m, n, m, a, lda, &work_1( itaup ), vt, ldvt, &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left bidiagonalizing vectors of L in A * (Workspace: need 4*M, prefer 3*M+M*NB) **/ dorgbr( 'q', m, m, m, a, lda, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, compute left * singular vectors of A in A and compute right * singular vectors of A in VT * (Workspace: need 5*M-4) **/ dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), vt, ldvt, a, lda, dum, 1, &work_1( iwork ), info ); } } else if( wntuas ) { /** * Path 6t(N much larger than M, JOBU='S' or 'A', * JOBVT='S') * M right singular vectors to be computed in VT and * M left singular vectors to be computed in U **/ if( lwork>=m*m+max( 4*m, 5*m-4 ) ) { /** * Sufficient workspace for a fast algorithm **/ iu = 1; if( lwork>=wrkbl+lda*m ) { /** * WORK(IU) is LDA by N **/ ldwrku = lda; } else { /** * WORK(IU) is LDA by M **/ ldwrku = m; } itau = iu + ldwrku*m; iwork = itau + m; /** * Compute A=L*Q * (Workspace: need M*M+2*M, prefer M*M+M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy L to WORK(IU), zeroing out above it **/ dlacpy( 'l', m, m, a, lda, &work_1( iu ), ldwrku ); dlaset( 'u', m-1, m-1, zero, zero, &work_1( iu+ldwrku ), ldwrku ); /** * Generate Q in A * (Workspace: need M*M+2*M, prefer M*M+M+M*NB) **/ dorglq( m, n, m, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize L in WORK(IU), copying result to U * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) **/ dgebrd( m, m, &work_1( iu ), ldwrku, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', m, m, &work_1( iu ), ldwrku, u, ldu ); /** * Generate right bidiagonalizing vectors in WORK(IU) * (Workspace: need M*M+4*M-1, * prefer M*M+3*M+(M-1)*NB) **/ dorgbr( 'p', m, m, m, &work_1( iu ), ldwrku, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left bidiagonalizing vectors in U * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) **/ dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing left * singular vectors of L in U and computing right * singular vectors of L in WORK(IU) * (Workspace: need M*M+5*M-4) **/ dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ), &work_1( iu ), ldwrku, u, ldu, dum, 1, &work_1( iwork ), info ); /** * Multiply right singular vectors of L in WORK(IU) by * Q in A, storing result in VT * (Workspace: need M*M) **/ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m, one, &work_1( iu ), ldwrku, a, lda, zero, vt, ldvt ); } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + m; /** * Compute A=L*Q, copying result to VT * (Workspace: need 2*M, prefer M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'u', m, n, a, lda, vt, ldvt ); /** * Generate Q in VT * (Workspace: need 2*M, prefer M+M*NB) **/ dorglq( m, n, m, vt, ldvt, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy L to U, zeroing out above it **/ dlacpy( 'l', m, m, a, lda, u, ldu ); dlaset( 'u', m-1, m-1, zero, zero, &u_2( 1, 2 ), ldu ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize L in U * (Workspace: need 4*M, prefer 3*M+2*M*NB) **/ dgebrd( m, m, u, ldu, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply right bidiagonalizing vectors in U by Q * in VT * (Workspace: need 3*M+N, prefer 3*M+N*NB) **/ dormbr( 'p', 'l', 't', m, n, m, u, ldu, &work_1( itaup ), vt, ldvt, &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left bidiagonalizing vectors in U * (Workspace: need 4*M, prefer 3*M+M*NB) **/ dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in VT * (Workspace: need 5*M-4) **/ dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), vt, ldvt, u, ldu, dum, 1, &work_1( iwork ), info ); } } } else if( wntva ) { if( wntun ) { /** * Path 7t(N much larger than M, JOBU='N', JOBVT='A') * N right singular vectors to be computed in VT and * no left singular vectors to be computed **/ if( lwork>=m*m+max( n+m, max(4*m, 5*m-4) ) ) { /** * Sufficient workspace for a fast algorithm **/ ir = 1; if( lwork>=wrkbl+lda*m ) { /** * WORK(IR) is LDA by M **/ ldwrkr = lda; } else { /** * WORK(IR) is M by M **/ ldwrkr = m; } itau = ir + ldwrkr*m; iwork = itau + m; /** * Compute A=L*Q, copying result to VT * (Workspace: need M*M+2*M, prefer M*M+M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'u', m, n, a, lda, vt, ldvt ); /** * Copy L to WORK(IR), zeroing out above it **/ dlacpy( 'l', m, m, a, lda, &work_1( ir ), ldwrkr ); dlaset( 'u', m-1, m-1, zero, zero, &work_1( ir+ldwrkr ), ldwrkr ); /** * Generate Q in VT * (Workspace: need M*M+M+N, prefer M*M+M+N*NB) **/ dorglq( n, n, m, vt, ldvt, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize L in WORK(IR) * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) **/ dgebrd( m, m, &work_1( ir ), ldwrkr, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate right bidiagonalizing vectors in WORK(IR) * (Workspace: need M*M+4*M-1, * prefer M*M+3*M+(M-1)*NB) **/ dorgbr( 'p', m, m, m, &work_1( ir ), ldwrkr, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing right * singular vectors of L in WORK(IR) * (Workspace: need M*M+5*M-4) **/ dbdsqr( 'u', m, m, 0, 0, s, &work_1( ie ), &work_1( ir ), ldwrkr, dum, 1, dum, 1, &work_1( iwork ), info ); /** * Multiply right singular vectors of L in WORK(IR) by * Q in VT, storing result in A * (Workspace: need M*M) **/ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m, one, &work_1( ir ), ldwrkr, vt, ldvt, zero, a, lda ); /** * Copy right singular vectors of A from A to VT **/ dlacpy( 'f', m, n, a, lda, vt, ldvt ); } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + m; /** * Compute A=L*Q, copying result to VT * (Workspace: need 2*M, prefer M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'u', m, n, a, lda, vt, ldvt ); /** * Generate Q in VT * (Workspace: need M+N, prefer M+N*NB) **/ dorglq( n, n, m, vt, ldvt, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Zero out above L in A **/ dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ), lda ); /** * Bidiagonalize L in A * (Workspace: need 4*M, prefer 3*M+2*M*NB) **/ dgebrd( m, m, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply right bidiagonalizing vectors in A by Q * in VT * (Workspace: need 3*M+N, prefer 3*M+N*NB) **/ dormbr( 'p', 'l', 't', m, n, m, a, lda, &work_1( itaup ), vt, ldvt, &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing right * singular vectors of A in VT * (Workspace: need 5*M-4) **/ dbdsqr( 'u', m, n, 0, 0, s, &work_1( ie ), vt, ldvt, dum, 1, dum, 1, &work_1( iwork ), info ); } } else if( wntuo ) { /** * Path 8t(N much larger than M, JOBU='O', JOBVT='A') * N right singular vectors to be computed in VT and * M left singular vectors to be overwritten on A **/ if( lwork>=2*m*m+max( n+m, max(4*m, 5*m-4) ) ) { /** * Sufficient workspace for a fast algorithm **/ iu = 1; if( lwork>=wrkbl+2*lda*m ) { /** * WORK(IU) is LDA by M and WORK(IR) is LDA by M **/ ldwrku = lda; ir = iu + ldwrku*m; ldwrkr = lda; } else if( lwork>=wrkbl+( lda+m )*m ) { /** * WORK(IU) is LDA by M and WORK(IR) is M by M **/ ldwrku = lda; ir = iu + ldwrku*m; ldwrkr = m; } else { /** * WORK(IU) is M by M and WORK(IR) is M by M **/ ldwrku = m; ir = iu + ldwrku*m; ldwrkr = m; } itau = ir + ldwrkr*m; iwork = itau + m; /** * Compute A=L*Q, copying result to VT * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'u', m, n, a, lda, vt, ldvt ); /** * Generate Q in VT * (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) **/ dorglq( n, n, m, vt, ldvt, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy L to WORK(IU), zeroing out above it **/ dlacpy( 'l', m, m, a, lda, &work_1( iu ), ldwrku ); dlaset( 'u', m-1, m-1, zero, zero, &work_1( iu+ldwrku ), ldwrku ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize L in WORK(IU), copying result to * WORK(IR) * (Workspace: need 2*M*M+4*M, * prefer 2*M*M+3*M+2*M*NB) **/ dgebrd( m, m, &work_1( iu ), ldwrku, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', m, m, &work_1( iu ), ldwrku, &work_1( ir ), ldwrkr ); /** * Generate right bidiagonalizing vectors in WORK(IU) * (Workspace: need 2*M*M+4*M-1, * prefer 2*M*M+3*M+(M-1)*NB) **/ dorgbr( 'p', m, m, m, &work_1( iu ), ldwrku, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left bidiagonalizing vectors in WORK(IR) * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) **/ dorgbr( 'q', m, m, m, &work_1( ir ), ldwrkr, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing left * singular vectors of L in WORK(IR) and computing * right singular vectors of L in WORK(IU) * (Workspace: need 2*M*M+5*M-4) **/ dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ), &work_1( iu ), ldwrku, &work_1( ir ), ldwrkr, dum, 1, &work_1( iwork ), info ); /** * Multiply right singular vectors of L in WORK(IU) by * Q in VT, storing result in A * (Workspace: need M*M) **/ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m, one, &work_1( iu ), ldwrku, vt, ldvt, zero, a, lda ); /** * Copy right singular vectors of A from A to VT **/ dlacpy( 'f', m, n, a, lda, vt, ldvt ); /** * Copy left singular vectors of A from WORK(IR) to A **/ dlacpy( 'f', m, m, &work_1( ir ), ldwrkr, a, lda ); } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + m; /** * Compute A=L*Q, copying result to VT * (Workspace: need 2*M, prefer M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'u', m, n, a, lda, vt, ldvt ); /** * Generate Q in VT * (Workspace: need M+N, prefer M+N*NB) **/ dorglq( n, n, m, vt, ldvt, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Zero out above L in A **/ dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ), lda ); /** * Bidiagonalize L in A * (Workspace: need 4*M, prefer 3*M+2*M*NB) **/ dgebrd( m, m, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply right bidiagonalizing vectors in A by Q * in VT * (Workspace: need 3*M+N, prefer 3*M+N*NB) **/ dormbr( 'p', 'l', 't', m, n, m, a, lda, &work_1( itaup ), vt, ldvt, &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left bidiagonalizing vectors in A * (Workspace: need 4*M, prefer 3*M+M*NB) **/ dorgbr( 'q', m, m, m, a, lda, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing left * singular vectors of A in A and computing right * singular vectors of A in VT * (Workspace: need 5*M-4) **/ dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), vt, ldvt, a, lda, dum, 1, &work_1( iwork ), info ); } } else if( wntuas ) { /** * Path 9t(N much larger than M, JOBU='S' or 'A', * JOBVT='A') * N right singular vectors to be computed in VT and * M left singular vectors to be computed in U **/ if( lwork>=m*m+max( n+m, max(4*m, 5*m-4) ) ) { /** * Sufficient workspace for a fast algorithm **/ iu = 1; if( lwork>=wrkbl+lda*m ) { /** * WORK(IU) is LDA by M **/ ldwrku = lda; } else { /** * WORK(IU) is M by M **/ ldwrku = m; } itau = iu + ldwrku*m; iwork = itau + m; /** * Compute A=L*Q, copying result to VT * (Workspace: need M*M+2*M, prefer M*M+M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'u', m, n, a, lda, vt, ldvt ); /** * Generate Q in VT * (Workspace: need M*M+M+N, prefer M*M+M+N*NB) **/ dorglq( n, n, m, vt, ldvt, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy L to WORK(IU), zeroing out above it **/ dlacpy( 'l', m, m, a, lda, &work_1( iu ), ldwrku ); dlaset( 'u', m-1, m-1, zero, zero, &work_1( iu+ldwrku ), ldwrku ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize L in WORK(IU), copying result to U * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) **/ dgebrd( m, m, &work_1( iu ), ldwrku, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'l', m, m, &work_1( iu ), ldwrku, u, ldu ); /** * Generate right bidiagonalizing vectors in WORK(IU) * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) **/ dorgbr( 'p', m, m, m, &work_1( iu ), ldwrku, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left bidiagonalizing vectors in U * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) **/ dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing left * singular vectors of L in U and computing right * singular vectors of L in WORK(IU) * (Workspace: need M*M+5*M-4) **/ dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ), &work_1( iu ), ldwrku, u, ldu, dum, 1, &work_1( iwork ), info ); /** * Multiply right singular vectors of L in WORK(IU) by * Q in VT, storing result in A * (Workspace: need M*M) **/ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m, one, &work_1( iu ), ldwrku, vt, ldvt, zero, a, lda ); /** * Copy right singular vectors of A from A to VT **/ dlacpy( 'f', m, n, a, lda, vt, ldvt ); } else { /** * Insufficient workspace for a fast algorithm **/ itau = 1; iwork = itau + m; /** * Compute A=L*Q, copying result to VT * (Workspace: need 2*M, prefer M+M*NB) **/ dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); dlacpy( 'u', m, n, a, lda, vt, ldvt ); /** * Generate Q in VT * (Workspace: need M+N, prefer M+N*NB) **/ dorglq( n, n, m, vt, ldvt, &work_1( itau ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Copy L to U, zeroing out above it **/ dlacpy( 'l', m, m, a, lda, u, ldu ); dlaset( 'u', m-1, m-1, zero, zero, &u_2( 1, 2 ), ldu ); ie = itau; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize L in U * (Workspace: need 4*M, prefer 3*M+2*M*NB) **/ dgebrd( m, m, u, ldu, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Multiply right bidiagonalizing vectors in U by Q * in VT * (Workspace: need 3*M+N, prefer 3*M+N*NB) **/ dormbr( 'p', 'l', 't', m, n, m, u, ldu, &work_1( itaup ), vt, ldvt, &work_1( iwork ), lwork-iwork+1, &ierr ); /** * Generate left bidiagonalizing vectors in U * (Workspace: need 4*M, prefer 3*M+M*NB) **/ dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); iwork = ie + m; /** * Perform bidiagonal QR iteration, computing left * singular vectors of A in U and computing right * singular vectors of A in VT * (Workspace: need 5*M-4) **/ dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), vt, ldvt, u, ldu, dum, 1, &work_1( iwork ), info ); } } } } else { /** * N .LT. MNTHR * * Path 10t(N greater than M, but not much larger) * Reduce to bidiagonal form without LQ decomposition **/ ie = 1; itauq = ie + m; itaup = itauq + m; iwork = itaup + m; /** * Bidiagonalize A * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) **/ dgebrd( m, n, a, lda, s, &work_1( ie ), &work_1( itauq ), &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); if( wntuas ) { /** * If left singular vectors desired in U, copy result to U * and generate left bidiagonalizing vectors in U * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) **/ dlacpy( 'l', m, m, a, lda, u, ldu ); dorgbr( 'q', m, m, n, u, ldu, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); } if( wntvas ) { /** * If right singular vectors desired in VT, copy result to * VT and generate right bidiagonalizing vectors in VT * (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) **/ dlacpy( 'u', m, n, a, lda, vt, ldvt ); if( wntva ) nrvt = n; if( wntvs ) nrvt = m; dorgbr( 'p', nrvt, n, m, vt, ldvt, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); } if( wntuo ) { /** * If left singular vectors desired in A, generate left * bidiagonalizing vectors in A * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) **/ dorgbr( 'q', m, m, n, a, lda, &work_1( itauq ), &work_1( iwork ), lwork-iwork+1, &ierr ); } if( wntvo ) { /** * If right singular vectors desired in A, generate right * bidiagonalizing vectors in A * (Workspace: need 4*M, prefer 3*M+M*NB) **/ dorgbr( 'p', m, n, m, a, lda, &work_1( itaup ), &work_1( iwork ), lwork-iwork+1, &ierr ); } iwork = ie + m; if( wntuas || wntuo ) nru = m; if( wntun ) nru = 0; if( wntvas || wntvo ) ncvt = n; if( wntvn ) ncvt = 0; if( ( !wntuo ) && ( !wntvo ) ) { /** * Perform bidiagonal QR iteration, if desired, computing * left singular vectors in U and computing right singular * vectors in VT * (Workspace: need 5*M-4) **/ dbdsqr( 'l', m, ncvt, nru, 0, s, &work_1( ie ), vt, ldvt, u, ldu, dum, 1, &work_1( iwork ), info ); } else if( ( !wntuo ) && wntvo ) { /** * Perform bidiagonal QR iteration, if desired, computing * left singular vectors in U and computing right singular * vectors in A * (Workspace: need 5*M-4) **/ dbdsqr( 'l', m, ncvt, nru, 0, s, &work_1( ie ), a, lda, u, ldu, dum, 1, &work_1( iwork ), info ); } else { /** * Perform bidiagonal QR iteration, if desired, computing * left singular vectors in A and computing right singular * vectors in VT * (Workspace: need 5*M-4) **/ dbdsqr( 'l', m, ncvt, nru, 0, s, &work_1( ie ), vt, ldvt, a, lda, dum, 1, &work_1( iwork ), info ); } } /** * If DBDSQR failed to converge, copy unconverged superdiagonals * to WORK( 2:MINMN ) **/ if( *info!=0 ) { if( ie>2 ) { for (i=1 ; i<=minmn - 1 ; i+=1) { work_1( i+1 ) = work_1( i+ie-1 ); } } if( ie<2 ) { for (i=minmn - 1 ; i>=1 ; i+=-1) { work_1( i+1 ) = work_1( i+ie-1 ); } } } /** * Undo scaling if necessary **/ if( iscl==1 ) { if( anrm>bignum ) dlascl( 'g', 0, 0, bignum, anrm, minmn, 1, s, minmn, &ierr ); if( *info!=0 && anrm>bignum ) dlascl( 'g', 0, 0, bignum, anrm, minmn-1, 1, &work_1( 2 ), minmn, &ierr ); if( anrm