You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
422 lines
12 KiB
422 lines
12 KiB
2 years ago
|
C> \brief \b ZGEQRF VARIANT: left-looking Level 3 BLAS of the algorithm.
|
||
|
*
|
||
|
* =========== DOCUMENTATION ===========
|
||
|
*
|
||
|
* Online html documentation available at
|
||
|
* http://www.netlib.org/lapack/explore-html/
|
||
|
*
|
||
|
* Definition:
|
||
|
* ===========
|
||
|
*
|
||
|
* SUBROUTINE ZGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
|
||
|
*
|
||
|
* .. Scalar Arguments ..
|
||
|
* INTEGER INFO, LDA, LWORK, M, N
|
||
|
* ..
|
||
|
* .. Array Arguments ..
|
||
|
* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * )
|
||
|
* ..
|
||
|
*
|
||
|
* Purpose
|
||
|
* =======
|
||
|
*
|
||
|
C>\details \b Purpose:
|
||
|
C>\verbatim
|
||
|
C>
|
||
|
C> ZGEQRF computes a QR factorization of a complex M-by-N matrix A:
|
||
|
C> A = Q * R.
|
||
|
C>
|
||
|
C> This is the left-looking Level 3 BLAS version of the algorithm.
|
||
|
C>
|
||
|
C>\endverbatim
|
||
|
*
|
||
|
* Arguments:
|
||
|
* ==========
|
||
|
*
|
||
|
C> \param[in] M
|
||
|
C> \verbatim
|
||
|
C> M is INTEGER
|
||
|
C> The number of rows of the matrix A. M >= 0.
|
||
|
C> \endverbatim
|
||
|
C>
|
||
|
C> \param[in] N
|
||
|
C> \verbatim
|
||
|
C> N is INTEGER
|
||
|
C> The number of columns of the matrix A. N >= 0.
|
||
|
C> \endverbatim
|
||
|
C>
|
||
|
C> \param[in,out] A
|
||
|
C> \verbatim
|
||
|
C> A is COMPLEX*16 array, dimension (LDA,N)
|
||
|
C> On entry, the M-by-N matrix A.
|
||
|
C> On exit, the elements on and above the diagonal of the array
|
||
|
C> contain the min(M,N)-by-N upper trapezoidal matrix R (R is
|
||
|
C> upper triangular if m >= n); the elements below the diagonal,
|
||
|
C> with the array TAU, represent the orthogonal matrix Q as a
|
||
|
C> product of min(m,n) elementary reflectors (see Further
|
||
|
C> Details).
|
||
|
C> \endverbatim
|
||
|
C>
|
||
|
C> \param[in] LDA
|
||
|
C> \verbatim
|
||
|
C> LDA is INTEGER
|
||
|
C> The leading dimension of the array A. LDA >= max(1,M).
|
||
|
C> \endverbatim
|
||
|
C>
|
||
|
C> \param[out] TAU
|
||
|
C> \verbatim
|
||
|
C> TAU is COMPLEX*16 array, dimension (min(M,N))
|
||
|
C> The scalar factors of the elementary reflectors (see Further
|
||
|
C> Details).
|
||
|
C> \endverbatim
|
||
|
C>
|
||
|
C> \param[out] WORK
|
||
|
C> \verbatim
|
||
|
C> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
|
||
|
C> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
|
||
|
C> \endverbatim
|
||
|
C>
|
||
|
C> \param[in] LWORK
|
||
|
C> \verbatim
|
||
|
C> LWORK is INTEGER
|
||
|
C> \endverbatim
|
||
|
C> \verbatim
|
||
|
C> The dimension of the array WORK. LWORK >= 1 if MIN(M,N) = 0,
|
||
|
C> otherwise the dimension can be divided into three parts.
|
||
|
C> \endverbatim
|
||
|
C> \verbatim
|
||
|
C> 1) The part for the triangular factor T. If the very last T is not bigger
|
||
|
C> than any of the rest, then this part is NB x ceiling(K/NB), otherwise,
|
||
|
C> NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T
|
||
|
C> \endverbatim
|
||
|
C> \verbatim
|
||
|
C> 2) The part for the very last T when T is bigger than any of the rest T.
|
||
|
C> The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB,
|
||
|
C> where K = min(M,N), NX is calculated by
|
||
|
C> NX = MAX( 0, ILAENV( 3, 'ZGEQRF', ' ', M, N, -1, -1 ) )
|
||
|
C> \endverbatim
|
||
|
C> \verbatim
|
||
|
C> 3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB)
|
||
|
C> \endverbatim
|
||
|
C> \verbatim
|
||
|
C> So LWORK = part1 + part2 + part3
|
||
|
C> \endverbatim
|
||
|
C> \verbatim
|
||
|
C> If LWORK = -1, then a workspace query is assumed; the routine
|
||
|
C> only calculates the optimal size of the WORK array, returns
|
||
|
C> this value as the first entry of the WORK array, and no error
|
||
|
C> message related to LWORK is issued by XERBLA.
|
||
|
C> \endverbatim
|
||
|
C>
|
||
|
C> \param[out] INFO
|
||
|
C> \verbatim
|
||
|
C> INFO is INTEGER
|
||
|
C> = 0: successful exit
|
||
|
C> < 0: if INFO = -i, the i-th argument had an illegal value
|
||
|
C> \endverbatim
|
||
|
C>
|
||
|
*
|
||
|
* Authors:
|
||
|
* ========
|
||
|
*
|
||
|
C> \author Univ. of Tennessee
|
||
|
C> \author Univ. of California Berkeley
|
||
|
C> \author Univ. of Colorado Denver
|
||
|
C> \author NAG Ltd.
|
||
|
*
|
||
|
C> \date December 2016
|
||
|
*
|
||
|
C> \ingroup variantsGEcomputational
|
||
|
*
|
||
|
* Further Details
|
||
|
* ===============
|
||
|
C>\details \b Further \b Details
|
||
|
C> \verbatim
|
||
|
C>
|
||
|
C> The matrix Q is represented as a product of elementary reflectors
|
||
|
C>
|
||
|
C> Q = H(1) H(2) . . . H(k), where k = min(m,n).
|
||
|
C>
|
||
|
C> Each H(i) has the form
|
||
|
C>
|
||
|
C> H(i) = I - tau * v * v'
|
||
|
C>
|
||
|
C> where tau is a real scalar, and v is a real vector with
|
||
|
C> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
|
||
|
C> and tau in TAU(i).
|
||
|
C>
|
||
|
C> \endverbatim
|
||
|
C>
|
||
|
* =====================================================================
|
||
|
SUBROUTINE ZGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
|
||
|
*
|
||
|
* -- LAPACK computational routine --
|
||
|
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||
|
*
|
||
|
* .. Scalar Arguments ..
|
||
|
INTEGER INFO, LDA, LWORK, M, N
|
||
|
* ..
|
||
|
* .. Array Arguments ..
|
||
|
COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * )
|
||
|
* ..
|
||
|
*
|
||
|
* =====================================================================
|
||
|
*
|
||
|
* .. Local Scalars ..
|
||
|
LOGICAL LQUERY
|
||
|
INTEGER I, IB, IINFO, IWS, J, K, LWKOPT, NB,
|
||
|
$ NBMIN, NX, LBWORK, NT, LLWORK
|
||
|
* ..
|
||
|
* .. External Subroutines ..
|
||
|
EXTERNAL ZGEQR2, ZLARFB, ZLARFT, XERBLA
|
||
|
* ..
|
||
|
* .. Intrinsic Functions ..
|
||
|
INTRINSIC CEILING, MAX, MIN, REAL
|
||
|
* ..
|
||
|
* .. External Functions ..
|
||
|
INTEGER ILAENV
|
||
|
EXTERNAL ILAENV
|
||
|
* ..
|
||
|
* .. Executable Statements ..
|
||
|
|
||
|
INFO = 0
|
||
|
NBMIN = 2
|
||
|
NX = 0
|
||
|
IWS = N
|
||
|
K = MIN( M, N )
|
||
|
NB = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
|
||
|
|
||
|
IF( NB.GT.1 .AND. NB.LT.K ) THEN
|
||
|
*
|
||
|
* Determine when to cross over from blocked to unblocked code.
|
||
|
*
|
||
|
NX = MAX( 0, ILAENV( 3, 'ZGEQRF', ' ', M, N, -1, -1 ) )
|
||
|
END IF
|
||
|
*
|
||
|
* Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.:
|
||
|
*
|
||
|
* NB=3 2NB=6 K=10
|
||
|
* | | |
|
||
|
* 1--2--3--4--5--6--7--8--9--10
|
||
|
* | \________/
|
||
|
* K-NX=5 NT=4
|
||
|
*
|
||
|
* So here 4 x 4 is the last T stored in the workspace
|
||
|
*
|
||
|
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
|
||
|
|
||
|
*
|
||
|
* optimal workspace = space for dlarfb + space for normal T's + space for the last T
|
||
|
*
|
||
|
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
|
||
|
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
|
||
|
|
||
|
IF( K.EQ.0 ) THEN
|
||
|
|
||
|
LBWORK = 0
|
||
|
LWKOPT = 1
|
||
|
WORK( 1 ) = LWKOPT
|
||
|
|
||
|
ELSE IF ( NT.GT.NB ) THEN
|
||
|
|
||
|
LBWORK = K-NT
|
||
|
*
|
||
|
* Optimal workspace for dlarfb = MAX(1,N)*NT
|
||
|
*
|
||
|
LWKOPT = (LBWORK+LLWORK)*NB
|
||
|
WORK( 1 ) = (LWKOPT+NT*NT)
|
||
|
|
||
|
ELSE
|
||
|
|
||
|
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
|
||
|
LWKOPT = (LBWORK+LLWORK-NB)*NB
|
||
|
WORK( 1 ) = LWKOPT
|
||
|
|
||
|
END IF
|
||
|
|
||
|
*
|
||
|
* Test the input arguments
|
||
|
*
|
||
|
LQUERY = ( LWORK.EQ.-1 )
|
||
|
IF( M.LT.0 ) THEN
|
||
|
INFO = -1
|
||
|
ELSE IF( N.LT.0 ) THEN
|
||
|
INFO = -2
|
||
|
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
|
||
|
INFO = -4
|
||
|
ELSE IF ( .NOT.LQUERY ) THEN
|
||
|
IF( LWORK.LE.0 .OR. ( M.GT.0 .AND. LWORK.LT.MAX( 1, N ) ) )
|
||
|
$ INFO = -7
|
||
|
END IF
|
||
|
IF( INFO.NE.0 ) THEN
|
||
|
CALL XERBLA( 'ZGEQRF', -INFO )
|
||
|
RETURN
|
||
|
ELSE IF( LQUERY ) THEN
|
||
|
RETURN
|
||
|
END IF
|
||
|
*
|
||
|
* Quick return if possible
|
||
|
*
|
||
|
IF( K.EQ.0 ) THEN
|
||
|
RETURN
|
||
|
END IF
|
||
|
*
|
||
|
IF( NB.GT.1 .AND. NB.LT.K ) THEN
|
||
|
|
||
|
IF( NX.LT.K ) THEN
|
||
|
*
|
||
|
* Determine if workspace is large enough for blocked code.
|
||
|
*
|
||
|
IF ( NT.LE.NB ) THEN
|
||
|
IWS = (LBWORK+LLWORK-NB)*NB
|
||
|
ELSE
|
||
|
IWS = (LBWORK+LLWORK)*NB+NT*NT
|
||
|
END IF
|
||
|
|
||
|
IF( LWORK.LT.IWS ) THEN
|
||
|
*
|
||
|
* Not enough workspace to use optimal NB: reduce NB and
|
||
|
* determine the minimum value of NB.
|
||
|
*
|
||
|
IF ( NT.LE.NB ) THEN
|
||
|
NB = LWORK / (LLWORK+(LBWORK-NB))
|
||
|
ELSE
|
||
|
NB = (LWORK-NT*NT)/(LBWORK+LLWORK)
|
||
|
END IF
|
||
|
|
||
|
NBMIN = MAX( 2, ILAENV( 2, 'ZGEQRF', ' ', M, N, -1,
|
||
|
$ -1 ) )
|
||
|
END IF
|
||
|
END IF
|
||
|
END IF
|
||
|
*
|
||
|
IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
|
||
|
*
|
||
|
* Use blocked code initially
|
||
|
*
|
||
|
DO 10 I = 1, K - NX, NB
|
||
|
IB = MIN( K-I+1, NB )
|
||
|
*
|
||
|
* Update the current column using old T's
|
||
|
*
|
||
|
DO 20 J = 1, I - NB, NB
|
||
|
*
|
||
|
* Apply H' to A(J:M,I:I+IB-1) from the left
|
||
|
*
|
||
|
CALL ZLARFB( 'Left', 'Transpose', 'Forward',
|
||
|
$ 'Columnwise', M-J+1, IB, NB,
|
||
|
$ A( J, J ), LDA, WORK(J), LBWORK,
|
||
|
$ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
|
||
|
$ IB)
|
||
|
|
||
|
20 CONTINUE
|
||
|
*
|
||
|
* Compute the QR factorization of the current block
|
||
|
* A(I:M,I:I+IB-1)
|
||
|
*
|
||
|
CALL ZGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ),
|
||
|
$ WORK(LBWORK*NB+NT*NT+1), IINFO )
|
||
|
|
||
|
IF( I+IB.LE.N ) THEN
|
||
|
*
|
||
|
* Form the triangular factor of the block reflector
|
||
|
* H = H(i) H(i+1) . . . H(i+ib-1)
|
||
|
*
|
||
|
CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, IB,
|
||
|
$ A( I, I ), LDA, TAU( I ),
|
||
|
$ WORK(I), LBWORK )
|
||
|
*
|
||
|
END IF
|
||
|
10 CONTINUE
|
||
|
ELSE
|
||
|
I = 1
|
||
|
END IF
|
||
|
*
|
||
|
* Use unblocked code to factor the last or only block.
|
||
|
*
|
||
|
IF( I.LE.K ) THEN
|
||
|
|
||
|
IF ( I .NE. 1 ) THEN
|
||
|
|
||
|
DO 30 J = 1, I - NB, NB
|
||
|
*
|
||
|
* Apply H' to A(J:M,I:K) from the left
|
||
|
*
|
||
|
CALL ZLARFB( 'Left', 'Transpose', 'Forward',
|
||
|
$ 'Columnwise', M-J+1, K-I+1, NB,
|
||
|
$ A( J, J ), LDA, WORK(J), LBWORK,
|
||
|
$ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
|
||
|
$ K-I+1)
|
||
|
30 CONTINUE
|
||
|
|
||
|
CALL ZGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ),
|
||
|
$ WORK(LBWORK*NB+NT*NT+1),IINFO )
|
||
|
|
||
|
ELSE
|
||
|
*
|
||
|
* Use unblocked code to factor the last or only block.
|
||
|
*
|
||
|
CALL ZGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ),
|
||
|
$ WORK,IINFO )
|
||
|
|
||
|
END IF
|
||
|
END IF
|
||
|
|
||
|
|
||
|
*
|
||
|
* Apply update to the column M+1:N when N > M
|
||
|
*
|
||
|
IF ( M.LT.N .AND. I.NE.1) THEN
|
||
|
*
|
||
|
* Form the last triangular factor of the block reflector
|
||
|
* H = H(i) H(i+1) . . . H(i+ib-1)
|
||
|
*
|
||
|
IF ( NT .LE. NB ) THEN
|
||
|
CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
|
||
|
$ A( I, I ), LDA, TAU( I ), WORK(I), LBWORK )
|
||
|
ELSE
|
||
|
CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
|
||
|
$ A( I, I ), LDA, TAU( I ),
|
||
|
$ WORK(LBWORK*NB+1), NT )
|
||
|
END IF
|
||
|
|
||
|
*
|
||
|
* Apply H' to A(1:M,M+1:N) from the left
|
||
|
*
|
||
|
DO 40 J = 1, K-NX, NB
|
||
|
|
||
|
IB = MIN( K-J+1, NB )
|
||
|
|
||
|
CALL ZLARFB( 'Left', 'Transpose', 'Forward',
|
||
|
$ 'Columnwise', M-J+1, N-M, IB,
|
||
|
$ A( J, J ), LDA, WORK(J), LBWORK,
|
||
|
$ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
|
||
|
$ N-M)
|
||
|
|
||
|
40 CONTINUE
|
||
|
|
||
|
IF ( NT.LE.NB ) THEN
|
||
|
CALL ZLARFB( 'Left', 'Transpose', 'Forward',
|
||
|
$ 'Columnwise', M-J+1, N-M, K-J+1,
|
||
|
$ A( J, J ), LDA, WORK(J), LBWORK,
|
||
|
$ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
|
||
|
$ N-M)
|
||
|
ELSE
|
||
|
CALL ZLARFB( 'Left', 'Transpose', 'Forward',
|
||
|
$ 'Columnwise', M-J+1, N-M, K-J+1,
|
||
|
$ A( J, J ), LDA,
|
||
|
$ WORK(LBWORK*NB+1),
|
||
|
$ NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
|
||
|
$ N-M)
|
||
|
END IF
|
||
|
|
||
|
END IF
|
||
|
|
||
|
WORK( 1 ) = IWS
|
||
|
RETURN
|
||
|
*
|
||
|
* End of ZGEQRF
|
||
|
*
|
||
|
END
|