You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
402 lines
14 KiB
402 lines
14 KiB
2 years ago
|
*> \brief <b> SSYEVD_2STAGE computes the eigenvalues and, optionally, the eigenvectors for SY matrices</b>
|
||
|
*
|
||
|
* @generated from dsyevd_2stage.f, fortran d -> s, Sat Nov 5 23:55:54 2016
|
||
|
*
|
||
|
* =========== DOCUMENTATION ===========
|
||
|
*
|
||
|
* Online html documentation available at
|
||
|
* http://www.netlib.org/lapack/explore-html/
|
||
|
*
|
||
|
*> \htmlonly
|
||
|
*> Download SSYEVD_2STAGE + dependencies
|
||
|
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssyevd_2stage.f">
|
||
|
*> [TGZ]</a>
|
||
|
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssyevd_2stage.f">
|
||
|
*> [ZIP]</a>
|
||
|
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssyevd_2stage.f">
|
||
|
*> [TXT]</a>
|
||
|
*> \endhtmlonly
|
||
|
*
|
||
|
* Definition:
|
||
|
* ===========
|
||
|
*
|
||
|
* SUBROUTINE SSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
|
||
|
* IWORK, LIWORK, INFO )
|
||
|
*
|
||
|
* IMPLICIT NONE
|
||
|
*
|
||
|
* .. Scalar Arguments ..
|
||
|
* CHARACTER JOBZ, UPLO
|
||
|
* INTEGER INFO, LDA, LIWORK, LWORK, N
|
||
|
* ..
|
||
|
* .. Array Arguments ..
|
||
|
* INTEGER IWORK( * )
|
||
|
* REAL A( LDA, * ), W( * ), WORK( * )
|
||
|
* ..
|
||
|
*
|
||
|
*
|
||
|
*> \par Purpose:
|
||
|
* =============
|
||
|
*>
|
||
|
*> \verbatim
|
||
|
*>
|
||
|
*> SSYEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a
|
||
|
*> real symmetric matrix A using the 2stage technique for
|
||
|
*> the reduction to tridiagonal. If eigenvectors are desired, it uses a
|
||
|
*> divide and conquer algorithm.
|
||
|
*>
|
||
|
*> \endverbatim
|
||
|
*
|
||
|
* Arguments:
|
||
|
* ==========
|
||
|
*
|
||
|
*> \param[in] JOBZ
|
||
|
*> \verbatim
|
||
|
*> JOBZ is CHARACTER*1
|
||
|
*> = 'N': Compute eigenvalues only;
|
||
|
*> = 'V': Compute eigenvalues and eigenvectors.
|
||
|
*> Not available in this release.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in] UPLO
|
||
|
*> \verbatim
|
||
|
*> UPLO is CHARACTER*1
|
||
|
*> = 'U': Upper triangle of A is stored;
|
||
|
*> = 'L': Lower triangle of A is stored.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in] N
|
||
|
*> \verbatim
|
||
|
*> N is INTEGER
|
||
|
*> The order of the matrix A. N >= 0.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in,out] A
|
||
|
*> \verbatim
|
||
|
*> A is REAL array, dimension (LDA, N)
|
||
|
*> On entry, the symmetric matrix A. If UPLO = 'U', the
|
||
|
*> leading N-by-N upper triangular part of A contains the
|
||
|
*> upper triangular part of the matrix A. If UPLO = 'L',
|
||
|
*> the leading N-by-N lower triangular part of A contains
|
||
|
*> the lower triangular part of the matrix A.
|
||
|
*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the
|
||
|
*> orthonormal eigenvectors of the matrix A.
|
||
|
*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
|
||
|
*> or the upper triangle (if UPLO='U') of A, including the
|
||
|
*> diagonal, is destroyed.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in] LDA
|
||
|
*> \verbatim
|
||
|
*> LDA is INTEGER
|
||
|
*> The leading dimension of the array A. LDA >= max(1,N).
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[out] W
|
||
|
*> \verbatim
|
||
|
*> W is REAL array, dimension (N)
|
||
|
*> If INFO = 0, the eigenvalues in ascending order.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[out] WORK
|
||
|
*> \verbatim
|
||
|
*> WORK is REAL array,
|
||
|
*> dimension (LWORK)
|
||
|
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in] LWORK
|
||
|
*> \verbatim
|
||
|
*> LWORK is INTEGER
|
||
|
*> The dimension of the array WORK.
|
||
|
*> If N <= 1, LWORK must be at least 1.
|
||
|
*> If JOBZ = 'N' and N > 1, LWORK must be queried.
|
||
|
*> LWORK = MAX(1, dimension) where
|
||
|
*> dimension = max(stage1,stage2) + (KD+1)*N + 2*N+1
|
||
|
*> = N*KD + N*max(KD+1,FACTOPTNB)
|
||
|
*> + max(2*KD*KD, KD*NTHREADS)
|
||
|
*> + (KD+1)*N + 2*N+1
|
||
|
*> where KD is the blocking size of the reduction,
|
||
|
*> FACTOPTNB is the blocking used by the QR or LQ
|
||
|
*> algorithm, usually FACTOPTNB=128 is a good choice
|
||
|
*> NTHREADS is the number of threads used when
|
||
|
*> openMP compilation is enabled, otherwise =1.
|
||
|
*> If JOBZ = 'V' and N > 1, LWORK must be at least
|
||
|
*> 1 + 6*N + 2*N**2.
|
||
|
*>
|
||
|
*> If LWORK = -1, then a workspace query is assumed; the routine
|
||
|
*> only calculates the optimal sizes of the WORK and IWORK
|
||
|
*> arrays, returns these values as the first entries of the WORK
|
||
|
*> and IWORK arrays, and no error message related to LWORK or
|
||
|
*> LIWORK is issued by XERBLA.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[out] IWORK
|
||
|
*> \verbatim
|
||
|
*> IWORK is INTEGER array, dimension (MAX(1,LIWORK))
|
||
|
*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in] LIWORK
|
||
|
*> \verbatim
|
||
|
*> LIWORK is INTEGER
|
||
|
*> The dimension of the array IWORK.
|
||
|
*> If N <= 1, LIWORK must be at least 1.
|
||
|
*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1.
|
||
|
*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
|
||
|
*>
|
||
|
*> If LIWORK = -1, then a workspace query is assumed; the
|
||
|
*> routine only calculates the optimal sizes of the WORK and
|
||
|
*> IWORK arrays, returns these values as the first entries of
|
||
|
*> the WORK and IWORK arrays, and no error message related to
|
||
|
*> LWORK or LIWORK is issued by XERBLA.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[out] INFO
|
||
|
*> \verbatim
|
||
|
*> INFO is INTEGER
|
||
|
*> = 0: successful exit
|
||
|
*> < 0: if INFO = -i, the i-th argument had an illegal value
|
||
|
*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed
|
||
|
*> to converge; i off-diagonal elements of an intermediate
|
||
|
*> tridiagonal form did not converge to zero;
|
||
|
*> if INFO = i and JOBZ = 'V', then the algorithm failed
|
||
|
*> to compute an eigenvalue while working on the submatrix
|
||
|
*> lying in rows and columns INFO/(N+1) through
|
||
|
*> mod(INFO,N+1).
|
||
|
*> \endverbatim
|
||
|
*
|
||
|
* Authors:
|
||
|
* ========
|
||
|
*
|
||
|
*> \author Univ. of Tennessee
|
||
|
*> \author Univ. of California Berkeley
|
||
|
*> \author Univ. of Colorado Denver
|
||
|
*> \author NAG Ltd.
|
||
|
*
|
||
|
*> \ingroup realSYeigen
|
||
|
*
|
||
|
*> \par Contributors:
|
||
|
* ==================
|
||
|
*>
|
||
|
*> Jeff Rutter, Computer Science Division, University of California
|
||
|
*> at Berkeley, USA \n
|
||
|
*> Modified by Francoise Tisseur, University of Tennessee \n
|
||
|
*> Modified description of INFO. Sven, 16 Feb 05. \n
|
||
|
*> \par Further Details:
|
||
|
* =====================
|
||
|
*>
|
||
|
*> \verbatim
|
||
|
*>
|
||
|
*> All details about the 2stage techniques are available in:
|
||
|
*>
|
||
|
*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
|
||
|
*> Parallel reduction to condensed forms for symmetric eigenvalue problems
|
||
|
*> using aggregated fine-grained and memory-aware kernels. In Proceedings
|
||
|
*> of 2011 International Conference for High Performance Computing,
|
||
|
*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
|
||
|
*> Article 8, 11 pages.
|
||
|
*> http://doi.acm.org/10.1145/2063384.2063394
|
||
|
*>
|
||
|
*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
|
||
|
*> An improved parallel singular value algorithm and its implementation
|
||
|
*> for multicore hardware, In Proceedings of 2013 International Conference
|
||
|
*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
|
||
|
*> Denver, Colorado, USA, 2013.
|
||
|
*> Article 90, 12 pages.
|
||
|
*> http://doi.acm.org/10.1145/2503210.2503292
|
||
|
*>
|
||
|
*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
|
||
|
*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
|
||
|
*> calculations based on fine-grained memory aware tasks.
|
||
|
*> International Journal of High Performance Computing Applications.
|
||
|
*> Volume 28 Issue 2, Pages 196-209, May 2014.
|
||
|
*> http://hpc.sagepub.com/content/28/2/196
|
||
|
*>
|
||
|
*> \endverbatim
|
||
|
*
|
||
|
* =====================================================================
|
||
|
*  SSYEVD_2STAGE: driver that computes the eigenvalues of a real
*  symmetric matrix A.  A is reduced to tridiagonal form by
*  SSYTRD_2STAGE and the eigenvalues are then obtained with SSTERF.
*
*  Arguments (see the header documentation for full details):
*    JOBZ   (in)     only 'N' passes the argument check below.
*    UPLO   (in)     'U' or 'L': which triangle of A is stored.
*    N      (in)     order of A, N >= 0.
*    A      (in/out) the matrix; scaled in place when its norm is
*                    outside [RMIN,RMAX], then passed to
*                    SSYTRD_2STAGE.
*    LDA    (in)     leading dimension of A, LDA >= max(1,N).
*    W      (out)    the eigenvalues.
*    WORK   (out)    workspace; WORK(1) returns the minimal LWORK,
*                    rounded up in REAL arithmetic.
*    LWORK  (in)     length of WORK, or -1 for a workspace query.
*    IWORK  (out)    integer workspace; IWORK(1) returns LIWMIN.
*    LIWORK (in)     length of IWORK, or -1 for a workspace query.
*    INFO   (out)    0 on success, -i if argument i is illegal,
*                    otherwise the value returned by SSTERF.
*
      SUBROUTINE SSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
     $                          IWORK, LIWORK, INFO )
*
      IMPLICIT NONE
*
*  -- LAPACK driver routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*     .. Scalar Arguments ..
      CHARACTER          JOBZ, UPLO
      INTEGER            INFO, LDA, LIWORK, LWORK, N
*     ..
*     .. Array Arguments ..
      INTEGER            IWORK( * )
      REAL               A( LDA, * ), W( * ), WORK( * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
      REAL               ZERO, ONE
      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
*     ..
*     .. Local Scalars ..
*
*     RWKOPT holds LWMIN converted to REAL and rounded up, i.e. the
*     value reported to the caller in WORK(1).
*
      LOGICAL            LOWER, LQUERY, WANTZ
      INTEGER            IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE,
     $                   LIWMIN, LLWORK, LLWRK2, LWMIN,
     $                   LHTRD, LWTRD, KD, IB, INDHOUS
      REAL               ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
     $                   SMLNUM, RWKOPT
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILAENV2STAGE
      REAL               SLAMCH, SLANSY
      EXTERNAL           LSAME, SLAMCH, SLANSY, ILAENV2STAGE
*     ..
*     .. External Subroutines ..
      EXTERNAL           SLACPY, SLASCL, SORMTR, SSCAL, SSTEDC, SSTERF,
     $                   SSYTRD_2STAGE, XERBLA
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          INT, MAX, REAL, SQRT
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters.
*
      WANTZ = LSAME( JOBZ, 'V' )
      LOWER = LSAME( UPLO, 'L' )
      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
*
*     JOBZ = 'V' is rejected here (INFO = -1), so WANTZ is always
*     .FALSE. once the argument checks have passed.
*
      INFO = 0
      IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN
         INFO = -1
      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
         INFO = -2
      ELSE IF( N.LT.0 ) THEN
         INFO = -3
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -5
      END IF
*
*     Compute the minimal workspace sizes and check LWORK / LIWORK.
*
      IF( INFO.EQ.0 ) THEN
         IF( N.LE.1 ) THEN
            LIWMIN = 1
            LWMIN = 1
         ELSE
            KD    = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', JOBZ,
     $                            N, -1, -1, -1 )
            IB    = ILAENV2STAGE( 2, 'SSYTRD_2STAGE', JOBZ,
     $                            N, KD, -1, -1 )
            LHTRD = ILAENV2STAGE( 3, 'SSYTRD_2STAGE', JOBZ,
     $                            N, KD, IB, -1 )
            LWTRD = ILAENV2STAGE( 4, 'SSYTRD_2STAGE', JOBZ,
     $                            N, KD, IB, -1 )
            IF( WANTZ ) THEN
               LIWMIN = 3 + 5*N
               LWMIN = 1 + 6*N + 2*N**2
            ELSE
               LIWMIN = 1
               LWMIN = 2*N + 1 + LHTRD + LWTRD
            END IF
         END IF
*
*        Converting LWMIN to REAL may round it down when LWMIN has
*        more significant bits than a REAL mantissa.  In that case
*        bump the value up, so that a caller truncating WORK(1) back
*        to INTEGER does not end up with LWORK < LWMIN.
*
         RWKOPT = REAL( LWMIN )
         IF( INT( RWKOPT ).LT.LWMIN )
     $      RWKOPT = RWKOPT*( ONE+SLAMCH( 'Precision' ) )
         WORK( 1 ) = RWKOPT
         IWORK( 1 ) = LIWMIN
*
         IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
            INFO = -8
         ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
            INFO = -10
         END IF
      END IF
*
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'SSYEVD_2STAGE', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( N.EQ.0 )
     $   RETURN
*
      IF( N.EQ.1 ) THEN
         W( 1 ) = A( 1, 1 )
         IF( WANTZ )
     $      A( 1, 1 ) = ONE
         RETURN
      END IF
*
*     Get machine constants.
*
      SAFMIN = SLAMCH( 'Safe minimum' )
      EPS    = SLAMCH( 'Precision' )
      SMLNUM = SAFMIN / EPS
      BIGNUM = ONE / SMLNUM
      RMIN   = SQRT( SMLNUM )
      RMAX   = SQRT( BIGNUM )
*
*     Scale matrix to allowable range, if necessary.
*
      ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK )
      ISCALE = 0
      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
         ISCALE = 1
         SIGMA = RMIN / ANRM
      ELSE IF( ANRM.GT.RMAX ) THEN
         ISCALE = 1
         SIGMA = RMAX / ANRM
      END IF
      IF( ISCALE.EQ.1 )
     $   CALL SLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
*
*     Call SSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form.
*
*     Layout of WORK:
*        WORK( INDE )    : N entries, passed as the E argument
*        WORK( INDTAU )  : N entries, passed as the TAU argument
*        WORK( INDHOUS ) : LHTRD entries, passed as the HOUS2 argument
*        WORK( INDWRK )  : remaining LLWORK entries of workspace
*
      INDE    = 1
      INDTAU  = INDE + N
      INDHOUS = INDTAU + N
      INDWRK  = INDHOUS + LHTRD
      LLWORK  = LWORK - INDWRK + 1
      INDWK2  = INDWRK + N*N
      LLWRK2  = LWORK - INDWK2 + 1
*
      CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK( INDE ),
     $                    WORK( INDTAU ), WORK( INDHOUS ), LHTRD,
     $                    WORK( INDWRK ), LLWORK, IINFO )
*
*     For eigenvalues only, call SSTERF.  For eigenvectors, first call
*     SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
*     tridiagonal matrix, then call SORMTR to multiply it by the
*     Householder transformations stored in A.
*
      IF( .NOT.WANTZ ) THEN
         CALL SSTERF( N, W, WORK( INDE ), INFO )
      ELSE
*        Not available in this release; the argument checking above
*        should never let execution get here.  The calls after the
*        RETURN are unreachable.
         RETURN
         CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N,
     $                WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO )
         CALL SORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ),
     $                WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2,
     $                IINFO )
         CALL SLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA )
      END IF
*
*     If matrix was scaled, then rescale eigenvalues appropriately.
*
      IF( ISCALE.EQ.1 )
     $   CALL SSCAL( N, ONE / SIGMA, W, 1 )
*
*     Report the (rounded-up) minimal workspace sizes again, since
*     WORK was used as scratch space above.
*
      WORK( 1 ) = RWKOPT
      IWORK( 1 ) = LIWMIN
*
      RETURN
*
*     End of SSYEVD_2STAGE
*
      END
|