You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
554 lines
18 KiB
554 lines
18 KiB
2 years ago
|
*> \brief \b DSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric tridiagonal form T
|
||
|
*
|
||
|
* =========== DOCUMENTATION ===========
|
||
|
*
|
||
|
* Online html documentation available at
|
||
|
* http://www.netlib.org/lapack/explore-html/
|
||
|
*
|
||
|
*> \htmlonly
|
||
|
*> Download DSYTRD_SB2ST + dependencies
|
||
|
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytrd_sb2st.f">
|
||
|
*> [TGZ]</a>
|
||
|
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytrd_sb2st.f">
|
||
|
*> [ZIP]</a>
|
||
|
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytrd_sb2st.f">
|
||
|
*> [TXT]</a>
|
||
|
*> \endhtmlonly
|
||
|
*
|
||
|
* Definition:
|
||
|
* ===========
|
||
|
*
|
||
|
* SUBROUTINE DSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB,
|
||
|
* D, E, HOUS, LHOUS, WORK, LWORK, INFO )
|
||
|
*
|
||
|
* #if defined(_OPENMP)
|
||
|
* use omp_lib
|
||
|
* #endif
|
||
|
*
|
||
|
* IMPLICIT NONE
|
||
|
*
|
||
|
* .. Scalar Arguments ..
|
||
|
* CHARACTER STAGE1, UPLO, VECT
|
||
|
* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO
|
||
|
* ..
|
||
|
* .. Array Arguments ..
|
||
|
* DOUBLE PRECISION D( * ), E( * )
|
||
|
* DOUBLE PRECISION AB( LDAB, * ), HOUS( * ), WORK( * )
|
||
|
* ..
|
||
|
*
|
||
|
*
|
||
|
*> \par Purpose:
|
||
|
* =============
|
||
|
*>
|
||
|
*> \verbatim
|
||
|
*>
|
||
|
*> DSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric
|
||
|
*> tridiagonal form T by a orthogonal similarity transformation:
|
||
|
*> Q**T * A * Q = T.
|
||
|
*> \endverbatim
|
||
|
*
|
||
|
* Arguments:
|
||
|
* ==========
|
||
|
*
|
||
|
*> \param[in] STAGE1
|
||
|
*> \verbatim
|
||
|
*> STAGE1 is CHARACTER*1
|
||
|
*> = 'N': "No": to mention that the stage 1 of the reduction
|
||
|
*> from dense to band using the dsytrd_sy2sb routine
|
||
|
*> was not called before this routine to reproduce AB.
|
||
|
*> In other term this routine is called as standalone.
|
||
|
*> = 'Y': "Yes": to mention that the stage 1 of the
|
||
|
*> reduction from dense to band using the dsytrd_sy2sb
|
||
|
*> routine has been called to produce AB (e.g., AB is
|
||
|
*> the output of dsytrd_sy2sb.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in] VECT
|
||
|
*> \verbatim
|
||
|
*> VECT is CHARACTER*1
|
||
|
*> = 'N': No need for the Housholder representation,
|
||
|
*> and thus LHOUS is of size max(1, 4*N);
|
||
|
*> = 'V': the Householder representation is needed to
|
||
|
*> either generate or to apply Q later on,
|
||
|
*> then LHOUS is to be queried and computed.
|
||
|
*> (NOT AVAILABLE IN THIS RELEASE).
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in] UPLO
|
||
|
*> \verbatim
|
||
|
*> UPLO is CHARACTER*1
|
||
|
*> = 'U': Upper triangle of A is stored;
|
||
|
*> = 'L': Lower triangle of A is stored.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in] N
|
||
|
*> \verbatim
|
||
|
*> N is INTEGER
|
||
|
*> The order of the matrix A. N >= 0.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in] KD
|
||
|
*> \verbatim
|
||
|
*> KD is INTEGER
|
||
|
*> The number of superdiagonals of the matrix A if UPLO = 'U',
|
||
|
*> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in,out] AB
|
||
|
*> \verbatim
|
||
|
*> AB is DOUBLE PRECISION array, dimension (LDAB,N)
|
||
|
*> On entry, the upper or lower triangle of the symmetric band
|
||
|
*> matrix A, stored in the first KD+1 rows of the array. The
|
||
|
*> j-th column of A is stored in the j-th column of the array AB
|
||
|
*> as follows:
|
||
|
*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
|
||
|
*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
|
||
|
*> On exit, the diagonal elements of AB are overwritten by the
|
||
|
*> diagonal elements of the tridiagonal matrix T; if KD > 0, the
|
||
|
*> elements on the first superdiagonal (if UPLO = 'U') or the
|
||
|
*> first subdiagonal (if UPLO = 'L') are overwritten by the
|
||
|
*> off-diagonal elements of T; the rest of AB is overwritten by
|
||
|
*> values generated during the reduction.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in] LDAB
|
||
|
*> \verbatim
|
||
|
*> LDAB is INTEGER
|
||
|
*> The leading dimension of the array AB. LDAB >= KD+1.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[out] D
|
||
|
*> \verbatim
|
||
|
*> D is DOUBLE PRECISION array, dimension (N)
|
||
|
*> The diagonal elements of the tridiagonal matrix T.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[out] E
|
||
|
*> \verbatim
|
||
|
*> E is DOUBLE PRECISION array, dimension (N-1)
|
||
|
*> The off-diagonal elements of the tridiagonal matrix T:
|
||
|
*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[out] HOUS
|
||
|
*> \verbatim
|
||
|
*> HOUS is DOUBLE PRECISION array, dimension LHOUS, that
|
||
|
*> store the Householder representation.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in] LHOUS
|
||
|
*> \verbatim
|
||
|
*> LHOUS is INTEGER
|
||
|
*> The dimension of the array HOUS. LHOUS = MAX(1, dimension)
|
||
|
*> If LWORK = -1, or LHOUS=-1,
|
||
|
*> then a query is assumed; the routine
|
||
|
*> only calculates the optimal size of the HOUS array, returns
|
||
|
*> this value as the first entry of the HOUS array, and no error
|
||
|
*> message related to LHOUS is issued by XERBLA.
|
||
|
*> LHOUS = MAX(1, dimension) where
|
||
|
*> dimension = 4*N if VECT='N'
|
||
|
*> not available now if VECT='H'
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[out] WORK
|
||
|
*> \verbatim
|
||
|
*> WORK is DOUBLE PRECISION array, dimension LWORK.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[in] LWORK
|
||
|
*> \verbatim
|
||
|
*> LWORK is INTEGER
|
||
|
*> The dimension of the array WORK. LWORK = MAX(1, dimension)
|
||
|
*> If LWORK = -1, or LHOUS=-1,
|
||
|
*> then a workspace query is assumed; the routine
|
||
|
*> only calculates the optimal size of the WORK array, returns
|
||
|
*> this value as the first entry of the WORK array, and no error
|
||
|
*> message related to LWORK is issued by XERBLA.
|
||
|
*> LWORK = MAX(1, dimension) where
|
||
|
*> dimension = (2KD+1)*N + KD*NTHREADS
|
||
|
*> where KD is the blocking size of the reduction,
|
||
|
*> FACTOPTNB is the blocking used by the QR or LQ
|
||
|
*> algorithm, usually FACTOPTNB=128 is a good choice
|
||
|
*> NTHREADS is the number of threads used when
|
||
|
*> openMP compilation is enabled, otherwise =1.
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
*> \param[out] INFO
|
||
|
*> \verbatim
|
||
|
*> INFO is INTEGER
|
||
|
*> = 0: successful exit
|
||
|
*> < 0: if INFO = -i, the i-th argument had an illegal value
|
||
|
*> \endverbatim
|
||
|
*
|
||
|
* Authors:
|
||
|
* ========
|
||
|
*
|
||
|
*> \author Univ. of Tennessee
|
||
|
*> \author Univ. of California Berkeley
|
||
|
*> \author Univ. of Colorado Denver
|
||
|
*> \author NAG Ltd.
|
||
|
*
|
||
|
*> \ingroup real16OTHERcomputational
|
||
|
*
|
||
|
*> \par Further Details:
|
||
|
* =====================
|
||
|
*>
|
||
|
*> \verbatim
|
||
|
*>
|
||
|
*> Implemented by Azzam Haidar.
|
||
|
*>
|
||
|
*> All details are available on technical report, SC11, SC13 papers.
|
||
|
*>
|
||
|
*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
|
||
|
*> Parallel reduction to condensed forms for symmetric eigenvalue problems
|
||
|
*> using aggregated fine-grained and memory-aware kernels. In Proceedings
|
||
|
*> of 2011 International Conference for High Performance Computing,
|
||
|
*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
|
||
|
*> Article 8 , 11 pages.
|
||
|
*> http://doi.acm.org/10.1145/2063384.2063394
|
||
|
*>
|
||
|
*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
|
||
|
*> An improved parallel singular value algorithm and its implementation
|
||
|
*> for multicore hardware, In Proceedings of 2013 International Conference
|
||
|
*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
|
||
|
*> Denver, Colorado, USA, 2013.
|
||
|
*> Article 90, 12 pages.
|
||
|
*> http://doi.acm.org/10.1145/2503210.2503292
|
||
|
*>
|
||
|
*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
|
||
|
*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
|
||
|
*> calculations based on fine-grained memory aware tasks.
|
||
|
*> International Journal of High Performance Computing Applications.
|
||
|
*> Volume 28 Issue 2, Pages 196-209, May 2014.
|
||
|
*> http://hpc.sagepub.com/content/28/2/196
|
||
|
*>
|
||
|
*> \endverbatim
|
||
|
*>
|
||
|
* =====================================================================
|
||
|
SUBROUTINE DSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB,
|
||
|
$ D, E, HOUS, LHOUS, WORK, LWORK, INFO )
|
||
|
*
|
||
|
#if defined(_OPENMP)
|
||
|
use omp_lib
|
||
|
#endif
|
||
|
*
|
||
|
IMPLICIT NONE
|
||
|
*
|
||
|
* -- LAPACK computational routine --
|
||
|
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||
|
*
|
||
|
* .. Scalar Arguments ..
|
||
|
CHARACTER STAGE1, UPLO, VECT
|
||
|
INTEGER N, KD, LDAB, LHOUS, LWORK, INFO
|
||
|
* ..
|
||
|
* .. Array Arguments ..
|
||
|
DOUBLE PRECISION D( * ), E( * )
|
||
|
DOUBLE PRECISION AB( LDAB, * ), HOUS( * ), WORK( * )
|
||
|
* ..
|
||
|
*
|
||
|
* =====================================================================
|
||
|
*
|
||
|
* .. Parameters ..
|
||
|
DOUBLE PRECISION RZERO
|
||
|
DOUBLE PRECISION ZERO, ONE
|
||
|
PARAMETER ( RZERO = 0.0D+0,
|
||
|
$ ZERO = 0.0D+0,
|
||
|
$ ONE = 1.0D+0 )
|
||
|
* ..
|
||
|
* .. Local Scalars ..
|
||
|
LOGICAL LQUERY, WANTQ, UPPER, AFTERS1
|
||
|
INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST,
|
||
|
$ ED, STIND, EDIND, BLKLASTIND, COLPT, THED,
|
||
|
$ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID,
|
||
|
$ NBTILES, TTYPE, TID, NTHREADS, DEBUG,
|
||
|
$ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS,
|
||
|
$ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU,
|
||
|
$ SIDEV, SIZETAU, LDV, LHMIN, LWMIN
|
||
|
* ..
|
||
|
* .. External Subroutines ..
|
||
|
EXTERNAL DSB2ST_KERNELS, DLACPY, DLASET, XERBLA
|
||
|
* ..
|
||
|
* .. Intrinsic Functions ..
|
||
|
INTRINSIC MIN, MAX, CEILING, REAL
|
||
|
* ..
|
||
|
* .. External Functions ..
|
||
|
LOGICAL LSAME
|
||
|
INTEGER ILAENV2STAGE
|
||
|
EXTERNAL LSAME, ILAENV2STAGE
|
||
|
* ..
|
||
|
* .. Executable Statements ..
|
||
|
*
|
||
|
* Determine the minimal workspace size required.
|
||
|
* Test the input parameters
|
||
|
*
|
||
|
DEBUG = 0
|
||
|
INFO = 0
|
||
|
AFTERS1 = LSAME( STAGE1, 'Y' )
|
||
|
WANTQ = LSAME( VECT, 'V' )
|
||
|
UPPER = LSAME( UPLO, 'U' )
|
||
|
LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 )
|
||
|
*
|
||
|
* Determine the block size, the workspace size and the hous size.
|
||
|
*
|
||
|
IB = ILAENV2STAGE( 2, 'DSYTRD_SB2ST', VECT, N, KD, -1, -1 )
|
||
|
LHMIN = ILAENV2STAGE( 3, 'DSYTRD_SB2ST', VECT, N, KD, IB, -1 )
|
||
|
LWMIN = ILAENV2STAGE( 4, 'DSYTRD_SB2ST', VECT, N, KD, IB, -1 )
|
||
|
*
|
||
|
IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN
|
||
|
INFO = -1
|
||
|
ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN
|
||
|
INFO = -2
|
||
|
ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
|
||
|
INFO = -3
|
||
|
ELSE IF( N.LT.0 ) THEN
|
||
|
INFO = -4
|
||
|
ELSE IF( KD.LT.0 ) THEN
|
||
|
INFO = -5
|
||
|
ELSE IF( LDAB.LT.(KD+1) ) THEN
|
||
|
INFO = -7
|
||
|
ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN
|
||
|
INFO = -11
|
||
|
ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
|
||
|
INFO = -13
|
||
|
END IF
|
||
|
*
|
||
|
IF( INFO.EQ.0 ) THEN
|
||
|
HOUS( 1 ) = LHMIN
|
||
|
WORK( 1 ) = LWMIN
|
||
|
END IF
|
||
|
*
|
||
|
IF( INFO.NE.0 ) THEN
|
||
|
CALL XERBLA( 'DSYTRD_SB2ST', -INFO )
|
||
|
RETURN
|
||
|
ELSE IF( LQUERY ) THEN
|
||
|
RETURN
|
||
|
END IF
|
||
|
*
|
||
|
* Quick return if possible
|
||
|
*
|
||
|
IF( N.EQ.0 ) THEN
|
||
|
HOUS( 1 ) = 1
|
||
|
WORK( 1 ) = 1
|
||
|
RETURN
|
||
|
END IF
|
||
|
*
|
||
|
* Determine pointer position
|
||
|
*
|
||
|
LDV = KD + IB
|
||
|
SIZETAU = 2 * N
|
||
|
SIDEV = 2 * N
|
||
|
INDTAU = 1
|
||
|
INDV = INDTAU + SIZETAU
|
||
|
LDA = 2 * KD + 1
|
||
|
SIZEA = LDA * N
|
||
|
INDA = 1
|
||
|
INDW = INDA + SIZEA
|
||
|
NTHREADS = 1
|
||
|
TID = 0
|
||
|
*
|
||
|
IF( UPPER ) THEN
|
||
|
APOS = INDA + KD
|
||
|
AWPOS = INDA
|
||
|
DPOS = APOS + KD
|
||
|
OFDPOS = DPOS - 1
|
||
|
ABDPOS = KD + 1
|
||
|
ABOFDPOS = KD
|
||
|
ELSE
|
||
|
APOS = INDA
|
||
|
AWPOS = INDA + KD + 1
|
||
|
DPOS = APOS
|
||
|
OFDPOS = DPOS + 1
|
||
|
ABDPOS = 1
|
||
|
ABOFDPOS = 2
|
||
|
|
||
|
ENDIF
|
||
|
*
|
||
|
* Case KD=0:
|
||
|
* The matrix is diagonal. We just copy it (convert to "real" for
|
||
|
* real because D is double and the imaginary part should be 0)
|
||
|
* and store it in D. A sequential code here is better or
|
||
|
* in a parallel environment it might need two cores for D and E
|
||
|
*
|
||
|
IF( KD.EQ.0 ) THEN
|
||
|
DO 30 I = 1, N
|
||
|
D( I ) = ( AB( ABDPOS, I ) )
|
||
|
30 CONTINUE
|
||
|
DO 40 I = 1, N-1
|
||
|
E( I ) = RZERO
|
||
|
40 CONTINUE
|
||
|
*
|
||
|
HOUS( 1 ) = 1
|
||
|
WORK( 1 ) = 1
|
||
|
RETURN
|
||
|
END IF
|
||
|
*
|
||
|
* Case KD=1:
|
||
|
* The matrix is already Tridiagonal. We have to make diagonal
|
||
|
* and offdiagonal elements real, and store them in D and E.
|
||
|
* For that, for real precision just copy the diag and offdiag
|
||
|
* to D and E while for the COMPLEX case the bulge chasing is
|
||
|
* performed to convert the hermetian tridiagonal to symmetric
|
||
|
* tridiagonal. A simpler conversion formula might be used, but then
|
||
|
* updating the Q matrix will be required and based if Q is generated
|
||
|
* or not this might complicate the story.
|
||
|
*
|
||
|
IF( KD.EQ.1 ) THEN
|
||
|
DO 50 I = 1, N
|
||
|
D( I ) = ( AB( ABDPOS, I ) )
|
||
|
50 CONTINUE
|
||
|
*
|
||
|
IF( UPPER ) THEN
|
||
|
DO 60 I = 1, N-1
|
||
|
E( I ) = ( AB( ABOFDPOS, I+1 ) )
|
||
|
60 CONTINUE
|
||
|
ELSE
|
||
|
DO 70 I = 1, N-1
|
||
|
E( I ) = ( AB( ABOFDPOS, I ) )
|
||
|
70 CONTINUE
|
||
|
ENDIF
|
||
|
*
|
||
|
HOUS( 1 ) = 1
|
||
|
WORK( 1 ) = 1
|
||
|
RETURN
|
||
|
END IF
|
||
|
*
|
||
|
* Main code start here.
|
||
|
* Reduce the symmetric band of A to a tridiagonal matrix.
|
||
|
*
|
||
|
THGRSIZ = N
|
||
|
GRSIZ = 1
|
||
|
SHIFT = 3
|
||
|
NBTILES = CEILING( REAL(N)/REAL(KD) )
|
||
|
STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) )
|
||
|
THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) )
|
||
|
*
|
||
|
CALL DLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA )
|
||
|
CALL DLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA )
|
||
|
*
|
||
|
*
|
||
|
* openMP parallelisation start here
|
||
|
*
|
||
|
#if defined(_OPENMP)
|
||
|
!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
|
||
|
!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID )
|
||
|
!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
|
||
|
!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
|
||
|
!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
|
||
|
!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
|
||
|
!$OMP MASTER
|
||
|
#endif
|
||
|
*
|
||
|
* main bulge chasing loop
|
||
|
*
|
||
|
DO 100 THGRID = 1, THGRNB
|
||
|
STT = (THGRID-1)*THGRSIZ+1
|
||
|
THED = MIN( (STT + THGRSIZ -1), (N-1))
|
||
|
DO 110 I = STT, N-1
|
||
|
ED = MIN( I, THED )
|
||
|
IF( STT.GT.ED ) EXIT
|
||
|
DO 120 M = 1, STEPERCOL
|
||
|
ST = STT
|
||
|
DO 130 SWEEPID = ST, ED
|
||
|
DO 140 K = 1, GRSIZ
|
||
|
MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ)
|
||
|
$ + (M-1)*GRSIZ + K
|
||
|
IF ( MYID.EQ.1 ) THEN
|
||
|
TTYPE = 1
|
||
|
ELSE
|
||
|
TTYPE = MOD( MYID, 2 ) + 2
|
||
|
ENDIF
|
||
|
|
||
|
IF( TTYPE.EQ.2 ) THEN
|
||
|
COLPT = (MYID/2)*KD + SWEEPID
|
||
|
STIND = COLPT-KD+1
|
||
|
EDIND = MIN(COLPT,N)
|
||
|
BLKLASTIND = COLPT
|
||
|
ELSE
|
||
|
COLPT = ((MYID+1)/2)*KD + SWEEPID
|
||
|
STIND = COLPT-KD+1
|
||
|
EDIND = MIN(COLPT,N)
|
||
|
IF( ( STIND.GE.EDIND-1 ).AND.
|
||
|
$ ( EDIND.EQ.N ) ) THEN
|
||
|
BLKLASTIND = N
|
||
|
ELSE
|
||
|
BLKLASTIND = 0
|
||
|
ENDIF
|
||
|
ENDIF
|
||
|
*
|
||
|
* Call the kernel
|
||
|
*
|
||
|
#if defined(_OPENMP) && _OPENMP >= 201307
|
||
|
IF( TTYPE.NE.1 ) THEN
|
||
|
!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
|
||
|
!$OMP$ DEPEND(in:WORK(MYID-1))
|
||
|
!$OMP$ DEPEND(out:WORK(MYID))
|
||
|
TID = OMP_GET_THREAD_NUM()
|
||
|
CALL DSB2ST_KERNELS( UPLO, WANTQ, TTYPE,
|
||
|
$ STIND, EDIND, SWEEPID, N, KD, IB,
|
||
|
$ WORK ( INDA ), LDA,
|
||
|
$ HOUS( INDV ), HOUS( INDTAU ), LDV,
|
||
|
$ WORK( INDW + TID*KD ) )
|
||
|
!$OMP END TASK
|
||
|
ELSE
|
||
|
!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
|
||
|
!$OMP$ DEPEND(out:WORK(MYID))
|
||
|
TID = OMP_GET_THREAD_NUM()
|
||
|
CALL DSB2ST_KERNELS( UPLO, WANTQ, TTYPE,
|
||
|
$ STIND, EDIND, SWEEPID, N, KD, IB,
|
||
|
$ WORK ( INDA ), LDA,
|
||
|
$ HOUS( INDV ), HOUS( INDTAU ), LDV,
|
||
|
$ WORK( INDW + TID*KD ) )
|
||
|
!$OMP END TASK
|
||
|
ENDIF
|
||
|
#else
|
||
|
CALL DSB2ST_KERNELS( UPLO, WANTQ, TTYPE,
|
||
|
$ STIND, EDIND, SWEEPID, N, KD, IB,
|
||
|
$ WORK ( INDA ), LDA,
|
||
|
$ HOUS( INDV ), HOUS( INDTAU ), LDV,
|
||
|
$ WORK( INDW + TID*KD ) )
|
||
|
#endif
|
||
|
IF ( BLKLASTIND.GE.(N-1) ) THEN
|
||
|
STT = STT + 1
|
||
|
EXIT
|
||
|
ENDIF
|
||
|
140 CONTINUE
|
||
|
130 CONTINUE
|
||
|
120 CONTINUE
|
||
|
110 CONTINUE
|
||
|
100 CONTINUE
|
||
|
*
|
||
|
#if defined(_OPENMP)
|
||
|
!$OMP END MASTER
|
||
|
!$OMP END PARALLEL
|
||
|
#endif
|
||
|
*
|
||
|
* Copy the diagonal from A to D. Note that D is REAL thus only
|
||
|
* the Real part is needed, the imaginary part should be zero.
|
||
|
*
|
||
|
DO 150 I = 1, N
|
||
|
D( I ) = ( WORK( DPOS+(I-1)*LDA ) )
|
||
|
150 CONTINUE
|
||
|
*
|
||
|
* Copy the off diagonal from A to E. Note that E is REAL thus only
|
||
|
* the Real part is needed, the imaginary part should be zero.
|
||
|
*
|
||
|
IF( UPPER ) THEN
|
||
|
DO 160 I = 1, N-1
|
||
|
E( I ) = ( WORK( OFDPOS+I*LDA ) )
|
||
|
160 CONTINUE
|
||
|
ELSE
|
||
|
DO 170 I = 1, N-1
|
||
|
E( I ) = ( WORK( OFDPOS+(I-1)*LDA ) )
|
||
|
170 CONTINUE
|
||
|
ENDIF
|
||
|
*
|
||
|
HOUS( 1 ) = LHMIN
|
||
|
WORK( 1 ) = LWMIN
|
||
|
RETURN
|
||
|
*
|
||
|
* End of DSYTRD_SB2ST
|
||
|
*
|
||
|
END
|
||
|
|