LAPACK-3.11.0/SRC/chetrd_he2hb.f

*> \brief \b CHETRD_HE2HB
*
*  @generated from zhetrd_he2hb.f, fortran z -> c, Wed Dec  7 08:22:40 2016
*      
*  =========== DOCUMENTATION ===========
*
* Online html documentation available at 
*            http://www.netlib.org/lapack/explore-html/ 
*
*> \htmlonly
*> Download CHETRD_HE2HB + dependencies 
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrd_he2hb.f"> 
*> [TGZ]</a> 
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrd_he2hb.f"> 
*> [ZIP]</a> 
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrd_he2hb.f"> 
*> [TXT]</a>
*> \endhtmlonly 
*
*  Definition:
*  ===========
*
*       SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, 
*                              WORK, LWORK, INFO )
*
*       IMPLICIT NONE
*
*       .. Scalar Arguments ..
*       CHARACTER          UPLO
*       INTEGER            INFO, LDA, LDAB, LWORK, N, KD
*       ..
*       .. Array Arguments ..
*       COMPLEX            A( LDA, * ), AB( LDAB, * ), 
*                          TAU( * ), WORK( * )
*       ..
*  
*
*> \par Purpose:
*  =============
*>
*> \verbatim
*>
*> CHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian
*> band-diagonal form AB by a unitary similarity transformation:
*> Q**H * A * Q = AB.
*> \endverbatim
*
*  Arguments:
*  ==========
*
*> \param[in] UPLO
*> \verbatim
*>          UPLO is CHARACTER*1
*>          = 'U':  Upper triangle of A is stored;
*>          = 'L':  Lower triangle of A is stored.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*>          N is INTEGER
*>          The order of the matrix A.  N >= 0.
*> \endverbatim
*>
*> \param[in] KD
*> \verbatim
*>          KD is INTEGER
*>          The number of superdiagonals of the reduced matrix if UPLO = 'U',
*>          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
*>          The reduced matrix is stored in the array AB.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*>          A is COMPLEX array, dimension (LDA,N)
*>          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
*>          N-by-N upper triangular part of A contains the upper
*>          triangular part of the matrix A, and the strictly lower
*>          triangular part of A is not referenced.  If UPLO = 'L', the
*>          leading N-by-N lower triangular part of A contains the lower
*>          triangular part of the matrix A, and the strictly upper
*>          triangular part of A is not referenced.
*>          On exit, if UPLO = 'U', the diagonal and first superdiagonal
*>          of A are overwritten by the corresponding elements of the
*>          tridiagonal matrix T, and the elements above the first
*>          superdiagonal, with the array TAU, represent the unitary
*>          matrix Q as a product of elementary reflectors; if UPLO
*>          = 'L', the diagonal and first subdiagonal of A are over-
*>          written by the corresponding elements of the tridiagonal
*>          matrix T, and the elements below the first subdiagonal, with
*>          the array TAU, represent the unitary matrix Q as a product
*>          of elementary reflectors. See Further Details.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*>          LDA is INTEGER
*>          The leading dimension of the array A.  LDA >= max(1,N).
*> \endverbatim
*>
*> \param[out] AB
*> \verbatim
*>          AB is COMPLEX array, dimension (LDAB,N)
*>          On exit, the upper or lower triangle of the Hermitian band
*>          matrix A, stored in the first KD+1 rows of the array.  The
*>          j-th column of A is stored in the j-th column of the array AB
*>          as follows:
*>          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
*>          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
*> \endverbatim
*>
*> \param[in] LDAB
*> \verbatim
*>          LDAB is INTEGER
*>          The leading dimension of the array AB.  LDAB >= KD+1.
*> \endverbatim
*>
*> \param[out] TAU
*> \verbatim
*>          TAU is COMPLEX array, dimension (N-KD)
*>          The scalar factors of the elementary reflectors (see Further
*>          Details).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*>          WORK is COMPLEX array, dimension (LWORK)
*>          On exit, if INFO = 0, or if LWORK=-1, 
*>          WORK(1) returns the size of LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*>          LWORK is INTEGER
*>          The dimension of the array WORK which should be calculated
*>          by a workspace query. LWORK = MAX(1, LWORK_QUERY)
*>          If LWORK = -1, then a workspace query is assumed; the routine
*>          only calculates the optimal size of the WORK array, returns
*>          this value as the first entry of the WORK array, and no error
*>          message related to LWORK is issued by XERBLA.
*>          LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
*>          where FACTOPTNB is the blocking used by the QR or LQ
*>          algorithm, usually FACTOPTNB=128 is a good choice otherwise
*>          putting LWORK=-1 will provide the size of WORK.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*>          INFO is INTEGER
*>          = 0:  successful exit
*>          < 0:  if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
*  Authors:
*  ========
*
*> \author Univ. of Tennessee 
*> \author Univ. of California Berkeley 
*> \author Univ. of Colorado Denver 
*> \author NAG Ltd. 
*
*> \ingroup complexHEcomputational
*
*> \par Further Details:
*  =====================
*>
*> \verbatim
*>
*>  Implemented by Azzam Haidar.
*>
*>  All details are available on technical report, SC11, SC13 papers.
*>
*>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
*>  Parallel reduction to condensed forms for symmetric eigenvalue problems
*>  using aggregated fine-grained and memory-aware kernels. In Proceedings
*>  of 2011 International Conference for High Performance Computing,
*>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
*>  Article 8 , 11 pages.
*>  http://doi.acm.org/10.1145/2063384.2063394
*>
*>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
*>  An improved parallel singular value algorithm and its implementation 
*>  for multicore hardware, In Proceedings of 2013 International Conference
*>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
*>  Denver, Colorado, USA, 2013.
*>  Article 90, 12 pages.
*>  http://doi.acm.org/10.1145/2503210.2503292
*>
*>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
*>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
*>  calculations based on fine-grained memory aware tasks.
*>  International Journal of High Performance Computing Applications.
*>  Volume 28 Issue 2, Pages 196-209, May 2014.
*>  http://hpc.sagepub.com/content/28/2/196 
*>
*> \endverbatim
*>
*> \verbatim
*>
*>  If UPLO = 'U', the matrix Q is represented as a product of elementary
*>  reflectors
*>
*>     Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd.
*>
*>  Each H(i) has the form
*>
*>     H(i) = I - tau * v * v**H
*>
*>  where tau is a complex scalar, and v is a complex vector with
*>  v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in
*>  A(i,i+kd+1:n), and tau in TAU(i).
*>
*>  If UPLO = 'L', the matrix Q is represented as a product of elementary
*>  reflectors
*>
*>     Q = H(1) H(2) . . . H(k), where k = n-kd.
*>
*>  Each H(i) has the form
*>
*>     H(i) = I - tau * v * v**H
*>
*>  where tau is a complex scalar, and v is a complex vector with
*>  v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in
*>  A(i+kd+2:n,i), and tau in TAU(i).
*>
*>  The contents of A on exit are illustrated by the following examples
*>  with n = 5:
*>
*>  if UPLO = 'U':                       if UPLO = 'L':
*>
*>    (  ab  ab/v1  v1      v1     v1    )              (  ab                            )
*>    (      ab     ab/v2   v2     v2    )              (  ab/v1  ab                     )
*>    (             ab      ab/v3  v3    )              (  v1     ab/v2  ab              )
*>    (                     ab     ab/v4 )              (  v1     v2     ab/v3  ab       )
*>    (                            ab    )              (  v1     v2     v3     ab/v4 ab )
*>
*>  where d and e denote diagonal and off-diagonal elements of T, and vi
*>  denotes an element of the vector defining H(i).
*> \endverbatim
*>
*  =====================================================================
      SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, 
     $                         WORK, LWORK, INFO )
*
      IMPLICIT NONE
*
*  -- LAPACK computational routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            INFO, LDA, LDAB, LWORK, N, KD
*     ..
*     .. Array Arguments ..
      COMPLEX            A( LDA, * ), AB( LDAB, * ), 
     $                   TAU( * ), WORK( * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
      REAL               RONE
      COMPLEX            ZERO, ONE, HALF
      PARAMETER          ( RONE = 1.0E+0,
     $                   ZERO = ( 0.0E+0, 0.0E+0 ),
     $                   ONE = ( 1.0E+0, 0.0E+0 ),
     $                   HALF = ( 0.5E+0, 0.0E+0 ) )
*     ..
*     .. Local Scalars ..
      LOGICAL            LQUERY, UPPER
      INTEGER            I, J, IINFO, LWMIN, PN, PK, LK,
     $                   LDT, LDW, LDS2, LDS1, 
     $                   LS2, LS1, LW, LT,
     $                   TPOS, WPOS, S2POS, S1POS
*     ..
*     .. External Subroutines ..
      EXTERNAL           XERBLA, CHER2K, CHEMM, CGEMM, CCOPY,
     $                   CLARFT, CGELQF, CGEQRF, CLASET
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MIN, MAX
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILAENV2STAGE 
      EXTERNAL           LSAME, ILAENV2STAGE
*     ..
*     .. Executable Statements ..
*
*     Determine the minimal workspace size required 
*     and test the input parameters
*
      INFO   = 0
      UPPER  = LSAME( UPLO, 'U' )
      LQUERY = ( LWORK.EQ.-1 )
      LWMIN  = ILAENV2STAGE( 4, 'CHETRD_HE2HB', '', N, KD, -1, -1 )
      
      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( KD.LT.0 ) THEN
         INFO = -3
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -5
      ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN
         INFO = -7
      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
         INFO = -10
      END IF
*
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'CHETRD_HE2HB', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         WORK( 1 ) = LWMIN
         RETURN
      END IF
*
*     Quick return if possible        
*     Copy the upper/lower portion of A into AB 
*
      IF( N.LE.KD+1 ) THEN
          IF( UPPER ) THEN
              DO 100 I = 1, N
                  LK = MIN( KD+1, I )
                  CALL CCOPY( LK, A( I-LK+1, I ), 1, 
     $                            AB( KD+1-LK+1, I ), 1 )
  100         CONTINUE
          ELSE
              DO 110 I = 1, N
                  LK = MIN( KD+1, N-I+1 )
                  CALL CCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 )
  110         CONTINUE
          ENDIF
          WORK( 1 ) = 1
          RETURN
      END IF
*
*     Determine the pointer position for the workspace
*      
      LDT    = KD
      LDS1   = KD
      LT     = LDT*KD
      LW     = N*KD
      LS1    = LDS1*KD
      LS2    = LWMIN - LT - LW - LS1
*      LS2 = N*MAX(KD,FACTOPTNB) 
      TPOS   = 1
      WPOS   = TPOS  + LT
      S1POS  = WPOS  + LW
      S2POS  = S1POS + LS1 
      IF( UPPER ) THEN
          LDW    = KD
          LDS2   = KD
      ELSE
          LDW    = N
          LDS2   = N
      ENDIF
*
*
*     Set the workspace of the triangular matrix T to zero once such a
*     way every time T is generated the upper/lower portion will be always zero
*   
      CALL CLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT )
*
      IF( UPPER ) THEN
          DO 10 I = 1, N - KD, KD
             PN = N-I-KD+1
             PK = MIN( N-I-KD+1, KD )
*        
*            Compute the LQ factorization of the current block
*        
             CALL CGELQF( KD, PN, A( I, I+KD ), LDA,
     $                    TAU( I ), WORK( S2POS ), LS2, IINFO )
*        
*            Copy the upper portion of A into AB
*        
             DO 20 J = I, I+PK-1
                LK = MIN( KD, N-J ) + 1
                CALL CCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
   20        CONTINUE
*                
             CALL CLASET( 'Lower', PK, PK, ZERO, ONE, 
     $                    A( I, I+KD ), LDA )
*        
*            Form the matrix T
*        
             CALL CLARFT( 'Forward', 'Rowwise', PN, PK,
     $                    A( I, I+KD ), LDA, TAU( I ), 
     $                    WORK( TPOS ), LDT )
*        
*            Compute W:
*             
             CALL CGEMM( 'Conjugate', 'No transpose', PK, PN, PK,
     $                   ONE,  WORK( TPOS ), LDT,
     $                         A( I, I+KD ), LDA,
     $                   ZERO, WORK( S2POS ), LDS2 )
*        
             CALL CHEMM( 'Right', UPLO, PK, PN,
     $                   ONE,  A( I+KD, I+KD ), LDA,
     $                         WORK( S2POS ), LDS2,
     $                   ZERO, WORK( WPOS ), LDW )
*        
             CALL CGEMM( 'No transpose', 'Conjugate', PK, PK, PN,
     $                   ONE,  WORK( WPOS ), LDW,
     $                         WORK( S2POS ), LDS2,
     $                   ZERO, WORK( S1POS ), LDS1 )
*        
             CALL CGEMM( 'No transpose', 'No transpose', PK, PN, PK,
     $                   -HALF, WORK( S1POS ), LDS1, 
     $                          A( I, I+KD ), LDA,
     $                   ONE,   WORK( WPOS ), LDW )
*             
*        
*            Update the unreduced submatrix A(i+kd:n,i+kd:n), using
*            an update of the form:  A := A - V'*W - W'*V
*        
             CALL CHER2K( UPLO, 'Conjugate', PN, PK,
     $                    -ONE, A( I, I+KD ), LDA,
     $                          WORK( WPOS ), LDW,
     $                    RONE, A( I+KD, I+KD ), LDA )
   10     CONTINUE
*
*        Copy the upper band to AB which is the band storage matrix
*
         DO 30 J = N-KD+1, N
            LK = MIN(KD, N-J) + 1
            CALL CCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
   30    CONTINUE
*
      ELSE
*
*         Reduce the lower triangle of A to lower band matrix
*        
          DO 40 I = 1, N - KD, KD
             PN = N-I-KD+1
             PK = MIN( N-I-KD+1, KD )
*        
*            Compute the QR factorization of the current block
*        
             CALL CGEQRF( PN, KD, A( I+KD, I ), LDA,
     $                    TAU( I ), WORK( S2POS ), LS2, IINFO )
*        
*            Copy the upper portion of A into AB 
*        
             DO 50 J = I, I+PK-1
                LK = MIN( KD, N-J ) + 1
                CALL CCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
   50        CONTINUE
*                
             CALL CLASET( 'Upper', PK, PK, ZERO, ONE, 
     $                    A( I+KD, I ), LDA )
*        
*            Form the matrix T
*        
             CALL CLARFT( 'Forward', 'Columnwise', PN, PK,
     $                    A( I+KD, I ), LDA, TAU( I ), 
     $                    WORK( TPOS ), LDT )
*        
*            Compute W:
*             
             CALL CGEMM( 'No transpose', 'No transpose', PN, PK, PK,
     $                   ONE, A( I+KD, I ), LDA,
     $                         WORK( TPOS ), LDT,
     $                   ZERO, WORK( S2POS ), LDS2 )
*        
             CALL CHEMM( 'Left', UPLO, PN, PK,
     $                   ONE, A( I+KD, I+KD ), LDA,
     $                         WORK( S2POS ), LDS2,
     $                   ZERO, WORK( WPOS ), LDW )
*        
             CALL CGEMM( 'Conjugate', 'No transpose', PK, PK, PN,
     $                   ONE, WORK( S2POS ), LDS2,
     $                         WORK( WPOS ), LDW,
     $                   ZERO, WORK( S1POS ), LDS1 )
*        
             CALL CGEMM( 'No transpose', 'No transpose', PN, PK, PK,
     $                   -HALF, A( I+KD, I ), LDA,
     $                         WORK( S1POS ), LDS1,
     $                   ONE, WORK( WPOS ), LDW )
*             
*        
*            Update the unreduced submatrix A(i+kd:n,i+kd:n), using
*            an update of the form:  A := A - V*W' - W*V'
*        
             CALL CHER2K( UPLO, 'No transpose', PN, PK,
     $                    -ONE, A( I+KD, I ), LDA,
     $                           WORK( WPOS ), LDW,
     $                    RONE, A( I+KD, I+KD ), LDA )
*            ==================================================================
*            RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED
*             DO 45 J = I, I+PK-1
*                LK = MIN( KD, N-J ) + 1
*                CALL CCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 )
*   45        CONTINUE
*            ==================================================================
   40     CONTINUE
*
*        Copy the lower band to AB which is the band storage matrix
*
         DO 60 J = N-KD+1, N
            LK = MIN(KD, N-J) + 1
            CALL CCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
   60    CONTINUE

      END IF
*
      WORK( 1 ) = LWMIN
      RETURN
*
*     End of CHETRD_HE2HB
*
      END
Initial commit 2 years ago			`*> \brief \b CHETRD_HE2HB`
			`*`
			`* @generated from zhetrd_he2hb.f, fortran z -> c, Wed Dec 7 08:22:40 2016`
			`*`
			`* =========== DOCUMENTATION ===========`
			`*`
			`* Online html documentation available at`
			`* http://www.netlib.org/lapack/explore-html/`
			`*`
			`*> \htmlonly`
			`*> Download CHETRD_HE2HB + dependencies`
			`*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrd_he2hb.f">`
			`*> [TGZ]</a>`
			`*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrd_he2hb.f">`
			`*> [ZIP]</a>`
			`*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrd_he2hb.f">`
			`*> [TXT]</a>`
			`*> \endhtmlonly`
			`*`
			`* Definition:`
			`* ===========`
			`*`
			`* SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,`
			`* WORK, LWORK, INFO )`
			`*`
			`* IMPLICIT NONE`
			`*`
			`* .. Scalar Arguments ..`
			`* CHARACTER UPLO`
			`* INTEGER INFO, LDA, LDAB, LWORK, N, KD`
			`* ..`
			`* .. Array Arguments ..`
			`* COMPLEX A( LDA, * ), AB( LDAB, * ),`
			`* TAU( * ), WORK( * )`
			`* ..`
			`*`
			`*`
			`*> \par Purpose:`
			`* =============`
			`*>`
			`*> \verbatim`
			`*>`
			`*> CHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian`
			`*> band-diagonal form AB by a unitary similarity transformation:`
			`> QH A * Q = AB.`
			`*> \endverbatim`
			`*`
			`* Arguments:`
			`* ==========`
			`*`
			`*> \param[in] UPLO`
			`*> \verbatim`
			`> UPLO is CHARACTER1`
			`*> = 'U': Upper triangle of A is stored;`
			`*> = 'L': Lower triangle of A is stored.`
			`*> \endverbatim`
			`*>`
			`*> \param[in] N`
			`*> \verbatim`
			`*> N is INTEGER`
			`*> The order of the matrix A. N >= 0.`
			`*> \endverbatim`
			`*>`
			`*> \param[in] KD`
			`*> \verbatim`
			`*> KD is INTEGER`
			`*> The number of superdiagonals of the reduced matrix if UPLO = 'U',`
			`*> or the number of subdiagonals if UPLO = 'L'. KD >= 0.`
			`*> The reduced matrix is stored in the array AB.`
			`*> \endverbatim`
			`*>`
			`*> \param[in,out] A`
			`*> \verbatim`
			`*> A is COMPLEX array, dimension (LDA,N)`
			`*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading`
			`*> N-by-N upper triangular part of A contains the upper`
			`*> triangular part of the matrix A, and the strictly lower`
			`*> triangular part of A is not referenced. If UPLO = 'L', the`
			`*> leading N-by-N lower triangular part of A contains the lower`
			`*> triangular part of the matrix A, and the strictly upper`
			`*> triangular part of A is not referenced.`
			`*> On exit, if UPLO = 'U', the diagonal and first superdiagonal`
			`*> of A are overwritten by the corresponding elements of the`
			`*> tridiagonal matrix T, and the elements above the first`
			`*> superdiagonal, with the array TAU, represent the unitary`
			`*> matrix Q as a product of elementary reflectors; if UPLO`
			`*> = 'L', the diagonal and first subdiagonal of A are over-`
			`*> written by the corresponding elements of the tridiagonal`
			`*> matrix T, and the elements below the first subdiagonal, with`
			`*> the array TAU, represent the unitary matrix Q as a product`
			`*> of elementary reflectors. See Further Details.`
			`*> \endverbatim`
			`*>`
			`*> \param[in] LDA`
			`*> \verbatim`
			`*> LDA is INTEGER`
			`*> The leading dimension of the array A. LDA >= max(1,N).`
			`*> \endverbatim`
			`*>`
			`*> \param[out] AB`
			`*> \verbatim`
			`*> AB is COMPLEX array, dimension (LDAB,N)`
			`*> On exit, the upper or lower triangle of the Hermitian band`
			`*> matrix A, stored in the first KD+1 rows of the array. The`
			`*> j-th column of A is stored in the j-th column of the array AB`
			`*> as follows:`
			`*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;`
			`*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).`
			`*> \endverbatim`
			`*>`
			`*> \param[in] LDAB`
			`*> \verbatim`
			`*> LDAB is INTEGER`
			`*> The leading dimension of the array AB. LDAB >= KD+1.`
			`*> \endverbatim`
			`*>`
			`*> \param[out] TAU`
			`*> \verbatim`
			`*> TAU is COMPLEX array, dimension (N-KD)`
			`*> The scalar factors of the elementary reflectors (see Further`
			`*> Details).`
			`*> \endverbatim`
			`*>`
			`*> \param[out] WORK`
			`*> \verbatim`
			`*> WORK is COMPLEX array, dimension (LWORK)`
			`*> On exit, if INFO = 0, or if LWORK=-1,`
			`*> WORK(1) returns the size of LWORK.`
			`*> \endverbatim`
			`*>`
			`*> \param[in] LWORK`
			`*> \verbatim`
			`*> LWORK is INTEGER`
			`*> The dimension of the array WORK which should be calculated`
			`*> by a workspace query. LWORK = MAX(1, LWORK_QUERY)`
			`*> If LWORK = -1, then a workspace query is assumed; the routine`
			`*> only calculates the optimal size of the WORK array, returns`
			`*> this value as the first entry of the WORK array, and no error`
			`*> message related to LWORK is issued by XERBLA.`
			`> LWORK_QUERY = NKD + Nmax(KD,FACTOPTNB) + 2KD*KD`
			`*> where FACTOPTNB is the blocking used by the QR or LQ`
			`*> algorithm, usually FACTOPTNB=128 is a good choice otherwise`
			`*> putting LWORK=-1 will provide the size of WORK.`
			`*> \endverbatim`
			`*>`
			`*> \param[out] INFO`
			`*> \verbatim`
			`*> INFO is INTEGER`
			`*> = 0: successful exit`
			`*> < 0: if INFO = -i, the i-th argument had an illegal value`
			`*> \endverbatim`
			`*`
			`* Authors:`
			`* ========`
			`*`
			`*> \author Univ. of Tennessee`
			`*> \author Univ. of California Berkeley`
			`*> \author Univ. of Colorado Denver`
			`*> \author NAG Ltd.`
			`*`
			`*> \ingroup complexHEcomputational`
			`*`
			`*> \par Further Details:`
			`* =====================`
			`*>`
			`*> \verbatim`
			`*>`
			`*> Implemented by Azzam Haidar.`
			`*>`
			`*> All details are available on technical report, SC11, SC13 papers.`
			`*>`
			`*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.`
			`*> Parallel reduction to condensed forms for symmetric eigenvalue problems`
			`*> using aggregated fine-grained and memory-aware kernels. In Proceedings`
			`*> of 2011 International Conference for High Performance Computing,`
			`*> Networking, Storage and Analysis (SC '11), New York, NY, USA,`
			`*> Article 8 , 11 pages.`
			`*> http://doi.acm.org/10.1145/2063384.2063394`
			`*>`
			`*> A. Haidar, J. Kurzak, P. Luszczek, 2013.`
			`*> An improved parallel singular value algorithm and its implementation`
			`*> for multicore hardware, In Proceedings of 2013 International Conference`
			`*> for High Performance Computing, Networking, Storage and Analysis (SC '13).`
			`*> Denver, Colorado, USA, 2013.`
			`*> Article 90, 12 pages.`
			`*> http://doi.acm.org/10.1145/2503210.2503292`
			`*>`
			`*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.`
			`*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure`
			`*> calculations based on fine-grained memory aware tasks.`
			`*> International Journal of High Performance Computing Applications.`
			`*> Volume 28 Issue 2, Pages 196-209, May 2014.`
			`*> http://hpc.sagepub.com/content/28/2/196`
			`*>`
			`*> \endverbatim`
			`*>`
			`*> \verbatim`
			`*>`
			`*> If UPLO = 'U', the matrix Q is represented as a product of elementary`
			`*> reflectors`
			`*>`
			`> Q = H(k)H . . . H(2)H H(1)*H, where k = n-kd.`
			`*>`
			`*> Each H(i) has the form`
			`*>`
			`> H(i) = I - tau v * v**H`
			`*>`
			`*> where tau is a complex scalar, and v is a complex vector with`
			`*> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in`
			`*> A(i,i+kd+1:n), and tau in TAU(i).`
			`*>`
			`*> If UPLO = 'L', the matrix Q is represented as a product of elementary`
			`*> reflectors`
			`*>`
			`*> Q = H(1) H(2) . . . H(k), where k = n-kd.`
			`*>`
			`*> Each H(i) has the form`
			`*>`
			`> H(i) = I - tau v * v**H`
			`*>`
			`*> where tau is a complex scalar, and v is a complex vector with`
			`*> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in`
			`*> A(i+kd+2:n,i), and tau in TAU(i).`
			`*>`
			`*> The contents of A on exit are illustrated by the following examples`
			`*> with n = 5:`
			`*>`
			`*> if UPLO = 'U': if UPLO = 'L':`
			`*>`
			`*> ( ab ab/v1 v1 v1 v1 ) ( ab )`
			`*> ( ab ab/v2 v2 v2 ) ( ab/v1 ab )`
			`*> ( ab ab/v3 v3 ) ( v1 ab/v2 ab )`
			`*> ( ab ab/v4 ) ( v1 v2 ab/v3 ab )`
			`*> ( ab ) ( v1 v2 v3 ab/v4 ab )`
			`*>`
			`*> where d and e denote diagonal and off-diagonal elements of T, and vi`
			`*> denotes an element of the vector defining H(i).`
			`*> \endverbatim`
			`*>`
			`* =====================================================================`
			`SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,`
			`$ WORK, LWORK, INFO )`
			`*`
			`IMPLICIT NONE`
			`*`
			`* -- LAPACK computational routine --`
			`* -- LAPACK is a software package provided by Univ. of Tennessee, --`
			`* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--`
			`*`
			`* .. Scalar Arguments ..`
			`CHARACTER UPLO`
			`INTEGER INFO, LDA, LDAB, LWORK, N, KD`
			`* ..`
			`* .. Array Arguments ..`
			`COMPLEX A( LDA, * ), AB( LDAB, * ),`
			`$ TAU( * ), WORK( * )`
			`* ..`
			`*`
			`* =====================================================================`
			`*`
			`* .. Parameters ..`
			`REAL RONE`
			`COMPLEX ZERO, ONE, HALF`
			`PARAMETER ( RONE = 1.0E+0,`
			`$ ZERO = ( 0.0E+0, 0.0E+0 ),`
			`$ ONE = ( 1.0E+0, 0.0E+0 ),`
			`$ HALF = ( 0.5E+0, 0.0E+0 ) )`
			`* ..`
			`* .. Local Scalars ..`
			`LOGICAL LQUERY, UPPER`
			`INTEGER I, J, IINFO, LWMIN, PN, PK, LK,`
			`$ LDT, LDW, LDS2, LDS1,`
			`$ LS2, LS1, LW, LT,`
			`$ TPOS, WPOS, S2POS, S1POS`
			`* ..`
			`* .. External Subroutines ..`
			`EXTERNAL XERBLA, CHER2K, CHEMM, CGEMM, CCOPY,`
			`$ CLARFT, CGELQF, CGEQRF, CLASET`
			`* ..`
			`* .. Intrinsic Functions ..`
			`INTRINSIC MIN, MAX`
			`* ..`
			`* .. External Functions ..`
			`LOGICAL LSAME`
			`INTEGER ILAENV2STAGE`
			`EXTERNAL LSAME, ILAENV2STAGE`
			`* ..`
			`* .. Executable Statements ..`
			`*`
			`* Determine the minimal workspace size required`
			`* and test the input parameters`
			`*`
			`INFO = 0`
			`UPPER = LSAME( UPLO, 'U' )`
			`LQUERY = ( LWORK.EQ.-1 )`
			`LWMIN = ILAENV2STAGE( 4, 'CHETRD_HE2HB', '', N, KD, -1, -1 )`

			`IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN`
			`INFO = -1`
			`ELSE IF( N.LT.0 ) THEN`
			`INFO = -2`
			`ELSE IF( KD.LT.0 ) THEN`
			`INFO = -3`
			`ELSE IF( LDA.LT.MAX( 1, N ) ) THEN`
			`INFO = -5`
			`ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN`
			`INFO = -7`
			`ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN`
			`INFO = -10`
			`END IF`
			`*`
			`IF( INFO.NE.0 ) THEN`
			`CALL XERBLA( 'CHETRD_HE2HB', -INFO )`
			`RETURN`
			`ELSE IF( LQUERY ) THEN`
			`WORK( 1 ) = LWMIN`
			`RETURN`
			`END IF`
			`*`
			`* Quick return if possible`
			`* Copy the upper/lower portion of A into AB`
			`*`
			`IF( N.LE.KD+1 ) THEN`
			`IF( UPPER ) THEN`
			`DO 100 I = 1, N`
			`LK = MIN( KD+1, I )`
			`CALL CCOPY( LK, A( I-LK+1, I ), 1,`
			`$ AB( KD+1-LK+1, I ), 1 )`
			`100 CONTINUE`
			`ELSE`
			`DO 110 I = 1, N`
			`LK = MIN( KD+1, N-I+1 )`
			`CALL CCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 )`
			`110 CONTINUE`
			`ENDIF`
			`WORK( 1 ) = 1`
			`RETURN`
			`END IF`
			`*`
			`* Determine the pointer position for the workspace`
			`*`
			`LDT = KD`
			`LDS1 = KD`
			`LT = LDT*KD`
			`LW = N*KD`
			`LS1 = LDS1*KD`
			`LS2 = LWMIN - LT - LW - LS1`
			`* LS2 = N*MAX(KD,FACTOPTNB)`
			`TPOS = 1`
			`WPOS = TPOS + LT`
			`S1POS = WPOS + LW`
			`S2POS = S1POS + LS1`
			`IF( UPPER ) THEN`
			`LDW = KD`
			`LDS2 = KD`
			`ELSE`
			`LDW = N`
			`LDS2 = N`
			`ENDIF`
			`*`
			`*`
			`* Set the workspace of the triangular matrix T to zero once such a`
			`* way every time T is generated the upper/lower portion will be always zero`
			`*`
			`CALL CLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT )`
			`*`
			`IF( UPPER ) THEN`
			`DO 10 I = 1, N - KD, KD`
			`PN = N-I-KD+1`
			`PK = MIN( N-I-KD+1, KD )`
			`*`
			`* Compute the LQ factorization of the current block`
			`*`
			`CALL CGELQF( KD, PN, A( I, I+KD ), LDA,`
			`$ TAU( I ), WORK( S2POS ), LS2, IINFO )`
			`*`
			`* Copy the upper portion of A into AB`
			`*`
			`DO 20 J = I, I+PK-1`
			`LK = MIN( KD, N-J ) + 1`
			`CALL CCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )`
			`20 CONTINUE`
			`*`
			`CALL CLASET( 'Lower', PK, PK, ZERO, ONE,`
			`$ A( I, I+KD ), LDA )`
			`*`
			`* Form the matrix T`
			`*`
			`CALL CLARFT( 'Forward', 'Rowwise', PN, PK,`
			`$ A( I, I+KD ), LDA, TAU( I ),`
			`$ WORK( TPOS ), LDT )`
			`*`
			`* Compute W:`
			`*`
			`CALL CGEMM( 'Conjugate', 'No transpose', PK, PN, PK,`
			`$ ONE, WORK( TPOS ), LDT,`
			`$ A( I, I+KD ), LDA,`
			`$ ZERO, WORK( S2POS ), LDS2 )`
			`*`
			`CALL CHEMM( 'Right', UPLO, PK, PN,`
			`$ ONE, A( I+KD, I+KD ), LDA,`
			`$ WORK( S2POS ), LDS2,`
			`$ ZERO, WORK( WPOS ), LDW )`
			`*`
			`CALL CGEMM( 'No transpose', 'Conjugate', PK, PK, PN,`
			`$ ONE, WORK( WPOS ), LDW,`
			`$ WORK( S2POS ), LDS2,`
			`$ ZERO, WORK( S1POS ), LDS1 )`
			`*`
			`CALL CGEMM( 'No transpose', 'No transpose', PK, PN, PK,`
			`$ -HALF, WORK( S1POS ), LDS1,`
			`$ A( I, I+KD ), LDA,`
			`$ ONE, WORK( WPOS ), LDW )`
			`*`
			`*`
			`* Update the unreduced submatrix A(i+kd:n,i+kd:n), using`
			`* an update of the form: A := A - V'W - W'V`
			`*`
			`CALL CHER2K( UPLO, 'Conjugate', PN, PK,`
			`$ -ONE, A( I, I+KD ), LDA,`
			`$ WORK( WPOS ), LDW,`
			`$ RONE, A( I+KD, I+KD ), LDA )`
			`10 CONTINUE`
			`*`
			`* Copy the upper band to AB which is the band storage matrix`
			`*`
			`DO 30 J = N-KD+1, N`
			`LK = MIN(KD, N-J) + 1`
			`CALL CCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )`
			`30 CONTINUE`
			`*`
			`ELSE`
			`*`
			`* Reduce the lower triangle of A to lower band matrix`
			`*`
			`DO 40 I = 1, N - KD, KD`
			`PN = N-I-KD+1`
			`PK = MIN( N-I-KD+1, KD )`
			`*`
			`* Compute the QR factorization of the current block`
			`*`
			`CALL CGEQRF( PN, KD, A( I+KD, I ), LDA,`
			`$ TAU( I ), WORK( S2POS ), LS2, IINFO )`
			`*`
			`* Copy the upper portion of A into AB`
			`*`
			`DO 50 J = I, I+PK-1`
			`LK = MIN( KD, N-J ) + 1`
			`CALL CCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )`
			`50 CONTINUE`
			`*`
			`CALL CLASET( 'Upper', PK, PK, ZERO, ONE,`
			`$ A( I+KD, I ), LDA )`
			`*`
			`* Form the matrix T`
			`*`
			`CALL CLARFT( 'Forward', 'Columnwise', PN, PK,`
			`$ A( I+KD, I ), LDA, TAU( I ),`
			`$ WORK( TPOS ), LDT )`
			`*`
			`* Compute W:`
			`*`
			`CALL CGEMM( 'No transpose', 'No transpose', PN, PK, PK,`
			`$ ONE, A( I+KD, I ), LDA,`
			`$ WORK( TPOS ), LDT,`
			`$ ZERO, WORK( S2POS ), LDS2 )`
			`*`
			`CALL CHEMM( 'Left', UPLO, PN, PK,`
			`$ ONE, A( I+KD, I+KD ), LDA,`
			`$ WORK( S2POS ), LDS2,`
			`$ ZERO, WORK( WPOS ), LDW )`
			`*`
			`CALL CGEMM( 'Conjugate', 'No transpose', PK, PK, PN,`
			`$ ONE, WORK( S2POS ), LDS2,`
			`$ WORK( WPOS ), LDW,`
			`$ ZERO, WORK( S1POS ), LDS1 )`
			`*`
			`CALL CGEMM( 'No transpose', 'No transpose', PN, PK, PK,`
			`$ -HALF, A( I+KD, I ), LDA,`
			`$ WORK( S1POS ), LDS1,`
			`$ ONE, WORK( WPOS ), LDW )`
			`*`
			`*`
			`* Update the unreduced submatrix A(i+kd:n,i+kd:n), using`
			`* an update of the form: A := A - VW' - WV'`
			`*`
			`CALL CHER2K( UPLO, 'No transpose', PN, PK,`
			`$ -ONE, A( I+KD, I ), LDA,`
			`$ WORK( WPOS ), LDW,`
			`$ RONE, A( I+KD, I+KD ), LDA )`
			`* ==================================================================`
			`* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED`
			`* DO 45 J = I, I+PK-1`
			`* LK = MIN( KD, N-J ) + 1`
			`* CALL CCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 )`
			`* 45 CONTINUE`
			`* ==================================================================`
			`40 CONTINUE`
			`*`
			`* Copy the lower band to AB which is the band storage matrix`
			`*`
			`DO 60 J = N-KD+1, N`
			`LK = MIN(KD, N-J) + 1`
			`CALL CCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )`
			`60 CONTINUE`

			`END IF`
			`*`
			`WORK( 1 ) = LWMIN`
			`RETURN`
			`*`
			`* End of CHETRD_HE2HB`
			`*`
			`END`