Cloned library LAPACK-3.11.0 with extra build files for internal package management.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

631 lines
19 KiB

*> \brief \b SLASD2 merges the two sets of singular values together into a single sorted set. Used by sbdsdc.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download SLASD2 + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slasd2.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slasd2.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slasd2.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
* LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
* IDXC, IDXQ, COLTYP, INFO )
*
* .. Scalar Arguments ..
* INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
* REAL ALPHA, BETA
* ..
* .. Array Arguments ..
* INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
* $ IDXQ( * )
* REAL D( * ), DSIGMA( * ), U( LDU, * ),
* $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
* $ Z( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> SLASD2 merges the two sets of singular values together into a single
*> sorted set. Then it tries to deflate the size of the problem.
*> There are two ways in which deflation can occur: when two or more
*> singular values are close together or if there is a tiny entry in the
*> Z vector. For each such occurrence the order of the related secular
*> equation problem is reduced by one.
*>
*> SLASD2 is called from SLASD1.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] NL
*> \verbatim
*> NL is INTEGER
*> The row dimension of the upper block. NL >= 1.
*> \endverbatim
*>
*> \param[in] NR
*> \verbatim
*> NR is INTEGER
*> The row dimension of the lower block. NR >= 1.
*> \endverbatim
*>
*> \param[in] SQRE
*> \verbatim
*> SQRE is INTEGER
*> = 0: the lower block is an NR-by-NR square matrix.
*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
*>
*> The bidiagonal matrix has N = NL + NR + 1 rows and
*> M = N + SQRE >= N columns.
*> \endverbatim
*>
*> \param[out] K
*> \verbatim
*> K is INTEGER
*> Contains the dimension of the non-deflated matrix,
*> This is the order of the related secular equation. 1 <= K <=N.
*> \endverbatim
*>
*> \param[in,out] D
*> \verbatim
*> D is REAL array, dimension (N)
*> On entry D contains the singular values of the two submatrices
*> to be combined. On exit D contains the trailing (N-K) updated
*> singular values (those which were deflated) sorted into
*> increasing order.
*> \endverbatim
*>
*> \param[out] Z
*> \verbatim
*> Z is REAL array, dimension (N)
*> On exit Z contains the updating row vector in the secular
*> equation.
*> \endverbatim
*>
*> \param[in] ALPHA
*> \verbatim
*> ALPHA is REAL
*> Contains the diagonal element associated with the added row.
*> \endverbatim
*>
*> \param[in] BETA
*> \verbatim
*> BETA is REAL
*> Contains the off-diagonal element associated with the added
*> row.
*> \endverbatim
*>
*> \param[in,out] U
*> \verbatim
*> U is REAL array, dimension (LDU,N)
*> On entry U contains the left singular vectors of two
*> submatrices in the two square blocks with corners at (1,1),
*> (NL, NL), and (NL+2, NL+2), (N,N).
*> On exit U contains the trailing (N-K) updated left singular
*> vectors (those which were deflated) in its last N-K columns.
*> \endverbatim
*>
*> \param[in] LDU
*> \verbatim
*> LDU is INTEGER
*> The leading dimension of the array U. LDU >= N.
*> \endverbatim
*>
*> \param[in,out] VT
*> \verbatim
*> VT is REAL array, dimension (LDVT,M)
*> On entry VT**T contains the right singular vectors of two
*> submatrices in the two square blocks with corners at (1,1),
*> (NL+1, NL+1), and (NL+2, NL+2), (M,M).
*> On exit VT**T contains the trailing (N-K) updated right singular
*> vectors (those which were deflated) in its last N-K columns.
*> In case SQRE =1, the last row of VT spans the right null
*> space.
*> \endverbatim
*>
*> \param[in] LDVT
*> \verbatim
*> LDVT is INTEGER
*> The leading dimension of the array VT. LDVT >= M.
*> \endverbatim
*>
*> \param[out] DSIGMA
*> \verbatim
*> DSIGMA is REAL array, dimension (N)
*> Contains a copy of the diagonal elements (K-1 singular values
*> and one zero) in the secular equation.
*> \endverbatim
*>
*> \param[out] U2
*> \verbatim
*> U2 is REAL array, dimension (LDU2,N)
*> Contains a copy of the first K-1 left singular vectors which
*> will be used by SLASD3 in a matrix multiply (SGEMM) to solve
*> for the new left singular vectors. U2 is arranged into four
*> blocks. The first block contains a column with 1 at NL+1 and
*> zero everywhere else; the second block contains non-zero
*> entries only at and above NL; the third contains non-zero
*> entries only below NL+1; and the fourth is dense.
*> \endverbatim
*>
*> \param[in] LDU2
*> \verbatim
*> LDU2 is INTEGER
*> The leading dimension of the array U2. LDU2 >= N.
*> \endverbatim
*>
*> \param[out] VT2
*> \verbatim
*> VT2 is REAL array, dimension (LDVT2,N)
*> VT2**T contains a copy of the first K right singular vectors
*> which will be used by SLASD3 in a matrix multiply (SGEMM) to
*> solve for the new right singular vectors. VT2 is arranged into
*> three blocks. The first block contains a row that corresponds
*> to the special 0 diagonal element in SIGMA; the second block
*> contains non-zeros only at and before NL +1; the third block
*> contains non-zeros only at and after NL +2.
*> \endverbatim
*>
*> \param[in] LDVT2
*> \verbatim
*> LDVT2 is INTEGER
*> The leading dimension of the array VT2. LDVT2 >= M.
*> \endverbatim
*>
*> \param[out] IDXP
*> \verbatim
*> IDXP is INTEGER array, dimension (N)
*> This will contain the permutation used to place deflated
*> values of D at the end of the array. On output IDXP(2:K)
*> points to the nondeflated D-values and IDXP(K+1:N)
*> points to the deflated singular values.
*> \endverbatim
*>
*> \param[out] IDX
*> \verbatim
*> IDX is INTEGER array, dimension (N)
*> This will contain the permutation used to sort the contents of
*> D into ascending order.
*> \endverbatim
*>
*> \param[out] IDXC
*> \verbatim
*> IDXC is INTEGER array, dimension (N)
*> This will contain the permutation used to arrange the columns
*> of the deflated U matrix into three groups: the first group
*> contains non-zero entries only at and above NL, the second
*> contains non-zero entries only below NL+2, and the third is
*> dense.
*> \endverbatim
*>
*> \param[in,out] IDXQ
*> \verbatim
*> IDXQ is INTEGER array, dimension (N)
*> This contains the permutation which separately sorts the two
*> sub-problems in D into ascending order. Note that entries in
*> the first hlaf of this permutation must first be moved one
*> position backward; and entries in the second half
*> must first have NL+1 added to their values.
*> \endverbatim
*>
*> \param[out] COLTYP
*> \verbatim
*> COLTYP is INTEGER array, dimension (N)
*> As workspace, this will contain a label which will indicate
*> which of the following types a column in the U2 matrix or a
*> row in the VT2 matrix is:
*> 1 : non-zero in the upper half only
*> 2 : non-zero in the lower half only
*> 3 : dense
*> 4 : deflated
*>
*> On exit, it is an array of dimension 4, with COLTYP(I) being
*> the dimension of the I-th type columns.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit.
*> < 0: if INFO = -i, the i-th argument had an illegal value.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup OTHERauxiliary
*
*> \par Contributors:
* ==================
*>
*> Ming Gu and Huan Ren, Computer Science Division, University of
*> California at Berkeley, USA
*>
* =====================================================================
SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
$ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
$ IDXC, IDXQ, COLTYP, INFO )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
REAL ALPHA, BETA
* ..
* .. Array Arguments ..
INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
$ IDXQ( * )
REAL D( * ), DSIGMA( * ), U( LDU, * ),
$ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
$ Z( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
REAL ZERO, ONE, TWO, EIGHT
PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0,
$ EIGHT = 8.0E+0 )
* ..
* .. Local Arrays ..
INTEGER CTOT( 4 ), PSM( 4 )
* ..
* .. Local Scalars ..
INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
$ N, NLP1, NLP2
REAL C, EPS, HLFTOL, S, TAU, TOL, Z1
* ..
* .. External Functions ..
REAL SLAMCH, SLAPY2
EXTERNAL SLAMCH, SLAPY2
* ..
* .. External Subroutines ..
EXTERNAL SCOPY, SLACPY, SLAMRG, SLASET, SROT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
*
IF( NL.LT.1 ) THEN
INFO = -1
ELSE IF( NR.LT.1 ) THEN
INFO = -2
ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
INFO = -3
END IF
*
N = NL + NR + 1
M = N + SQRE
*
IF( LDU.LT.N ) THEN
INFO = -10
ELSE IF( LDVT.LT.M ) THEN
INFO = -12
ELSE IF( LDU2.LT.N ) THEN
INFO = -15
ELSE IF( LDVT2.LT.M ) THEN
INFO = -17
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'SLASD2', -INFO )
RETURN
END IF
*
NLP1 = NL + 1
NLP2 = NL + 2
*
* Generate the first part of the vector Z; and move the singular
* values in the first part of D one position backward.
*
Z1 = ALPHA*VT( NLP1, NLP1 )
Z( 1 ) = Z1
DO 10 I = NL, 1, -1
Z( I+1 ) = ALPHA*VT( I, NLP1 )
D( I+1 ) = D( I )
IDXQ( I+1 ) = IDXQ( I ) + 1
10 CONTINUE
*
* Generate the second part of the vector Z.
*
DO 20 I = NLP2, M
Z( I ) = BETA*VT( I, NLP2 )
20 CONTINUE
*
* Initialize some reference arrays.
*
DO 30 I = 2, NLP1
COLTYP( I ) = 1
30 CONTINUE
DO 40 I = NLP2, N
COLTYP( I ) = 2
40 CONTINUE
*
* Sort the singular values into increasing order
*
DO 50 I = NLP2, N
IDXQ( I ) = IDXQ( I ) + NLP1
50 CONTINUE
*
* DSIGMA, IDXC, IDXC, and the first column of U2
* are used as storage space.
*
DO 60 I = 2, N
DSIGMA( I ) = D( IDXQ( I ) )
U2( I, 1 ) = Z( IDXQ( I ) )
IDXC( I ) = COLTYP( IDXQ( I ) )
60 CONTINUE
*
CALL SLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
*
DO 70 I = 2, N
IDXI = 1 + IDX( I )
D( I ) = DSIGMA( IDXI )
Z( I ) = U2( IDXI, 1 )
COLTYP( I ) = IDXC( IDXI )
70 CONTINUE
*
* Calculate the allowable deflation tolerance
*
EPS = SLAMCH( 'Epsilon' )
TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
*
* There are 2 kinds of deflation -- first a value in the z-vector
* is small, second two (or more) singular values are very close
* together (their difference is small).
*
* If the value in the z-vector is small, we simply permute the
* array so that the corresponding singular value is moved to the
* end.
*
* If two values in the D-vector are close, we perform a two-sided
* rotation designed to make one of the corresponding z-vector
* entries zero, and then permute the array so that the deflated
* singular value is moved to the end.
*
* If there are multiple singular values then the problem deflates.
* Here the number of equal singular values are found. As each equal
* singular value is found, an elementary reflector is computed to
* rotate the corresponding singular subspace so that the
* corresponding components of Z are zero in this new basis.
*
K = 1
K2 = N + 1
DO 80 J = 2, N
IF( ABS( Z( J ) ).LE.TOL ) THEN
*
* Deflate due to small z component.
*
K2 = K2 - 1
IDXP( K2 ) = J
COLTYP( J ) = 4
IF( J.EQ.N )
$ GO TO 120
ELSE
JPREV = J
GO TO 90
END IF
80 CONTINUE
90 CONTINUE
J = JPREV
100 CONTINUE
J = J + 1
IF( J.GT.N )
$ GO TO 110
IF( ABS( Z( J ) ).LE.TOL ) THEN
*
* Deflate due to small z component.
*
K2 = K2 - 1
IDXP( K2 ) = J
COLTYP( J ) = 4
ELSE
*
* Check if singular values are close enough to allow deflation.
*
IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
*
* Deflation is possible.
*
S = Z( JPREV )
C = Z( J )
*
* Find sqrt(a**2+b**2) without overflow or
* destructive underflow.
*
TAU = SLAPY2( C, S )
C = C / TAU
S = -S / TAU
Z( J ) = TAU
Z( JPREV ) = ZERO
*
* Apply back the Givens rotation to the left and right
* singular vector matrices.
*
IDXJP = IDXQ( IDX( JPREV )+1 )
IDXJ = IDXQ( IDX( J )+1 )
IF( IDXJP.LE.NLP1 ) THEN
IDXJP = IDXJP - 1
END IF
IF( IDXJ.LE.NLP1 ) THEN
IDXJ = IDXJ - 1
END IF
CALL SROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )
CALL SROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,
$ S )
IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN
COLTYP( J ) = 3
END IF
COLTYP( JPREV ) = 4
K2 = K2 - 1
IDXP( K2 ) = JPREV
JPREV = J
ELSE
K = K + 1
U2( K, 1 ) = Z( JPREV )
DSIGMA( K ) = D( JPREV )
IDXP( K ) = JPREV
JPREV = J
END IF
END IF
GO TO 100
110 CONTINUE
*
* Record the last singular value.
*
K = K + 1
U2( K, 1 ) = Z( JPREV )
DSIGMA( K ) = D( JPREV )
IDXP( K ) = JPREV
*
120 CONTINUE
*
* Count up the total number of the various types of columns, then
* form a permutation which positions the four column types into
* four groups of uniform structure (although one or more of these
* groups may be empty).
*
DO 130 J = 1, 4
CTOT( J ) = 0
130 CONTINUE
DO 140 J = 2, N
CT = COLTYP( J )
CTOT( CT ) = CTOT( CT ) + 1
140 CONTINUE
*
* PSM(*) = Position in SubMatrix (of types 1 through 4)
*
PSM( 1 ) = 2
PSM( 2 ) = 2 + CTOT( 1 )
PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
*
* Fill out the IDXC array so that the permutation which it induces
* will place all type-1 columns first, all type-2 columns next,
* then all type-3's, and finally all type-4's, starting from the
* second column. This applies similarly to the rows of VT.
*
DO 150 J = 2, N
JP = IDXP( J )
CT = COLTYP( JP )
IDXC( PSM( CT ) ) = J
PSM( CT ) = PSM( CT ) + 1
150 CONTINUE
*
* Sort the singular values and corresponding singular vectors into
* DSIGMA, U2, and VT2 respectively. The singular values/vectors
* which were not deflated go into the first K slots of DSIGMA, U2,
* and VT2 respectively, while those which were deflated go into the
* last N - K slots, except that the first column/row will be treated
* separately.
*
DO 160 J = 2, N
JP = IDXP( J )
DSIGMA( J ) = D( JP )
IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )
IF( IDXJ.LE.NLP1 ) THEN
IDXJ = IDXJ - 1
END IF
CALL SCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )
CALL SCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )
160 CONTINUE
*
* Determine DSIGMA(1), DSIGMA(2) and Z(1)
*
DSIGMA( 1 ) = ZERO
HLFTOL = TOL / TWO
IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
$ DSIGMA( 2 ) = HLFTOL
IF( M.GT.N ) THEN
Z( 1 ) = SLAPY2( Z1, Z( M ) )
IF( Z( 1 ).LE.TOL ) THEN
C = ONE
S = ZERO
Z( 1 ) = TOL
ELSE
C = Z1 / Z( 1 )
S = Z( M ) / Z( 1 )
END IF
ELSE
IF( ABS( Z1 ).LE.TOL ) THEN
Z( 1 ) = TOL
ELSE
Z( 1 ) = Z1
END IF
END IF
*
* Move the rest of the updating row to Z.
*
CALL SCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )
*
* Determine the first column of U2, the first row of VT2 and the
* last row of VT.
*
CALL SLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )
U2( NLP1, 1 ) = ONE
IF( M.GT.N ) THEN
DO 170 I = 1, NLP1
VT( M, I ) = -S*VT( NLP1, I )
VT2( 1, I ) = C*VT( NLP1, I )
170 CONTINUE
DO 180 I = NLP2, M
VT2( 1, I ) = S*VT( M, I )
VT( M, I ) = C*VT( M, I )
180 CONTINUE
ELSE
CALL SCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )
END IF
IF( M.GT.N ) THEN
CALL SCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )
END IF
*
* The deflated singular values and their corresponding vectors go
* into the back of D, U, and V respectively.
*
IF( N.GT.K ) THEN
CALL SCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
CALL SLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),
$ LDU )
CALL SLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),
$ LDVT )
END IF
*
* Copy CTOT into COLTYP for referencing in SLASD3.
*
DO 190 J = 1, 4
COLTYP( J ) = CTOT( J )
190 CONTINUE
*
RETURN
*
* End of SLASD2
*
END