Cloned SEACAS for EXODUS library with extra build files for internal package management.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

425 lines
17 KiB

/*
* Copyright(C) 1999-2020, 2022, 2023 National Technology & Engineering Solutions
* of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
* NTESS, the U.S. Government retains certain rights in this software.
*
* See packages/seacas/LICENSE for details
*/
#include "defs.h"
#include "smalloc.h"
#include "structs.h"
#include <stdio.h>
/* Lanczos iteration with FULL orthogonalization.
Works in standard (version 1) or inverse operator (version 2) mode. */
/* Finds the lowest (zero) eigenvalue, but does not print it
or compute corresponding eigenvector. */
/* Does NOT find distinct eigenvectors corresponding to multiple
eigenvectors and hence will not lead to good partitioners
for symmetric graphs. Will be useful for graphs known
(believed) not to have multiple eigenvectors, e.g. graphs
in which a random set of edges have been added or perturbed. */
/* May fail on small graphs with high multiplicities (e.g. k5) if Ritz
pairs converge before there have been as many iterations as the number
of eigenvalues sought. This is rare and a different random number
seed will generally alleviate the problem. */
/* Convergence check uses Paige bji estimate over the whole
spectrum of T. This is a lot of work, but we are trying to be
extra safe. Since we are orthogonalizing fully, we assume the
bji esitmates are very good and don't provide a contingency for
when they don't match the residuals. */
/* A lot of the time in this routine (say half) is spent in ql finding
the evals of T on each iteration. This could be reduced by only using
ql say every 10 steps. This might require that the orthogonalization
be done with Householder (rather than Gram-Scmidt as currently)
to avoid a problem with zero beta values causing subsequent breakdown.
But this routine is really intended to be used for smallish problems
where the Lanczos runs will be int, e.g. when we are using the inverse
operator method. In the inverse operator case, it's really important to
stop quickly to avoid additional back solves. If the ql work is really
a problem then we should be using a selective othogonalization algorithm.
This routine provides a convenient reference point for how well those
routines are functioning since it has the same basic structure but just
does more orthogonalizing. */
/* The algorithm orthogonalizes the starting vector and the residual vectors
against the vector of all ones since we know that is the null space of
the Laplacian. This generally saves net time because Lanczos tends to
converge faster. */
/* Replaced call to ql() with call to get_ritzvals(). This starts with ql
bisection, whichever is predicted to be faster based on a simple complexity
model. If that fails it switches to the other. This routine should be
safer and faster than straight ql (which does occasionally fail). */
/* NOTE: This routine indexes beta (and workj) from 1 to maxj+1, whereas
selective orthogonalization indexes them from 0 to maxj. */
/* Comments for Lanczos with inverted operator: */
/* Used Symmlq for the back solve since already maintaining that code for
the RQI/Symmlq multilevel method. Straight CG would only be marginally
faster. */
/* The orthogonalization against the vector of all ones in Symmlq is not
as efficient as possible - could in principle use orthog1 method, but
don't want to disrupt the RQI/Symmlq code. Also, probably would only
need to orthogonalize out a given mode periodically. But we want to be
extra robust numericlly. */
void lanczos_FO(struct vtx_data **A, /* graph data structure */
int n, /* number of rows/columns in matrix */
int d, /* problem dimension = # evecs to find */
double **y, /* columns of y are eigenvectors of A */
double *lambda, /* ritz approximation to eigenvals of A */
double *bound, /* on ritz pair approximations to eig pairs of A */
double eigtol, /* tolerance on eigenvectors */
double *vwsqrt, /* square root of vertex weights */
double maxdeg, /* maximum degree of graph */
int version /* 1 = standard mode, 2 = inverse operator mode */
)
{
extern FILE *Output_File; /* output file or NULL */
extern int DEBUG_EVECS; /* print debugging output? */
extern int DEBUG_TRACE; /* trace main execution path */
extern int WARNING_EVECS; /* print warning messages? */
extern int LANCZOS_MAXITNS; /* maximum Lanczos iterations allowed */
extern double BISECTION_SAFETY; /* safety factor for bisection algorithm */
extern double SRESTOL; /* resid tol for T evec comp */
extern double DOUBLE_MAX; /* Warning on inaccurate computation of evec of T */
extern double splarax_time; /* time matvecs */
extern double orthog_time; /* time orthogonalization work */
extern double tevec_time; /* time tridiagonal eigvec work */
extern double evec_time; /* time to generate eigenvectors */
extern double ql_time; /* time tridiagonal eigval work */
extern double blas_time; /* time for blas (not assembly coded) */
extern double init_time; /* time for allocating memory, etc. */
extern double scan_time; /* time for scanning bounds list */
extern double debug_time; /* time for debug computations and output */
int i, j; /* indices */
int maxj; /* maximum number of Lanczos iterations */
double *u, *r; /* Lanczos vectors */
double *Aq; /* sparse matrix-vector product vector */
double *alpha, *beta; /* the Lanczos scalars from each step */
double *ritz; /* copy of alpha for tqli */
double *workj; /* work vector (eg. for tqli) */
double *workn; /* work vector (eg. for checkeig) */
double *s; /* eigenvector of T */
double **q; /* columns of q = Lanczos basis vectors */
double *bj; /* beta(j)*(last element of evecs of T) */
double bis_safety; /* real safety factor for bisection algorithm */
double Sres; /* how well Tevec calculated eigvecs */
double Sres_max; /* Maximum value of Sres */
int inc_bis_safety; /* need to increase bisection safety */
double *Ares; /* how well Lanczos calculated each eigpair */
double *inv_lambda; /* eigenvalues of inverse operator */
int *index; /* the Ritz index of an eigenpair */
struct orthlink *orthlist = NULL; /* vectors to orthogonalize against in Lanczos */
struct orthlink *orthlist2 = NULL; /* vectors to orthogonalize against in Symmlq */
struct orthlink *temp; /* for expanding orthogonalization list */
double *ritzvec = NULL; /* ritz vector for current iteration */
double *zeros = NULL; /* vector of all zeros */
double *ones = NULL; /* vector of all ones */
struct scanlink *scanlist; /* list of fields for min ritz vals */
struct scanlink *curlnk; /* for traversing the scanlist */
double bji_tol; /* tol on bji estimate of A e-residual */
int converged; /* has the iteration converged? */
double time; /* current clock time */
double shift, rtol; /* symmlq input */
long precon, goodb, nout; /* symmlq input */
long checka, intlim; /* symmlq input */
double anorm, acond; /* symmlq output */
double rnorm, ynorm; /* symmlq output */
long istop, itn; /* symmlq output */
double macheps; /* machine precision calculated by symmlq */
double normxlim; /* a stopping criteria for symmlq */
long itnmin; /* enforce minimum number of iterations */
int symmlqitns = 0; /* # symmlq itns */
double *wv1 = NULL, *wv2 = NULL, *wv3 = NULL; /* Symmlq work space */
double *wv4 = NULL, *wv5 = NULL, *wv6 = NULL; /* Symmlq work space */
long long_n; /* long int copy of n for symmlq */
int ritzval_flag = 0; /* status flag for ql() */
double Anorm; /* Norm estimate of the Laplacian matrix */
int left, right; /* ranges on the search for ritzvals */
int memory_ok; /* TRUE as long as don't run out of memory */
if (DEBUG_TRACE > 0) {
printf("<Entering lanczos_FO>\n");
}
if (DEBUG_EVECS > 0) {
if (version == 1) {
printf("Full orthogonalization Lanczos, matrix size = %d\n", n);
}
else {
printf("Full orthogonalization Lanczos, inverted operator, matrix size = %d\n", n);
}
}
/* Initialize time. */
time = lanc_seconds();
if (n < d + 1) {
bail("ERROR: System too small for number of eigenvalues requested.", 1);
/* d+1 since don't use zero eigenvalue pair */
}
/* Allocate Lanczos space. */
maxj = LANCZOS_MAXITNS;
u = mkvec(1, n);
r = mkvec(1, n);
Aq = mkvec(1, n);
ritzvec = mkvec(1, n);
zeros = mkvec(1, n);
setvec(zeros, 1, n, 0.0);
workn = mkvec(1, n);
Ares = mkvec(1, d);
inv_lambda = mkvec(1, d);
index = smalloc((d + 1) * sizeof(int));
alpha = mkvec(1, maxj);
beta = mkvec(1, maxj + 1);
ritz = mkvec(1, maxj);
s = mkvec(1, maxj);
bj = mkvec(1, maxj);
workj = mkvec(1, maxj + 1);
q = smalloc((maxj + 1) * sizeof(double *));
scanlist = mkscanlist(d);
if (version == 2) {
/* Allocate Symmlq space all in one chunk. */
wv1 = smalloc(6 * (n + 1) * sizeof(double));
wv2 = &wv1[(n + 1)];
wv3 = &wv1[2 * (n + 1)];
wv4 = &wv1[3 * (n + 1)];
wv5 = &wv1[4 * (n + 1)];
wv6 = &wv1[5 * (n + 1)];
/* Set invariant symmlq parameters */
precon = FALSE; /* FALSE until we figure out a good way */
goodb = FALSE; /* should be FALSE for this application */
checka = FALSE; /* if don't know by now, too bad */
intlim = n; /* set to enforce a maximum number of Symmlq itns */
itnmin = 0; /* set to enforce a minimum number of Symmlq itns */
shift = 0.0; /* since just solving rather than doing RQI */
symmlqitns = 0; /* total number of Symmlq iterations */
nout = 0; /* Effectively disabled - see notes in symmlq.f */
rtol = 1.0e-5; /* requested residual tolerance */
normxlim = DOUBLE_MAX; /* Effectively disables ||x|| termination criterion */
long_n = n; /* copy to long for linting */
}
/* Initialize. */
vecran(r, 1, n);
if (vwsqrt == NULL) {
/* whack one's direction from initial vector */
orthog1(r, 1, n);
/* list the ones direction for later use in Symmlq */
if (version == 2) {
orthlist2 = makeorthlnk();
ones = mkvec(1, n);
setvec(ones, 1, n, 1.0);
orthlist2->vec = ones;
orthlist2->pntr = NULL;
}
}
else {
/* whack vwsqrt direction from initial vector */
orthogvec(r, 1, n, vwsqrt);
if (version == 2) {
/* list the vwsqrt direction for later use in Symmlq */
orthlist2 = makeorthlnk();
orthlist2->vec = vwsqrt;
orthlist2->pntr = NULL;
}
}
beta[1] = ch_norm(r, 1, n);
q[0] = zeros;
bji_tol = eigtol;
orthlist = NULL;
Sres_max = 0.0;
Anorm = 2 * maxdeg; /* Gershgorin estimate for ||A|| */
bis_safety = BISECTION_SAFETY;
inc_bis_safety = FALSE;
init_time += lanc_seconds() - time;
/* Main Lanczos loop. */
j = 1;
converged = FALSE;
memory_ok = TRUE;
while ((j <= maxj) && (converged == FALSE) && memory_ok) {
time = lanc_seconds();
/* Allocate next Lanczos vector. If fail, back up one step and compute approx. eigvec. */
q[j] = mkvec_ret(1, n);
if (q[j] == NULL) {
memory_ok = FALSE;
if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
strout("WARNING: Lanczos out of memory; computing best approximation available.\n");
}
if (j <= 2) {
bail("ERROR: Sorry, can't salvage Lanczos.", 1);
/* ... save yourselves, men. */
}
j--;
}
vecscale(q[j], 1, n, 1.0 / beta[j], r);
blas_time += lanc_seconds() - time;
time = lanc_seconds();
if (version == 1) {
splarax(Aq, A, n, q[j], vwsqrt, workn);
}
else if (version == 2) {
symmlq(&long_n, &(q[j][1]), &wv1[1], &wv2[1], &wv3[1], &wv4[1], &Aq[1], &wv5[1], &wv6[1],
&checka, &goodb, &precon, &shift, &nout, &intlim, &rtol, &istop, &itn, &anorm, &acond,
&rnorm, &ynorm, (double *)A, vwsqrt, (double *)orthlist2, &macheps, &normxlim,
&itnmin);
symmlqitns += itn;
if (DEBUG_EVECS > 2) {
printf("Symmlq report: rtol %g\n", rtol);
printf(" system norm %g, solution norm %g\n", anorm, ynorm);
printf(" system condition %g, residual %g\n", acond, rnorm);
printf(" termination condition %2ld, iterations %3ld\n", istop, itn);
}
}
splarax_time += lanc_seconds() - time;
time = lanc_seconds();
update(u, 1, n, Aq, -beta[j], q[j - 1]);
alpha[j] = dot(u, 1, n, q[j]);
update(r, 1, n, u, -alpha[j], q[j]);
blas_time += lanc_seconds() - time;
time = lanc_seconds();
if (vwsqrt == NULL) {
orthog1(r, 1, n);
}
else {
orthogvec(r, 1, n, vwsqrt);
}
orthogonalize(r, n, orthlist);
temp = orthlist;
orthlist = makeorthlnk();
orthlist->vec = q[j];
orthlist->pntr = temp;
beta[j + 1] = ch_norm(r, 1, n);
orthog_time += lanc_seconds() - time;
time = lanc_seconds();
left = j / 2;
right = j - left + 1;
if (inc_bis_safety) {
bis_safety *= 10;
inc_bis_safety = FALSE;
}
ritzval_flag = get_ritzvals(alpha, beta + 1, j, Anorm, workj + 1, ritz, d, left, right, eigtol,
bis_safety);
/* ... have to off-set beta and workj since full orthogonalization
indexes these from 1 to maxj+1 whereas selective orthog.
indexes them from 0 to maxj */
if (ritzval_flag != 0) {
bail("ERROR: Both Sturm bisection and QL failed.", 1);
/* ... give up. */
}
ql_time += lanc_seconds() - time;
/* Convergence check using Paige bji estimates. */
time = lanc_seconds();
for (i = 1; i <= j; i++) {
Sres = Tevec(alpha, beta, j, ritz[i], s);
if (Sres > Sres_max) {
Sres_max = Sres;
}
if (Sres > SRESTOL) {
inc_bis_safety = TRUE;
}
bj[i] = s[j] * beta[j + 1];
}
tevec_time += lanc_seconds() - time;
time = lanc_seconds();
if (version == 1) {
scanmin(ritz, 1, j, &scanlist);
}
else {
scanmax(ritz, 1, j, &scanlist);
}
converged = TRUE;
if (j < d) {
converged = FALSE;
}
else {
curlnk = scanlist;
while (curlnk != NULL) {
if (bj[curlnk->indx] > bji_tol) {
converged = FALSE;
}
curlnk = curlnk->pntr;
}
}
scan_time += lanc_seconds() - time;
j++;
}
j--;
/* Collect eigenvalue and bound information. */
time = lanc_seconds();
mkeigvecs(scanlist, lambda, bound, index, bj, d, &Sres_max, alpha, beta + 1, j, s, y, n, q);
evec_time += lanc_seconds() - time;
/* Analyze computation for and report additional problems */
time = lanc_seconds();
if (DEBUG_EVECS > 0 && version == 2) {
printf("\nTotal Symmlq iterations %3d\n", symmlqitns);
}
if (version == 2) {
for (i = 1; i <= d; i++) {
lambda[i] = 1.0 / lambda[i];
}
}
warnings(workn, A, y, n, lambda, vwsqrt, Ares, bound, index, d, j, maxj, Sres_max, eigtol, u,
Anorm, Output_File);
debug_time += lanc_seconds() - time;
/* Free any memory allocated in this routine. */
time = lanc_seconds();
frvec(u, 1);
frvec(r, 1);
frvec(Aq, 1);
frvec(ritzvec, 1);
frvec(zeros, 1);
if (vwsqrt == NULL && version == 2) {
frvec(ones, 1);
}
frvec(workn, 1);
frvec(Ares, 1);
frvec(inv_lambda, 1);
sfree(index);
frvec(alpha, 1);
frvec(beta, 1);
frvec(ritz, 1);
frvec(s, 1);
frvec(bj, 1);
frvec(workj, 1);
if (version == 2) {
frvec(wv1, 0);
}
while (scanlist != NULL) {
curlnk = scanlist->pntr;
sfree(scanlist);
scanlist = curlnk;
}
for (i = 1; i <= j; i++) {
frvec(q[i], 1);
}
while (orthlist != NULL) {
temp = orthlist->pntr;
sfree(orthlist);
orthlist = temp;
}
while (version == 2 && orthlist2 != NULL) {
temp = orthlist2->pntr;
sfree(orthlist2);
orthlist2 = temp;
}
sfree(q);
init_time += lanc_seconds() - time;
}