EXODUS/packages/seacas/libraries/chaco/eigen/lanczos_FO.c

/*
 * Copyright(C) 1999-2020, 2022, 2023 National Technology & Engineering Solutions
 * of Sandia, LLC (NTESS).  Under the terms of Contract DE-NA0003525 with
 * NTESS, the U.S. Government retains certain rights in this software.
 *
 * See packages/seacas/LICENSE for details
 */

#include "defs.h"
#include "smalloc.h"
#include "structs.h"
#include <stdio.h>

/* Lanczos iteration with FULL orthogonalization.
   Works in standard (version 1) or inverse operator (version 2) mode. */
/* Finds the lowest (zero) eigenvalue, but does not print it
   or compute corresponding eigenvector. */
/* Does NOT find distinct eigenvectors corresponding to multiple
   eigenvectors and hence will not lead to good partitioners
   for symmetric graphs. Will be useful for graphs known
   (believed) not to have multiple eigenvectors, e.g. graphs
   in which a random set of edges have been added or perturbed. */
/* May fail on small graphs with high multiplicities (e.g. k5) if Ritz
   pairs converge before there have been as many iterations as the number
   of eigenvalues sought. This is rare and a different random number
   seed will generally alleviate the problem. */
/* Convergence check uses Paige bji estimate over the whole
   spectrum of T. This is a lot of work, but we are trying to be
   extra safe. Since we are orthogonalizing fully, we assume the
   bji esitmates are very good and don't provide a contingency for
   when they don't match the residuals. */
/* A lot of the time in this routine (say half) is spent in ql finding
   the evals of T on each iteration. This could be reduced by only using
   ql say every 10 steps. This might require that the orthogonalization
   be done with Householder (rather than Gram-Scmidt as currently)
   to avoid a problem with zero beta values causing subsequent breakdown.
   But this routine is really intended to be used for smallish problems
   where the Lanczos runs will be int, e.g. when we are using the inverse
   operator method. In the inverse operator case, it's really important to
   stop quickly to avoid additional back solves. If the ql work is really
   a problem then we should be using a selective othogonalization algorithm.
   This routine provides a convenient reference point for how well those
   routines are functioning since it has the same basic structure but just
   does more orthogonalizing. */
/* The algorithm orthogonalizes the starting vector and the residual vectors
   against the vector of all ones since we know that is the null space of
   the Laplacian. This generally saves net time because Lanczos tends to
   converge faster. */
/* Replaced call to ql() with call to get_ritzvals(). This starts with ql
   bisection, whichever is predicted to be faster based on a simple complexity
   model. If that fails it switches to the other. This routine should be
   safer and faster than straight ql (which does occasionally fail). */
/* NOTE: This routine indexes beta (and workj) from 1 to maxj+1, whereas
   selective orthogonalization indexes them from 0 to maxj. */

/* Comments for Lanczos with inverted operator: */
/* Used Symmlq for the back solve since already maintaining that code for
   the RQI/Symmlq multilevel method. Straight CG would only be marginally
   faster. */
/* The orthogonalization against the vector of all ones in Symmlq is not
   as efficient as possible - could in principle use orthog1 method, but
   don't want to disrupt the RQI/Symmlq code. Also, probably would only
   need to orthogonalize out a given mode periodically. But we want to be
   extra robust numericlly. */

void lanczos_FO(struct vtx_data **A,      /* graph data structure */
                int               n,      /* number of rows/columns in matrix */
                int               d,      /* problem dimension = # evecs to find */
                double          **y,      /* columns of y are eigenvectors of A  */
                double           *lambda, /* ritz approximation to eigenvals of A */
                double           *bound,  /* on ritz pair approximations to eig pairs of A */
                double            eigtol, /* tolerance on eigenvectors */
                double           *vwsqrt, /* square root of vertex weights */
                double            maxdeg, /* maximum degree of graph */
                int               version /* 1 = standard mode, 2 = inverse operator mode */
)

{
  extern FILE     *Output_File;         /* output file or NULL */
  extern int       DEBUG_EVECS;         /* print debugging output? */
  extern int       DEBUG_TRACE;         /* trace main execution path */
  extern int       WARNING_EVECS;       /* print warning messages? */
  extern int       LANCZOS_MAXITNS;     /* maximum Lanczos iterations allowed */
  extern double    BISECTION_SAFETY;    /* safety factor for bisection algorithm */
  extern double    SRESTOL;             /* resid tol for T evec comp */
  extern double    DOUBLE_MAX;          /* Warning on inaccurate computation of evec of T */
  extern double    splarax_time;        /* time matvecs */
  extern double    orthog_time;         /* time orthogonalization work */
  extern double    tevec_time;          /* time tridiagonal eigvec work */
  extern double    evec_time;           /* time to generate eigenvectors */
  extern double    ql_time;             /* time tridiagonal eigval work */
  extern double    blas_time;           /* time for blas (not assembly coded) */
  extern double    init_time;           /* time for allocating memory, etc. */
  extern double    scan_time;           /* time for scanning bounds list */
  extern double    debug_time;          /* time for debug computations and output */
  int              i, j;                /* indices */
  int              maxj;                /* maximum number of Lanczos iterations */
  double          *u, *r;               /* Lanczos vectors */
  double          *Aq;                  /* sparse matrix-vector product vector */
  double          *alpha, *beta;        /* the Lanczos scalars from each step */
  double          *ritz;                /* copy of alpha for tqli */
  double          *workj;               /* work vector (eg. for tqli) */
  double          *workn;               /* work vector (eg. for checkeig) */
  double          *s;                   /* eigenvector of T */
  double         **q;                   /* columns of q = Lanczos basis vectors */
  double          *bj;                  /* beta(j)*(last element of evecs of T) */
  double           bis_safety;          /* real safety factor for bisection algorithm */
  double           Sres;                /* how well Tevec calculated eigvecs */
  double           Sres_max;            /* Maximum value of Sres */
  int              inc_bis_safety;      /* need to increase bisection safety */
  double          *Ares;                /* how well Lanczos calculated each eigpair */
  double          *inv_lambda;          /* eigenvalues of inverse operator */
  int             *index;               /* the Ritz index of an eigenpair */
  struct orthlink *orthlist  = NULL;    /* vectors to orthogonalize against in Lanczos */
  struct orthlink *orthlist2 = NULL;    /* vectors to orthogonalize against in Symmlq */
  struct orthlink *temp;                /* for expanding orthogonalization list */
  double          *ritzvec = NULL;      /* ritz vector for current iteration */
  double          *zeros   = NULL;      /* vector of all zeros */
  double          *ones    = NULL;      /* vector of all ones */
  struct scanlink *scanlist;            /* list of fields for min ritz vals */
  struct scanlink *curlnk;              /* for traversing the scanlist */
  double           bji_tol;             /* tol on bji estimate of A e-residual */
  int              converged;           /* has the iteration converged? */
  double           time;                /* current clock time */
  double           shift, rtol;         /* symmlq input */
  long             precon, goodb, nout; /* symmlq input */
  long             checka, intlim;      /* symmlq input */
  double           anorm, acond;        /* symmlq output */
  double           rnorm, ynorm;        /* symmlq output */
  long             istop, itn;          /* symmlq output */
  double           macheps;             /* machine precision calculated by symmlq */
  double           normxlim;            /* a stopping criteria for symmlq */
  long             itnmin;              /* enforce minimum number of iterations */
  int              symmlqitns = 0;      /* # symmlq itns */
  double          *wv1 = NULL, *wv2 = NULL, *wv3 = NULL; /* Symmlq work space */
  double          *wv4 = NULL, *wv5 = NULL, *wv6 = NULL; /* Symmlq work space */
  long             long_n;                               /* long int copy of n for symmlq */
  int              ritzval_flag = 0;                     /* status flag for ql() */
  double           Anorm;                                /* Norm estimate of the Laplacian matrix */
  int              left, right;                          /* ranges on the search for ritzvals */
  int              memory_ok; /* TRUE as long as don't run out of memory */

  if (DEBUG_TRACE > 0) {
    printf("<Entering lanczos_FO>\n");
  }

  if (DEBUG_EVECS > 0) {
    if (version == 1) {
      printf("Full orthogonalization Lanczos, matrix size = %d\n", n);
    }
    else {
      printf("Full orthogonalization Lanczos, inverted operator, matrix size = %d\n", n);
    }
  }

  /* Initialize time. */
  time = lanc_seconds();

  if (n < d + 1) {
    bail("ERROR: System too small for number of eigenvalues requested.", 1);
    /* d+1 since don't use zero eigenvalue pair */
  }

  /* Allocate Lanczos space. */
  maxj    = LANCZOS_MAXITNS;
  u       = mkvec(1, n);
  r       = mkvec(1, n);
  Aq      = mkvec(1, n);
  ritzvec = mkvec(1, n);
  zeros   = mkvec(1, n);
  setvec(zeros, 1, n, 0.0);
  workn      = mkvec(1, n);
  Ares       = mkvec(1, d);
  inv_lambda = mkvec(1, d);
  index      = smalloc((d + 1) * sizeof(int));
  alpha      = mkvec(1, maxj);
  beta       = mkvec(1, maxj + 1);
  ritz       = mkvec(1, maxj);
  s          = mkvec(1, maxj);
  bj         = mkvec(1, maxj);
  workj      = mkvec(1, maxj + 1);
  q          = smalloc((maxj + 1) * sizeof(double *));
  scanlist   = mkscanlist(d);

  if (version == 2) {
    /* Allocate Symmlq space all in one chunk. */
    wv1 = smalloc(6 * (n + 1) * sizeof(double));
    wv2 = &wv1[(n + 1)];
    wv3 = &wv1[2 * (n + 1)];
    wv4 = &wv1[3 * (n + 1)];
    wv5 = &wv1[4 * (n + 1)];
    wv6 = &wv1[5 * (n + 1)];

    /* Set invariant symmlq parameters */
    precon     = FALSE;      /* FALSE until we figure out a good way */
    goodb      = FALSE;      /* should be FALSE for this application */
    checka     = FALSE;      /* if don't know by now, too bad */
    intlim     = n;          /* set to enforce a maximum number of Symmlq itns */
    itnmin     = 0;          /* set to enforce a minimum number of Symmlq itns */
    shift      = 0.0;        /* since just solving rather than doing RQI */
    symmlqitns = 0;          /* total number of Symmlq iterations */
    nout       = 0;          /* Effectively disabled - see notes in symmlq.f */
    rtol       = 1.0e-5;     /* requested residual tolerance */
    normxlim   = DOUBLE_MAX; /* Effectively disables ||x|| termination criterion */
    long_n     = n;          /* copy to long for linting */
  }

  /* Initialize. */
  vecran(r, 1, n);
  if (vwsqrt == NULL) {
    /* whack one's direction from initial vector */
    orthog1(r, 1, n);

    /* list the ones direction for later use in Symmlq */
    if (version == 2) {
      orthlist2 = makeorthlnk();
      ones      = mkvec(1, n);
      setvec(ones, 1, n, 1.0);
      orthlist2->vec  = ones;
      orthlist2->pntr = NULL;
    }
  }
  else {
    /* whack vwsqrt direction from initial vector */
    orthogvec(r, 1, n, vwsqrt);

    if (version == 2) {
      /* list the vwsqrt direction for later use in Symmlq */
      orthlist2       = makeorthlnk();
      orthlist2->vec  = vwsqrt;
      orthlist2->pntr = NULL;
    }
  }
  beta[1]        = ch_norm(r, 1, n);
  q[0]           = zeros;
  bji_tol        = eigtol;
  orthlist       = NULL;
  Sres_max       = 0.0;
  Anorm          = 2 * maxdeg; /* Gershgorin estimate for ||A|| */
  bis_safety     = BISECTION_SAFETY;
  inc_bis_safety = FALSE;
  init_time += lanc_seconds() - time;

  /* Main Lanczos loop. */
  j         = 1;
  converged = FALSE;
  memory_ok = TRUE;
  while ((j <= maxj) && (converged == FALSE) && memory_ok) {
    time = lanc_seconds();

    /* Allocate next Lanczos vector. If fail, back up one step and compute approx. eigvec. */
    q[j] = mkvec_ret(1, n);
    if (q[j] == NULL) {
      memory_ok = FALSE;
      if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
        strout("WARNING: Lanczos out of memory; computing best approximation available.\n");
      }
      if (j <= 2) {
        bail("ERROR: Sorry, can't salvage Lanczos.", 1);
        /* ... save yourselves, men.  */
      }
      j--;
    }

    vecscale(q[j], 1, n, 1.0 / beta[j], r);
    blas_time += lanc_seconds() - time;
    time = lanc_seconds();
    if (version == 1) {
      splarax(Aq, A, n, q[j], vwsqrt, workn);
    }
    else if (version == 2) {
      symmlq(&long_n, &(q[j][1]), &wv1[1], &wv2[1], &wv3[1], &wv4[1], &Aq[1], &wv5[1], &wv6[1],
             &checka, &goodb, &precon, &shift, &nout, &intlim, &rtol, &istop, &itn, &anorm, &acond,
             &rnorm, &ynorm, (double *)A, vwsqrt, (double *)orthlist2, &macheps, &normxlim,
             &itnmin);
      symmlqitns += itn;
      if (DEBUG_EVECS > 2) {
        printf("Symmlq report:      rtol %g\n", rtol);
        printf("  system norm %g, solution norm %g\n", anorm, ynorm);
        printf("  system condition %g, residual %g\n", acond, rnorm);
        printf("  termination condition %2ld, iterations %3ld\n", istop, itn);
      }
    }
    splarax_time += lanc_seconds() - time;
    time = lanc_seconds();
    update(u, 1, n, Aq, -beta[j], q[j - 1]);
    alpha[j] = dot(u, 1, n, q[j]);
    update(r, 1, n, u, -alpha[j], q[j]);
    blas_time += lanc_seconds() - time;
    time = lanc_seconds();
    if (vwsqrt == NULL) {
      orthog1(r, 1, n);
    }
    else {
      orthogvec(r, 1, n, vwsqrt);
    }
    orthogonalize(r, n, orthlist);
    temp           = orthlist;
    orthlist       = makeorthlnk();
    orthlist->vec  = q[j];
    orthlist->pntr = temp;
    beta[j + 1]    = ch_norm(r, 1, n);
    orthog_time += lanc_seconds() - time;

    time  = lanc_seconds();
    left  = j / 2;
    right = j - left + 1;
    if (inc_bis_safety) {
      bis_safety *= 10;
      inc_bis_safety = FALSE;
    }
    ritzval_flag = get_ritzvals(alpha, beta + 1, j, Anorm, workj + 1, ritz, d, left, right, eigtol,
                                bis_safety);
    /* ... have to off-set beta and workj since full orthogonalization
           indexes these from 1 to maxj+1 whereas selective orthog.
           indexes them from 0 to maxj */

    if (ritzval_flag != 0) {
      bail("ERROR: Both Sturm bisection and QL failed.", 1);
      /* ... give up. */
    }
    ql_time += lanc_seconds() - time;

    /* Convergence check using Paige bji estimates. */
    time = lanc_seconds();
    for (i = 1; i <= j; i++) {
      Sres = Tevec(alpha, beta, j, ritz[i], s);
      if (Sres > Sres_max) {
        Sres_max = Sres;
      }
      if (Sres > SRESTOL) {
        inc_bis_safety = TRUE;
      }
      bj[i] = s[j] * beta[j + 1];
    }
    tevec_time += lanc_seconds() - time;

    time = lanc_seconds();
    if (version == 1) {
      scanmin(ritz, 1, j, &scanlist);
    }
    else {
      scanmax(ritz, 1, j, &scanlist);
    }
    converged = TRUE;
    if (j < d) {
      converged = FALSE;
    }
    else {
      curlnk = scanlist;
      while (curlnk != NULL) {
        if (bj[curlnk->indx] > bji_tol) {
          converged = FALSE;
        }
        curlnk = curlnk->pntr;
      }
    }
    scan_time += lanc_seconds() - time;
    j++;
  }
  j--;

  /* Collect eigenvalue and bound information. */
  time = lanc_seconds();
  mkeigvecs(scanlist, lambda, bound, index, bj, d, &Sres_max, alpha, beta + 1, j, s, y, n, q);
  evec_time += lanc_seconds() - time;

  /* Analyze computation for and report additional problems */
  time = lanc_seconds();
  if (DEBUG_EVECS > 0 && version == 2) {
    printf("\nTotal Symmlq iterations %3d\n", symmlqitns);
  }
  if (version == 2) {
    for (i = 1; i <= d; i++) {
      lambda[i] = 1.0 / lambda[i];
    }
  }
  warnings(workn, A, y, n, lambda, vwsqrt, Ares, bound, index, d, j, maxj, Sres_max, eigtol, u,
           Anorm, Output_File);
  debug_time += lanc_seconds() - time;

  /* Free any memory allocated in this routine. */
  time = lanc_seconds();
  frvec(u, 1);
  frvec(r, 1);
  frvec(Aq, 1);
  frvec(ritzvec, 1);
  frvec(zeros, 1);
  if (vwsqrt == NULL && version == 2) {
    frvec(ones, 1);
  }
  frvec(workn, 1);
  frvec(Ares, 1);
  frvec(inv_lambda, 1);
  sfree(index);
  frvec(alpha, 1);
  frvec(beta, 1);
  frvec(ritz, 1);
  frvec(s, 1);
  frvec(bj, 1);
  frvec(workj, 1);
  if (version == 2) {
    frvec(wv1, 0);
  }
  while (scanlist != NULL) {
    curlnk = scanlist->pntr;
    sfree(scanlist);
    scanlist = curlnk;
  }
  for (i = 1; i <= j; i++) {
    frvec(q[i], 1);
  }
  while (orthlist != NULL) {
    temp = orthlist->pntr;
    sfree(orthlist);
    orthlist = temp;
  }
  while (version == 2 && orthlist2 != NULL) {
    temp = orthlist2->pntr;
    sfree(orthlist2);
    orthlist2 = temp;
  }
  sfree(q);
  init_time += lanc_seconds() - time;
}