EXODUS/packages/seacas/libraries/chaco/klspiff/nway_kl.c

/*
 * Copyright(C) 1999-2020, 2022, 2023 National Technology & Engineering Solutions
 * of Sandia, LLC (NTESS).  Under the terms of Contract DE-NA0003525 with
 * NTESS, the U.S. Government retains certain rights in this software.
 *
 * See packages/seacas/LICENSE for details
 */

#include "defs.h"
#include "params.h"
#include "smalloc.h"
#include "structs.h"
#include <math.h>
#include <stdio.h>

/* Idea:
   'buckets[i][j]' is a set of buckets to sort moves from i to j.
   listspace[i] is space for lists in buckets[i][j].
   Loop through all nonequal pairs [i][j], taking the first element
   in each list.  Compare them all to find the largest allowed move.
   Make that move, and save it in movelist.
*/

int KL_MAX_PASS = -1; /* max KL passes; infinite if <= 0 */

int nway_kl(struct vtx_data **graph,       /* data structure for graph */
            int               nvtxs,       /* number of vtxs in graph */
            struct bilist ****buckets,     /* array of lists for bucket sort */
            struct bilist   **listspace,   /* list data structure for each vertex */
            int             **tops,        /* 2-D array of top of each set of buckets */
            int             **dvals,       /* d-values for each transition */
            int              *sets,        /* processor each vertex is assigned to */
            int               maxdval,     /* maximum d-value for a vertex */
            int               nsets,       /* number of sets divided into */
            double           *goal,        /* desired set sizes */
            float            *term_wgts[], /* weights for terminal propagation */
            int (*hops)[MAXSETS],          /* cost of set transitions */
            int     max_dev,               /* largest allowed deviation from balance */
            int     using_ewgts,           /* are edge weights being used? */
            int   **bndy_list,             /* list of vertices on boundary (0 ends) */
            double *startweight            /* sum of vweights in each set (in and out) */
)

/* Suaris and Kedem algorithm for quadrisection, generalized to an */
/* arbitrary number of sets, with intra-set cost function specified by hops. */
/* Note: this is for a single divide step. */
/* Also, sets contains an initial (possibly crummy) partitioning. */

{
  extern double   kl_bucket_time;             /* time spent in KL bucketsort */
  extern int      KL_BAD_MOVES;               /* # bad moves in a row to stop KL */
  extern int      DEBUG_KL;                   /* debug flag for KL */
  extern int      KL_RANDOM;                  /* use randomness in KL? */
  extern int      KL_NTRIES_BAD;              /* number of unhelpful passes before quitting */
  extern int      KL_UNDO_LIST;               /* should I back out of changes or start over? */
  extern int      KL_MAX_PASS;                /* maximum number of outer KL loops */
  extern double   CUT_TO_HOP_COST;            /* if term_prop; cut/hop importance */
  struct bilist  *movelist;                   /* list of vtxs to be moved */
  struct bilist **endlist;                    /* end of movelists */
  struct bilist  *bestptr;                    /* best vertex in linked list */
  struct bilist  *bptr;                       /* loops through bucket list */
  float          *ewptr     = NULL;           /* loops through edge weights */
  double         *locked    = NULL;           /* weight of vertices locked in a set */
  double         *loose     = NULL;           /* weight of vtxs that can move from a set */
  int            *bspace    = NULL;           /* list of active vertices for bucketsort */
  double         *weightsum = NULL;           /* sum of vweights for each partition */
  int            *edges     = NULL;           /* edge list for a vertex */
  int            *bdy_ptr   = NULL;           /* loops through bndy_list */
  double          time;                       /* timing parameter */
  double          delta;                      /* desire of sets to change size */
  double          bestdelta = -1;             /* strongest delta value */
  double          deltaplus;                  /* largest negative deviation from goal size */
  double          deltaminus;                 /* largest negative deviation from goal size */
  int             list_length;                /* how long is list of vertices to bucketsort? */
  int             balanced;                   /* is partition balanced? */
  int             temp_balanced;              /* is intermediate partition balanced? */
  int             ever_balanced;              /* has any partition been balanced? */
  int             bestvtx  = -1;              /* best vertex to move */
  int             bestval  = -1;              /* best change in value for a vtx move */
  int             bestfrom = -1, bestto = -1; /* sets best vertex moves between */
  int             vweight;                    /* weight of best vertex */
  int             gtotal;                     /* sum of changes from moving */
  int             improved;                   /* total improvement from KL */
  double          balance_val = 0.0;          /* how imbalanced is it */
  double          balance_best;               /* best balance yet if trying hard */
  double          bestg;                      /* maximum gtotal found in KL loop */
  double          bestg_min;                  /* smaller than any possible bestg */
  int             beststep;                   /* step where maximum value occurred */
  int             neighbor;                   /* neighbor of a vertex */
  int             step_cutoff;                /* number of negative steps in a row allowed */
  int             cost_cutoff;                /* amount of negative d-values allowed */
  int             neg_steps;                  /* number of negative steps in a row */
  int             neg_cost;                   /* decrease in sum of d-values */
  int             vtx;                        /* vertex number */
  int             dval;                       /* dval of a vertex */
  int             group;                      /* set that a vertex is assigned to */
  double          cut_cost;                   /* if term_prop; relative cut/hop importance */
  int             diff;                       /* change in a d-value */
  int             stuck1st, stuck2nd;         /* how soon will moves be disallowed? */
  int             beststuck1 = -1, beststuck2 = -1; /* best stuck values for tie-breaking */
  int             eweight;                          /* a particular edge weight */
  int             worth_undoing;                    /* is it worth undoing list? */
  float           undo_frac;            /* fraction of vtxs indicating worth of undoing */
  int             step;                 /* loops through movements of vertices */
  int             parity;               /* sort forwards or backwards? */
  int             done;                 /* has termination criteria been achieved? */
  int             nbad;                 /* number of unhelpful passes in a row */
  int             npass;                /* total number of passes */
  int             nbadtries;            /* number of unhelpful passes before quitting */
  int             enforce_balance;      /* force a balanced partition? */
  int             enforce_balance_hard; /* really force a balanced partition? */
  int             balance_trouble;      /* even balance_hard isn't working */
  int             size;                 /* array spacing */
  int             i, j, k, l;           /* loop counters */

  nbadtries = KL_NTRIES_BAD;

  enforce_balance      = FALSE;
  temp_balanced        = FALSE;
  enforce_balance_hard = FALSE;
  balance_trouble      = FALSE;

  size = (int)(&(listspace[0][1]) - &(listspace[0][0]));

  undo_frac = .3;

  cut_cost = 1;
  if (term_wgts[1] != NULL) {
    if (CUT_TO_HOP_COST > 1) {
      cut_cost = CUT_TO_HOP_COST;
    }
  }

  bspace    = smalloc_ret(nvtxs * sizeof(int));
  weightsum = smalloc_ret(nsets * sizeof(double));
  locked    = smalloc_ret(nsets * sizeof(double));
  loose     = smalloc_ret(nsets * sizeof(double));

  if (bspace == NULL || weightsum == NULL || locked == NULL || loose == NULL) {
    sfree(loose);
    sfree(locked);
    sfree(weightsum);
    sfree(bspace);
    return (1);
  }

  if (*bndy_list != NULL) {
    bdy_ptr     = *bndy_list;
    list_length = 0;
    while (*bdy_ptr != 0) {
      bspace[list_length++] = *bdy_ptr++;
    }
    sfree(*bndy_list);

    if (list_length == 0) { /* No boundary -> make everybody bndy. */
      for (i = 0; i < nvtxs; i++) {
        bspace[i] = i + 1;
      }
      list_length = nvtxs;
    }
    /* Set dvals to flag uninitialized vertices. */
    for (i = 1; i <= nvtxs; i++) {
      dvals[i][0] = 3 * maxdval;
    }
  }
  else {
    list_length = nvtxs;
  }

  step_cutoff = KL_BAD_MOVES;
  cost_cutoff = maxdval * step_cutoff / 7;
  if (cost_cutoff < step_cutoff) {
    cost_cutoff = step_cutoff;
  }

  deltaminus = deltaplus = 0;
  for (i = 0; i < nsets; i++) {
    if (startweight[i] - goal[i] > deltaplus) {
      deltaplus = startweight[i] - goal[i];
    }
    else if (goal[i] - startweight[i] > deltaminus) {
      deltaminus = goal[i] - startweight[i];
    }
  }
  balanced = (deltaplus + deltaminus <= max_dev);

  bestg_min = -2.0 * nvtxs * maxdval;
  parity    = FALSE;
  eweight   = cut_cost + .5;
  nbad      = 0;
  npass     = 0;
  improved  = 0;
  done      = FALSE;
  while (!done) {
    npass++;
    ever_balanced = FALSE;

    /* Initialize various quantities. */
    balance_best = 0;
    for (i = 0; i < nsets; i++) {
      for (j = 0; j < nsets; j++) {
        tops[i][j] = 2 * maxdval;
      }
      weightsum[i] = startweight[i];
      loose[i]     = weightsum[i];
      locked[i]    = 0;
      balance_best += goal[i];
    }

    gtotal   = 0;
    bestg    = bestg_min;
    beststep = -1;

    movelist = NULL;
    endlist  = &movelist;

    neg_steps = 0;

    /* Compute the initial d-values, and bucket-sort them. */
    time = seconds();
    if (nsets == 2) {
      bucketsorts_bi(graph, nvtxs, buckets, listspace, dvals, sets, term_wgts, maxdval, nsets,
                     parity, hops, bspace, list_length, npass, using_ewgts);
    }
    else {
      bucketsorts(graph, nvtxs, buckets, listspace, dvals, sets, term_wgts, maxdval, nsets, parity,
                  hops, bspace, list_length, npass, using_ewgts);
    }
    parity = !parity;
    kl_bucket_time += seconds() - time;

    if (DEBUG_KL > 2) {
      pbuckets(buckets, listspace, maxdval, nsets);
    }

    /* Now determine the set of K-L moves. */

    for (step = 1;; step++) {

      /* Find the highest d-value in each set. */
      /* But only consider moves from large to small sets, or moves */
      /* in which balance is preserved. */
      /* Break ties in some nonarbitrary manner. */
      bestval = -maxdval - 1;
      for (i = 0; i < nsets; i++) {
        for (j = 0; j < nsets; j++) {
          /* Only allow moves from large sets to small sets, or */
          /* moves which preserve balance. */
          if (i != j) {
            /* Find the best move from i to j. */
            for (k = tops[i][j]; k >= 0 && buckets[i][j][k] == NULL; k--) {
              ;
            }
            tops[i][j] = k;

            if (k >= 0) {
              l       = (j > i) ? j - 1 : j;
              vtx     = ((int)(buckets[i][j][k] - listspace[l])) / size;
              vweight = graph[vtx]->vwgt;

              if ((enforce_balance_hard && weightsum[i] >= goal[i] && weightsum[j] <= goal[j] &&
                   weightsum[i] - goal[i] - (weightsum[j] - goal[j]) > max_dev) ||
                  (!enforce_balance_hard && weightsum[i] >= goal[i] && weightsum[j] <= goal[j]) ||
                  (!enforce_balance_hard &&
                   weightsum[i] - vweight - goal[i] > -(double)((max_dev + 1) / 2) &&
                   weightsum[j] + vweight - goal[j] < (double)((max_dev + 1) / 2))) {

                /* Is it the best move seen so far? */
                if (k - maxdval > bestval) {
                  bestval = k - maxdval;
                  bestvtx = vtx;
                  bestto  = j;
                  /* DO I NEED ALL THIS DATA?  Just to break ties. */
                  bestdelta = fabs(weightsum[i] - vweight - goal[i]) +
                              fabs(weightsum[j] + vweight - goal[j]);
                  beststuck1 = min(loose[i], goal[j] - locked[j]);
                  beststuck2 = max(loose[i], goal[j] - locked[j]);
                }

                else if (k - maxdval == bestval) {
                  /* Tied.  Is better balanced than current best? */
                  /* If tied, move among sets with most freedom. */
                  stuck1st = min(loose[i], goal[j] - locked[j]);
                  stuck2nd = max(loose[i], goal[j] - locked[j]);
                  delta    = fabs(weightsum[i] - vweight - goal[i]) +
                          fabs(weightsum[j] + vweight - goal[j]);

                  /* NOTE: Randomization in this check isn't ideal */
                  /* if more than two guys are tied. */
                  if (delta < bestdelta ||
                      (delta == bestdelta &&
                       (stuck1st > beststuck1 ||
                        (stuck1st == beststuck1 &&
                         (stuck2nd > beststuck2 ||
                          (stuck2nd == beststuck2 && (KL_RANDOM && drandom() < .5))))))) {
                    bestval    = k - maxdval;
                    bestvtx    = vtx;
                    bestto     = j;
                    bestdelta  = delta;
                    beststuck1 = stuck1st;
                    beststuck2 = stuck2nd;
                  }
                }
              }
            }
          }
        }
      }
      if (bestval == -maxdval - 1) { /* No allowed moves */
        if (DEBUG_KL > 0) {
          printf("No KL moves at step %d.  bestg = %g at step %d.\n", step, bestg, beststep);
        }
        break;
      }

      bestptr  = &(listspace[0][bestvtx]);
      bestfrom = sets[bestvtx];

      vweight = graph[bestvtx]->vwgt;
      weightsum[bestto] += vweight;
      weightsum[bestfrom] -= vweight;
      loose[bestfrom] -= vweight;
      locked[bestto] += vweight;

      if (enforce_balance) { /* Check if this partition is balanced. */
        deltaminus = deltaplus = 0;
        for (i = 0; i < nsets; i++) {
          if (weightsum[i] - goal[i] > deltaplus) {
            deltaplus = weightsum[i] - goal[i];
          }
          else if (goal[i] - weightsum[i] > deltaminus) {
            deltaminus = goal[i] - weightsum[i];
          }
        }
        balance_val   = deltaminus + deltaplus;
        temp_balanced = (balance_val <= max_dev);
        ever_balanced = (ever_balanced || temp_balanced);
      }

      gtotal += bestval;
      if (((gtotal > bestg && (!enforce_balance || temp_balanced)) ||
           (enforce_balance_hard && balance_val < balance_best)) &&
          step != nvtxs) {
        bestg    = gtotal;
        beststep = step;
        if (enforce_balance_hard) {
          balance_best = balance_val;
        }
        if (temp_balanced) {
          enforce_balance_hard = FALSE;
        }
      }

      if (DEBUG_KL > 1) {
        printf("At KL step %d, bestvtx=%d, bestval=%d (%d-> %d)\n", step, bestvtx, bestval,
               bestfrom, bestto);
      }

      /* Monitor the stopping criteria. */
      if (bestval < 0) {
        if (!enforce_balance || ever_balanced) {
          neg_steps++;
        }
        if (bestg != bestg_min) {
          neg_cost = bestg - gtotal;
        }
        else {
          neg_cost = -maxdval - 1;
        }
        if ((neg_steps > step_cutoff || neg_cost > cost_cutoff) &&
            !(enforce_balance && bestg == bestg_min) && (beststep != step)) {
          if (DEBUG_KL > 0) {
            if (neg_steps > step_cutoff) {
              printf("KL step cutoff at step %d.  bestg = %g at step %d.\n", step, bestg, beststep);
            }
            else if (neg_cost > cost_cutoff) {
              printf("KL cost cutoff at step %d.  bestg = %g at step %d.\n", step, bestg, beststep);
            }
          }
          break;
        }
      }
      else if (bestval > 0) {
        neg_steps = 0;
      }

      /* Remove vertex from its buckets, and flag it as finished. */
      l = 0;
      for (k = 0; k < nsets; k++) {
        if (k != bestfrom) {
          dval = dvals[bestvtx][l] + maxdval;
          removebilist(&listspace[l][bestvtx], &buckets[bestfrom][k][dval]);
          l++;
        }
      }

      /* Is there a better way to do this? */
      sets[bestvtx] = -sets[bestvtx] - 1;

      /* Set up the linked list of moved vertices. */
      bestptr->next = NULL;
      bestptr->prev = (struct bilist *)(unsigned long long)bestto;
      *endlist      = bestptr;
      endlist       = &(bestptr->next);

      /* Now update the d-values of all the neighbors */
      edges = graph[bestvtx]->edges;
      if (using_ewgts) {
        ewptr = graph[bestvtx]->ewgts;
      }
      for (j = graph[bestvtx]->nedges - 1; j; j--) {
        neighbor = *(++edges);
        if (using_ewgts) {
          eweight = *(++ewptr) * cut_cost + .5;
        }

        /* First make sure neighbor is alive. */
        if (sets[neighbor] >= 0) {
          group = sets[neighbor];

          if (dvals[neighbor][0] >= 3 * maxdval) {
            /* New vertex, not yet in buckets. */
            /* Can't be neighbor of moved vtx, so compute */
            /* initial dvals and buckets, then update. */
            bucketsort1(graph, neighbor, buckets, listspace, dvals, sets, term_wgts, maxdval, nsets,
                        hops, using_ewgts);
          }

          l = 0;
          for (k = 0; k < nsets; k++) {
            if (k != group) {
              diff = eweight * (hops[k][bestfrom] - hops[group][bestfrom] + hops[group][bestto] -
                                hops[k][bestto]);
              dval = dvals[neighbor][l] + maxdval;
              movebilist(&listspace[l][neighbor], &buckets[group][k][dval],
                         &buckets[group][k][dval + diff]);
              dvals[neighbor][l] += diff;
              dval += diff;
              if (dval > tops[group][k]) {
                tops[group][k] = dval;
              }
              l++;
            }
          }
        }
      }
      if (DEBUG_KL > 2) {
        pbuckets(buckets, listspace, maxdval, nsets);
      }
    }

    /* Done with a pass; should we actually perform any swaps? */
    bptr = movelist;
    if (bestg > 0 || (bestg != bestg_min && !balanced && enforce_balance) ||
        (bestg != bestg_min && balance_trouble)) {
      improved += bestg;
      for (i = 1; i <= beststep; i++) {
        vtx    = ((int)(bptr - listspace[0])) / size;
        bestto = (int)(unsigned long long)bptr->prev;
        startweight[bestto] += graph[vtx]->vwgt;
        startweight[-sets[vtx] - 1] -= graph[vtx]->vwgt;
        sets[vtx] = bestto;
        bptr      = bptr->next;
      }

      deltaminus = deltaplus = 0;
      for (i = 0; i < nsets; i++) {
        if (startweight[i] - goal[i] > deltaplus) {
          deltaplus = startweight[i] - goal[i];
        }
        else if (goal[i] - startweight[i] > deltaminus) {
          deltaminus = goal[i] - startweight[i];
        }
      }
      /*
      printf(" deltaplus = %f, deltaminus = %f, max_dev = %d\n", deltaplus, deltaminus, max_dev);
      */
      balanced = (deltaplus + deltaminus <= max_dev);
    }
    else {
      nbad++;
    }

    if (!balanced || bptr == movelist) {
      if (enforce_balance) {
        if (enforce_balance_hard) {
          balance_trouble = TRUE;
        }
        enforce_balance_hard = TRUE;
      }
      enforce_balance = TRUE;
      nbad++;
    }

    worth_undoing = (step < undo_frac * nvtxs);
    done          = (nbad >= nbadtries && balanced);
    if (KL_MAX_PASS > 0) {
      done = done || (npass == KL_MAX_PASS && balanced);
    }
    if (!done) { /* Prepare for next pass. */
      if (KL_UNDO_LIST && worth_undoing && !balance_trouble) {
        /* Make a list of modified vertices for next bucketsort. */
        /* Also, ensure these vertices are removed from their buckets. */
        list_length =
            make_kl_list(graph, movelist, buckets, listspace, sets, nsets, bspace, dvals, maxdval);
      }
    }
    if (done || !(KL_UNDO_LIST && worth_undoing && !balance_trouble)) {
      /* Restore set numbers of remaining, altered vertices. */
      while (bptr != NULL) {
        vtx       = ((int)(bptr - listspace[0])) / size;
        sets[vtx] = -sets[vtx] - 1;
        bptr      = bptr->next;
      }
      list_length = nvtxs;
    }

    if (done && *bndy_list != NULL) {
      make_bndy_list(graph, movelist, buckets, listspace, sets, nsets, bspace, tops, bndy_list);
    }
  }

  if (DEBUG_KL > 0) {
    printf("   KL required %d passes to improve by %d.\n", npass, improved);
  }

  sfree(loose);
  sfree(locked);
  sfree(weightsum);
  sfree(bspace);
  return (0);
}