You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
532 lines
21 KiB
532 lines
21 KiB
/*
|
|
* Copyright(C) 1999-2020, 2022, 2023 National Technology & Engineering Solutions
|
|
* of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
|
* NTESS, the U.S. Government retains certain rights in this software.
|
|
*
|
|
* See packages/seacas/LICENSE for details
|
|
*/
|
|
|
|
#include "defs.h"
|
|
#include "params.h"
|
|
#include "smalloc.h"
|
|
#include "structs.h"
|
|
#include <math.h>
|
|
#include <stdio.h>
|
|
|
|
/* Idea:
   'buckets[i][j]' is a set of buckets to sort moves from i to j.
   listspace[i] is space for lists in buckets[i][j].
   Loop through all nonequal pairs [i][j], taking the first element
   in each list.  Compare them all to find the largest allowed move.
   Make that move, and save it in movelist.
*/
|
|
|
|
/* Cap on the number of outer passes made by nway_kl().  The cap is only
   consulted when it is > 0, and even then a pass count equal to the cap
   ends the loop only if the partition is balanced at that point. */
int KL_MAX_PASS = -1; /* max KL passes; infinite if <= 0 */
|
|
|
|
int nway_kl(struct vtx_data **graph, /* data structure for graph */
|
|
int nvtxs, /* number of vtxs in graph */
|
|
struct bilist ****buckets, /* array of lists for bucket sort */
|
|
struct bilist **listspace, /* list data structure for each vertex */
|
|
int **tops, /* 2-D array of top of each set of buckets */
|
|
int **dvals, /* d-values for each transition */
|
|
int *sets, /* processor each vertex is assigned to */
|
|
int maxdval, /* maximum d-value for a vertex */
|
|
int nsets, /* number of sets divided into */
|
|
double *goal, /* desired set sizes */
|
|
float *term_wgts[], /* weights for terminal propagation */
|
|
int (*hops)[MAXSETS], /* cost of set transitions */
|
|
int max_dev, /* largest allowed deviation from balance */
|
|
int using_ewgts, /* are edge weights being used? */
|
|
int **bndy_list, /* list of vertices on boundary (0 ends) */
|
|
double *startweight /* sum of vweights in each set (in and out) */
|
|
)
|
|
|
|
/* Suaris and Kedem algorithm for quadrisection, generalized to an */
|
|
/* arbitrary number of sets, with intra-set cost function specified by hops. */
|
|
/* Note: this is for a single divide step. */
|
|
/* Also, sets contains an initial (possibly crummy) partitioning. */
|
|
|
|
{
|
|
extern double kl_bucket_time; /* time spent in KL bucketsort */
|
|
extern int KL_BAD_MOVES; /* # bad moves in a row to stop KL */
|
|
extern int DEBUG_KL; /* debug flag for KL */
|
|
extern int KL_RANDOM; /* use randomness in KL? */
|
|
extern int KL_NTRIES_BAD; /* number of unhelpful passes before quitting */
|
|
extern int KL_UNDO_LIST; /* should I back out of changes or start over? */
|
|
extern int KL_MAX_PASS; /* maximum number of outer KL loops */
|
|
extern double CUT_TO_HOP_COST; /* if term_prop; cut/hop importance */
|
|
struct bilist *movelist; /* list of vtxs to be moved */
|
|
struct bilist **endlist; /* end of movelists */
|
|
struct bilist *bestptr; /* best vertex in linked list */
|
|
struct bilist *bptr; /* loops through bucket list */
|
|
float *ewptr = NULL; /* loops through edge weights */
|
|
double *locked = NULL; /* weight of vertices locked in a set */
|
|
double *loose = NULL; /* weight of vtxs that can move from a set */
|
|
int *bspace = NULL; /* list of active vertices for bucketsort */
|
|
double *weightsum = NULL; /* sum of vweights for each partition */
|
|
int *edges = NULL; /* edge list for a vertex */
|
|
int *bdy_ptr = NULL; /* loops through bndy_list */
|
|
double time; /* timing parameter */
|
|
double delta; /* desire of sets to change size */
|
|
double bestdelta = -1; /* strongest delta value */
|
|
double deltaplus; /* largest negative deviation from goal size */
|
|
double deltaminus; /* largest negative deviation from goal size */
|
|
int list_length; /* how long is list of vertices to bucketsort? */
|
|
int balanced; /* is partition balanced? */
|
|
int temp_balanced; /* is intermediate partition balanced? */
|
|
int ever_balanced; /* has any partition been balanced? */
|
|
int bestvtx = -1; /* best vertex to move */
|
|
int bestval = -1; /* best change in value for a vtx move */
|
|
int bestfrom = -1, bestto = -1; /* sets best vertex moves between */
|
|
int vweight; /* weight of best vertex */
|
|
int gtotal; /* sum of changes from moving */
|
|
int improved; /* total improvement from KL */
|
|
double balance_val = 0.0; /* how imbalanced is it */
|
|
double balance_best; /* best balance yet if trying hard */
|
|
double bestg; /* maximum gtotal found in KL loop */
|
|
double bestg_min; /* smaller than any possible bestg */
|
|
int beststep; /* step where maximum value occurred */
|
|
int neighbor; /* neighbor of a vertex */
|
|
int step_cutoff; /* number of negative steps in a row allowed */
|
|
int cost_cutoff; /* amount of negative d-values allowed */
|
|
int neg_steps; /* number of negative steps in a row */
|
|
int neg_cost; /* decrease in sum of d-values */
|
|
int vtx; /* vertex number */
|
|
int dval; /* dval of a vertex */
|
|
int group; /* set that a vertex is assigned to */
|
|
double cut_cost; /* if term_prop; relative cut/hop importance */
|
|
int diff; /* change in a d-value */
|
|
int stuck1st, stuck2nd; /* how soon will moves be disallowed? */
|
|
int beststuck1 = -1, beststuck2 = -1; /* best stuck values for tie-breaking */
|
|
int eweight; /* a particular edge weight */
|
|
int worth_undoing; /* is it worth undoing list? */
|
|
float undo_frac; /* fraction of vtxs indicating worth of undoing */
|
|
int step; /* loops through movements of vertices */
|
|
int parity; /* sort forwards or backwards? */
|
|
int done; /* has termination criteria been achieved? */
|
|
int nbad; /* number of unhelpful passes in a row */
|
|
int npass; /* total number of passes */
|
|
int nbadtries; /* number of unhelpful passes before quitting */
|
|
int enforce_balance; /* force a balanced partition? */
|
|
int enforce_balance_hard; /* really force a balanced partition? */
|
|
int balance_trouble; /* even balance_hard isn't working */
|
|
int size; /* array spacing */
|
|
int i, j, k, l; /* loop counters */
|
|
|
|
nbadtries = KL_NTRIES_BAD;
|
|
|
|
enforce_balance = FALSE;
|
|
temp_balanced = FALSE;
|
|
enforce_balance_hard = FALSE;
|
|
balance_trouble = FALSE;
|
|
|
|
size = (int)(&(listspace[0][1]) - &(listspace[0][0]));
|
|
|
|
undo_frac = .3;
|
|
|
|
cut_cost = 1;
|
|
if (term_wgts[1] != NULL) {
|
|
if (CUT_TO_HOP_COST > 1) {
|
|
cut_cost = CUT_TO_HOP_COST;
|
|
}
|
|
}
|
|
|
|
bspace = smalloc_ret(nvtxs * sizeof(int));
|
|
weightsum = smalloc_ret(nsets * sizeof(double));
|
|
locked = smalloc_ret(nsets * sizeof(double));
|
|
loose = smalloc_ret(nsets * sizeof(double));
|
|
|
|
if (bspace == NULL || weightsum == NULL || locked == NULL || loose == NULL) {
|
|
sfree(loose);
|
|
sfree(locked);
|
|
sfree(weightsum);
|
|
sfree(bspace);
|
|
return (1);
|
|
}
|
|
|
|
if (*bndy_list != NULL) {
|
|
bdy_ptr = *bndy_list;
|
|
list_length = 0;
|
|
while (*bdy_ptr != 0) {
|
|
bspace[list_length++] = *bdy_ptr++;
|
|
}
|
|
sfree(*bndy_list);
|
|
|
|
if (list_length == 0) { /* No boundary -> make everybody bndy. */
|
|
for (i = 0; i < nvtxs; i++) {
|
|
bspace[i] = i + 1;
|
|
}
|
|
list_length = nvtxs;
|
|
}
|
|
/* Set dvals to flag uninitialized vertices. */
|
|
for (i = 1; i <= nvtxs; i++) {
|
|
dvals[i][0] = 3 * maxdval;
|
|
}
|
|
}
|
|
else {
|
|
list_length = nvtxs;
|
|
}
|
|
|
|
step_cutoff = KL_BAD_MOVES;
|
|
cost_cutoff = maxdval * step_cutoff / 7;
|
|
if (cost_cutoff < step_cutoff) {
|
|
cost_cutoff = step_cutoff;
|
|
}
|
|
|
|
deltaminus = deltaplus = 0;
|
|
for (i = 0; i < nsets; i++) {
|
|
if (startweight[i] - goal[i] > deltaplus) {
|
|
deltaplus = startweight[i] - goal[i];
|
|
}
|
|
else if (goal[i] - startweight[i] > deltaminus) {
|
|
deltaminus = goal[i] - startweight[i];
|
|
}
|
|
}
|
|
balanced = (deltaplus + deltaminus <= max_dev);
|
|
|
|
bestg_min = -2.0 * nvtxs * maxdval;
|
|
parity = FALSE;
|
|
eweight = cut_cost + .5;
|
|
nbad = 0;
|
|
npass = 0;
|
|
improved = 0;
|
|
done = FALSE;
|
|
while (!done) {
|
|
npass++;
|
|
ever_balanced = FALSE;
|
|
|
|
/* Initialize various quantities. */
|
|
balance_best = 0;
|
|
for (i = 0; i < nsets; i++) {
|
|
for (j = 0; j < nsets; j++) {
|
|
tops[i][j] = 2 * maxdval;
|
|
}
|
|
weightsum[i] = startweight[i];
|
|
loose[i] = weightsum[i];
|
|
locked[i] = 0;
|
|
balance_best += goal[i];
|
|
}
|
|
|
|
gtotal = 0;
|
|
bestg = bestg_min;
|
|
beststep = -1;
|
|
|
|
movelist = NULL;
|
|
endlist = &movelist;
|
|
|
|
neg_steps = 0;
|
|
|
|
/* Compute the initial d-values, and bucket-sort them. */
|
|
time = seconds();
|
|
if (nsets == 2) {
|
|
bucketsorts_bi(graph, nvtxs, buckets, listspace, dvals, sets, term_wgts, maxdval, nsets,
|
|
parity, hops, bspace, list_length, npass, using_ewgts);
|
|
}
|
|
else {
|
|
bucketsorts(graph, nvtxs, buckets, listspace, dvals, sets, term_wgts, maxdval, nsets, parity,
|
|
hops, bspace, list_length, npass, using_ewgts);
|
|
}
|
|
parity = !parity;
|
|
kl_bucket_time += seconds() - time;
|
|
|
|
if (DEBUG_KL > 2) {
|
|
pbuckets(buckets, listspace, maxdval, nsets);
|
|
}
|
|
|
|
/* Now determine the set of K-L moves. */
|
|
|
|
for (step = 1;; step++) {
|
|
|
|
/* Find the highest d-value in each set. */
|
|
/* But only consider moves from large to small sets, or moves */
|
|
/* in which balance is preserved. */
|
|
/* Break ties in some nonarbitrary manner. */
|
|
bestval = -maxdval - 1;
|
|
for (i = 0; i < nsets; i++) {
|
|
for (j = 0; j < nsets; j++) {
|
|
/* Only allow moves from large sets to small sets, or */
|
|
/* moves which preserve balance. */
|
|
if (i != j) {
|
|
/* Find the best move from i to j. */
|
|
for (k = tops[i][j]; k >= 0 && buckets[i][j][k] == NULL; k--) {
|
|
;
|
|
}
|
|
tops[i][j] = k;
|
|
|
|
if (k >= 0) {
|
|
l = (j > i) ? j - 1 : j;
|
|
vtx = ((int)(buckets[i][j][k] - listspace[l])) / size;
|
|
vweight = graph[vtx]->vwgt;
|
|
|
|
if ((enforce_balance_hard && weightsum[i] >= goal[i] && weightsum[j] <= goal[j] &&
|
|
weightsum[i] - goal[i] - (weightsum[j] - goal[j]) > max_dev) ||
|
|
(!enforce_balance_hard && weightsum[i] >= goal[i] && weightsum[j] <= goal[j]) ||
|
|
(!enforce_balance_hard &&
|
|
weightsum[i] - vweight - goal[i] > -(double)((max_dev + 1) / 2) &&
|
|
weightsum[j] + vweight - goal[j] < (double)((max_dev + 1) / 2))) {
|
|
|
|
/* Is it the best move seen so far? */
|
|
if (k - maxdval > bestval) {
|
|
bestval = k - maxdval;
|
|
bestvtx = vtx;
|
|
bestto = j;
|
|
/* DO I NEED ALL THIS DATA? Just to break ties. */
|
|
bestdelta = fabs(weightsum[i] - vweight - goal[i]) +
|
|
fabs(weightsum[j] + vweight - goal[j]);
|
|
beststuck1 = min(loose[i], goal[j] - locked[j]);
|
|
beststuck2 = max(loose[i], goal[j] - locked[j]);
|
|
}
|
|
|
|
else if (k - maxdval == bestval) {
|
|
/* Tied. Is better balanced than current best? */
|
|
/* If tied, move among sets with most freedom. */
|
|
stuck1st = min(loose[i], goal[j] - locked[j]);
|
|
stuck2nd = max(loose[i], goal[j] - locked[j]);
|
|
delta = fabs(weightsum[i] - vweight - goal[i]) +
|
|
fabs(weightsum[j] + vweight - goal[j]);
|
|
|
|
/* NOTE: Randomization in this check isn't ideal */
|
|
/* if more than two guys are tied. */
|
|
if (delta < bestdelta ||
|
|
(delta == bestdelta &&
|
|
(stuck1st > beststuck1 ||
|
|
(stuck1st == beststuck1 &&
|
|
(stuck2nd > beststuck2 ||
|
|
(stuck2nd == beststuck2 && (KL_RANDOM && drandom() < .5))))))) {
|
|
bestval = k - maxdval;
|
|
bestvtx = vtx;
|
|
bestto = j;
|
|
bestdelta = delta;
|
|
beststuck1 = stuck1st;
|
|
beststuck2 = stuck2nd;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (bestval == -maxdval - 1) { /* No allowed moves */
|
|
if (DEBUG_KL > 0) {
|
|
printf("No KL moves at step %d. bestg = %g at step %d.\n", step, bestg, beststep);
|
|
}
|
|
break;
|
|
}
|
|
|
|
bestptr = &(listspace[0][bestvtx]);
|
|
bestfrom = sets[bestvtx];
|
|
|
|
vweight = graph[bestvtx]->vwgt;
|
|
weightsum[bestto] += vweight;
|
|
weightsum[bestfrom] -= vweight;
|
|
loose[bestfrom] -= vweight;
|
|
locked[bestto] += vweight;
|
|
|
|
if (enforce_balance) { /* Check if this partition is balanced. */
|
|
deltaminus = deltaplus = 0;
|
|
for (i = 0; i < nsets; i++) {
|
|
if (weightsum[i] - goal[i] > deltaplus) {
|
|
deltaplus = weightsum[i] - goal[i];
|
|
}
|
|
else if (goal[i] - weightsum[i] > deltaminus) {
|
|
deltaminus = goal[i] - weightsum[i];
|
|
}
|
|
}
|
|
balance_val = deltaminus + deltaplus;
|
|
temp_balanced = (balance_val <= max_dev);
|
|
ever_balanced = (ever_balanced || temp_balanced);
|
|
}
|
|
|
|
gtotal += bestval;
|
|
if (((gtotal > bestg && (!enforce_balance || temp_balanced)) ||
|
|
(enforce_balance_hard && balance_val < balance_best)) &&
|
|
step != nvtxs) {
|
|
bestg = gtotal;
|
|
beststep = step;
|
|
if (enforce_balance_hard) {
|
|
balance_best = balance_val;
|
|
}
|
|
if (temp_balanced) {
|
|
enforce_balance_hard = FALSE;
|
|
}
|
|
}
|
|
|
|
if (DEBUG_KL > 1) {
|
|
printf("At KL step %d, bestvtx=%d, bestval=%d (%d-> %d)\n", step, bestvtx, bestval,
|
|
bestfrom, bestto);
|
|
}
|
|
|
|
/* Monitor the stopping criteria. */
|
|
if (bestval < 0) {
|
|
if (!enforce_balance || ever_balanced) {
|
|
neg_steps++;
|
|
}
|
|
if (bestg != bestg_min) {
|
|
neg_cost = bestg - gtotal;
|
|
}
|
|
else {
|
|
neg_cost = -maxdval - 1;
|
|
}
|
|
if ((neg_steps > step_cutoff || neg_cost > cost_cutoff) &&
|
|
!(enforce_balance && bestg == bestg_min) && (beststep != step)) {
|
|
if (DEBUG_KL > 0) {
|
|
if (neg_steps > step_cutoff) {
|
|
printf("KL step cutoff at step %d. bestg = %g at step %d.\n", step, bestg, beststep);
|
|
}
|
|
else if (neg_cost > cost_cutoff) {
|
|
printf("KL cost cutoff at step %d. bestg = %g at step %d.\n", step, bestg, beststep);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
else if (bestval > 0) {
|
|
neg_steps = 0;
|
|
}
|
|
|
|
/* Remove vertex from its buckets, and flag it as finished. */
|
|
l = 0;
|
|
for (k = 0; k < nsets; k++) {
|
|
if (k != bestfrom) {
|
|
dval = dvals[bestvtx][l] + maxdval;
|
|
removebilist(&listspace[l][bestvtx], &buckets[bestfrom][k][dval]);
|
|
l++;
|
|
}
|
|
}
|
|
|
|
/* Is there a better way to do this? */
|
|
sets[bestvtx] = -sets[bestvtx] - 1;
|
|
|
|
/* Set up the linked list of moved vertices. */
|
|
bestptr->next = NULL;
|
|
bestptr->prev = (struct bilist *)(unsigned long long)bestto;
|
|
*endlist = bestptr;
|
|
endlist = &(bestptr->next);
|
|
|
|
/* Now update the d-values of all the neighbors */
|
|
edges = graph[bestvtx]->edges;
|
|
if (using_ewgts) {
|
|
ewptr = graph[bestvtx]->ewgts;
|
|
}
|
|
for (j = graph[bestvtx]->nedges - 1; j; j--) {
|
|
neighbor = *(++edges);
|
|
if (using_ewgts) {
|
|
eweight = *(++ewptr) * cut_cost + .5;
|
|
}
|
|
|
|
/* First make sure neighbor is alive. */
|
|
if (sets[neighbor] >= 0) {
|
|
group = sets[neighbor];
|
|
|
|
if (dvals[neighbor][0] >= 3 * maxdval) {
|
|
/* New vertex, not yet in buckets. */
|
|
/* Can't be neighbor of moved vtx, so compute */
|
|
/* initial dvals and buckets, then update. */
|
|
bucketsort1(graph, neighbor, buckets, listspace, dvals, sets, term_wgts, maxdval, nsets,
|
|
hops, using_ewgts);
|
|
}
|
|
|
|
l = 0;
|
|
for (k = 0; k < nsets; k++) {
|
|
if (k != group) {
|
|
diff = eweight * (hops[k][bestfrom] - hops[group][bestfrom] + hops[group][bestto] -
|
|
hops[k][bestto]);
|
|
dval = dvals[neighbor][l] + maxdval;
|
|
movebilist(&listspace[l][neighbor], &buckets[group][k][dval],
|
|
&buckets[group][k][dval + diff]);
|
|
dvals[neighbor][l] += diff;
|
|
dval += diff;
|
|
if (dval > tops[group][k]) {
|
|
tops[group][k] = dval;
|
|
}
|
|
l++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (DEBUG_KL > 2) {
|
|
pbuckets(buckets, listspace, maxdval, nsets);
|
|
}
|
|
}
|
|
|
|
/* Done with a pass; should we actually perform any swaps? */
|
|
bptr = movelist;
|
|
if (bestg > 0 || (bestg != bestg_min && !balanced && enforce_balance) ||
|
|
(bestg != bestg_min && balance_trouble)) {
|
|
improved += bestg;
|
|
for (i = 1; i <= beststep; i++) {
|
|
vtx = ((int)(bptr - listspace[0])) / size;
|
|
bestto = (int)(unsigned long long)bptr->prev;
|
|
startweight[bestto] += graph[vtx]->vwgt;
|
|
startweight[-sets[vtx] - 1] -= graph[vtx]->vwgt;
|
|
sets[vtx] = bestto;
|
|
bptr = bptr->next;
|
|
}
|
|
|
|
deltaminus = deltaplus = 0;
|
|
for (i = 0; i < nsets; i++) {
|
|
if (startweight[i] - goal[i] > deltaplus) {
|
|
deltaplus = startweight[i] - goal[i];
|
|
}
|
|
else if (goal[i] - startweight[i] > deltaminus) {
|
|
deltaminus = goal[i] - startweight[i];
|
|
}
|
|
}
|
|
/*
|
|
printf(" deltaplus = %f, deltaminus = %f, max_dev = %d\n", deltaplus, deltaminus, max_dev);
|
|
*/
|
|
balanced = (deltaplus + deltaminus <= max_dev);
|
|
}
|
|
else {
|
|
nbad++;
|
|
}
|
|
|
|
if (!balanced || bptr == movelist) {
|
|
if (enforce_balance) {
|
|
if (enforce_balance_hard) {
|
|
balance_trouble = TRUE;
|
|
}
|
|
enforce_balance_hard = TRUE;
|
|
}
|
|
enforce_balance = TRUE;
|
|
nbad++;
|
|
}
|
|
|
|
worth_undoing = (step < undo_frac * nvtxs);
|
|
done = (nbad >= nbadtries && balanced);
|
|
if (KL_MAX_PASS > 0) {
|
|
done = done || (npass == KL_MAX_PASS && balanced);
|
|
}
|
|
if (!done) { /* Prepare for next pass. */
|
|
if (KL_UNDO_LIST && worth_undoing && !balance_trouble) {
|
|
/* Make a list of modified vertices for next bucketsort. */
|
|
/* Also, ensure these vertices are removed from their buckets. */
|
|
list_length =
|
|
make_kl_list(graph, movelist, buckets, listspace, sets, nsets, bspace, dvals, maxdval);
|
|
}
|
|
}
|
|
if (done || !(KL_UNDO_LIST && worth_undoing && !balance_trouble)) {
|
|
/* Restore set numbers of remaining, altered vertices. */
|
|
while (bptr != NULL) {
|
|
vtx = ((int)(bptr - listspace[0])) / size;
|
|
sets[vtx] = -sets[vtx] - 1;
|
|
bptr = bptr->next;
|
|
}
|
|
list_length = nvtxs;
|
|
}
|
|
|
|
if (done && *bndy_list != NULL) {
|
|
make_bndy_list(graph, movelist, buckets, listspace, sets, nsets, bspace, tops, bndy_list);
|
|
}
|
|
}
|
|
|
|
if (DEBUG_KL > 0) {
|
|
printf(" KL required %d passes to improve by %d.\n", npass, improved);
|
|
}
|
|
|
|
sfree(loose);
|
|
sfree(locked);
|
|
sfree(weightsum);
|
|
sfree(bspace);
|
|
return (0);
|
|
}
|
|
|