/*
 * Copyright(C) 1999-2020, 2022, 2023 National Technology & Engineering Solutions
 * of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
 * NTESS, the U.S. Government retains certain rights in this software.
 *
 * See packages/seacas/LICENSE for details
 */

#include "defs.h"
#include "params.h"
#include "smalloc.h"
#include "structs.h"
#include <math.h>
#include <stdio.h>

/* Idea:
   'buckets[i][j]' is a set of buckets to sort moves from i to j.
   listspace[i] is space for lists in buckets[i][j].
   Loop through all nonequal pairs [i][j], taking the first element
   in each list. Compare them all to find the largest allowed move.
   Make that move, and save it in movelist.
*/

int KL_MAX_PASS = -1; /* max KL passes; infinite if <= 0 */

/*
 * Multi-way Kernighan-Lin style refinement of an existing partition.
 *
 * Each outer pass bucket-sorts the candidate moves, then repeatedly picks the
 * best allowed move (highest d-value, ties broken by balance and freedom),
 * tentatively applies it and updates the d-values of the moved vertex's
 * neighbors. At the end of a pass the prefix of moves up to the best step is
 * committed into 'sets' and 'startweight'; the rest are rolled back.
 *
 * Returns 1 if the work arrays cannot be allocated (nothing is modified in
 * that case), 0 otherwise.
 *
 * On entry, a non-NULL *bndy_list is read as a 0-terminated list of vertices
 * to seed the first bucket sort and is then freed. In that case
 * make_bndy_list() is called once refinement is finished.
 * NOTE(review): make_bndy_list() is defined elsewhere; it is expected to
 * store the rebuilt list in *bndy_list -- confirm against its definition.
 */
int nway_kl(struct vtx_data **graph,      /* data structure for graph */
            int               nvtxs,      /* number of vtxs in graph */
            struct bilist ****buckets,    /* array of lists for bucket sort */
            struct bilist   **listspace,  /* list data structure for each vertex */
            int             **tops,       /* 2-D array of top of each set of buckets */
            int             **dvals,      /* d-values for each transition */
            int              *sets,       /* processor each vertex is assigned to */
            int               maxdval,    /* maximum d-value for a vertex */
            int               nsets,      /* number of sets divided into */
            double           *goal,       /* desired set sizes */
            float            *term_wgts[], /* weights for terminal propagation */
            int (*hops)[MAXSETS],         /* cost of set transitions */
            int      max_dev,             /* largest allowed deviation from balance */
            int      using_ewgts,         /* are edge weights being used? */
            int    **bndy_list,           /* list of vertices on boundary (0 ends) */
            double  *startweight          /* sum of vweights in each set (in and out) */
)

/* Suaris and Kedem algorithm for quadrisection, generalized to an */
/* arbitrary number of sets, with intra-set cost function specified by hops. */
/* Note: this is for a single divide step. */
/* Also, sets contains an initial (possibly crummy) partitioning. */

{
  extern double kl_bucket_time;  /* time spent in KL bucketsort */
  extern int    KL_BAD_MOVES;    /* # bad moves in a row to stop KL */
  extern int    DEBUG_KL;        /* debug flag for KL */
  extern int    KL_RANDOM;       /* use randomness in KL? */
  extern int    KL_NTRIES_BAD;   /* number of unhelpful passes before quitting */
  extern int    KL_UNDO_LIST;    /* should I back out of changes or start over? */
  extern int    KL_MAX_PASS;     /* maximum number of outer KL loops */
  extern double CUT_TO_HOP_COST; /* if term_prop; cut/hop importance */
  struct bilist  *movelist;      /* list of vtxs to be moved */
  struct bilist **endlist;       /* end of movelists */
  struct bilist  *bestptr;       /* best vertex in linked list */
  struct bilist  *bptr;          /* loops through bucket list */
  float          *ewptr     = NULL; /* loops through edge weights */
  double         *locked    = NULL; /* weight of vertices locked in a set */
  double         *loose     = NULL; /* weight of vtxs that can move from a set */
  int            *bspace    = NULL; /* list of active vertices for bucketsort */
  double         *weightsum = NULL; /* sum of vweights for each partition */
  int            *edges     = NULL; /* edge list for a vertex */
  int            *bdy_ptr   = NULL; /* loops through bndy_list */
  double          time;             /* timing parameter */
  double          delta;            /* desire of sets to change size */
  double          bestdelta = -1;   /* strongest delta value */
  double          deltaplus;        /* largest positive deviation from goal size */
  double          deltaminus;       /* largest negative deviation from goal size */
  int             list_length;      /* how long is list of vertices to bucketsort? */
  int             balanced;         /* is partition balanced? */
  int             temp_balanced;    /* is intermediate partition balanced? */
  int             ever_balanced;    /* has any partition been balanced? */
  int             bestvtx  = -1;    /* best vertex to move */
  int             bestval  = -1;    /* best change in value for a vtx move */
  int             bestfrom = -1, bestto = -1; /* sets best vertex moves between */
  int             vweight;           /* weight of best vertex */
  int             gtotal;            /* sum of changes from moving */
  int             improved;          /* total improvement from KL */
  double          balance_val = 0.0; /* how imbalanced is it */
  double          balance_best;      /* best balance yet if trying hard */
  double          bestg;             /* maximum gtotal found in KL loop */
  double          bestg_min;         /* smaller than any possible bestg */
  int             beststep;          /* step where maximum value occurred */
  int             neighbor;          /* neighbor of a vertex */
  int             step_cutoff;       /* number of negative steps in a row allowed */
  int             cost_cutoff;       /* amount of negative d-values allowed */
  int             neg_steps;         /* number of negative steps in a row */
  int             neg_cost;          /* decrease in sum of d-values */
  int             vtx;               /* vertex number */
  int             dval;              /* dval of a vertex */
  int             group;             /* set that a vertex is assigned to */
  double          cut_cost;          /* if term_prop; relative cut/hop importance */
  int             diff;              /* change in a d-value */
  int             stuck1st, stuck2nd; /* how soon will moves be disallowed? */
  int             beststuck1 = -1, beststuck2 = -1; /* best stuck values for tie-breaking */
  int             eweight;              /* a particular edge weight */
  int             worth_undoing;        /* is it worth undoing list? */
  float           undo_frac;            /* fraction of vtxs indicating worth of undoing */
  int             step;                 /* loops through movements of vertices */
  int             parity;               /* sort forwards or backwards? */
  int             done;                 /* has termination criteria been achieved? */
  int             nbad;                 /* number of unhelpful passes in a row */
  int             npass;                /* total number of passes */
  int             nbadtries;            /* number of unhelpful passes before quitting */
  int             enforce_balance;      /* force a balanced partition? */
  int             enforce_balance_hard; /* really force a balanced partition? */
  int             balance_trouble;      /* even balance_hard isn't working */
  int             size;                 /* array spacing */
  int             have_bndy;            /* did the caller supply a boundary list? */
  int             i, j, k, l;           /* loop counters */

  nbadtries = KL_NTRIES_BAD;

  enforce_balance      = FALSE;
  temp_balanced        = FALSE;
  enforce_balance_hard = FALSE;
  balance_trouble      = FALSE;

  size = (int)(&(listspace[0][1]) - &(listspace[0][0]));

  undo_frac = .3;

  cut_cost = 1;
  if (term_wgts[1] != NULL) {
    if (CUT_TO_HOP_COST > 1) {
      cut_cost = CUT_TO_HOP_COST;
    }
  }

  bspace    = smalloc_ret(nvtxs * sizeof(int));
  weightsum = smalloc_ret(nsets * sizeof(double));
  locked    = smalloc_ret(nsets * sizeof(double));
  loose     = smalloc_ret(nsets * sizeof(double));

  if (bspace == NULL || weightsum == NULL || locked == NULL || loose == NULL) {
    sfree(loose);
    sfree(locked);
    sfree(weightsum);
    sfree(bspace);
    return (1);
  }

  /* Remember whether a boundary list was supplied: the list itself is freed */
  /* below, so the pointer must not be inspected again afterwards. */
  have_bndy = (*bndy_list != NULL);

  if (have_bndy) {
    bdy_ptr     = *bndy_list;
    list_length = 0;
    while (*bdy_ptr != 0) {
      bspace[list_length++] = *bdy_ptr++;
    }
    sfree(*bndy_list);
    *bndy_list = NULL; /* don't leave a dangling pointer in the caller's slot */

    if (list_length == 0) { /* No boundary -> make everybody bndy. */
      for (i = 0; i < nvtxs; i++) {
        bspace[i] = i + 1;
      }
      list_length = nvtxs;
    }
    /* Set dvals to flag uninitialized vertices. */
    for (i = 1; i <= nvtxs; i++) {
      dvals[i][0] = 3 * maxdval;
    }
  }
  else {
    list_length = nvtxs;
  }

  step_cutoff = KL_BAD_MOVES;
  cost_cutoff = maxdval * step_cutoff / 7;
  if (cost_cutoff < step_cutoff) {
    cost_cutoff = step_cutoff;
  }

  /* Measure how far the starting partition is from the goal sizes. */
  deltaminus = deltaplus = 0;
  for (i = 0; i < nsets; i++) {
    if (startweight[i] - goal[i] > deltaplus) {
      deltaplus = startweight[i] - goal[i];
    }
    else if (goal[i] - startweight[i] > deltaminus) {
      deltaminus = goal[i] - startweight[i];
    }
  }
  balanced = (deltaplus + deltaminus <= max_dev);

  bestg_min = -2.0 * nvtxs * maxdval;
  parity    = FALSE;
  eweight   = cut_cost + .5;
  nbad      = 0;
  npass     = 0;
  improved  = 0;
  done      = FALSE;
  while (!done) {
    npass++;
    ever_balanced = FALSE;

    /* Initialize various quantities. */
    balance_best = 0;
    for (i = 0; i < nsets; i++) {
      for (j = 0; j < nsets; j++) {
        tops[i][j] = 2 * maxdval;
      }
      weightsum[i] = startweight[i];
      loose[i]     = weightsum[i];
      locked[i]    = 0;
      balance_best += goal[i];
    }

    gtotal   = 0;
    bestg    = bestg_min;
    beststep = -1;

    movelist = NULL;
    endlist  = &movelist;

    neg_steps = 0;

    /* Compute the initial d-values, and bucket-sort them. */
    time = seconds();
    if (nsets == 2) {
      bucketsorts_bi(graph, nvtxs, buckets, listspace, dvals, sets, term_wgts, maxdval, nsets,
                     parity, hops, bspace, list_length, npass, using_ewgts);
    }
    else {
      bucketsorts(graph, nvtxs, buckets, listspace, dvals, sets, term_wgts, maxdval, nsets,
                  parity, hops, bspace, list_length, npass, using_ewgts);
    }
    parity = !parity;
    kl_bucket_time += seconds() - time;

    if (DEBUG_KL > 2) {
      pbuckets(buckets, listspace, maxdval, nsets);
    }

    /* Now determine the set of K-L moves. */

    for (step = 1;; step++) {

      /* Find the highest d-value in each set. */
      /* But only consider moves from large to small sets, or moves */
      /* in which balance is preserved. */
      /* Break ties in some nonarbitrary manner. */
      bestval = -maxdval - 1;
      for (i = 0; i < nsets; i++) {
        for (j = 0; j < nsets; j++) {
          /* Only allow moves from large sets to small sets, or */
          /* moves which preserve balance. */
          if (i != j) {
            /* Find the best move from i to j. */
            for (k = tops[i][j]; k >= 0 && buckets[i][j][k] == NULL; k--) {
              ;
            }
            tops[i][j] = k;

            if (k >= 0) {
              /* listspace has no slot for the "from" set itself, so */
              /* target sets above i are shifted down by one. */
              l       = (j > i) ? j - 1 : j;
              vtx     = ((int)(buckets[i][j][k] - listspace[l])) / size;
              vweight = graph[vtx]->vwgt;

              if ((enforce_balance_hard && weightsum[i] >= goal[i] && weightsum[j] <= goal[j] &&
                   weightsum[i] - goal[i] - (weightsum[j] - goal[j]) > max_dev) ||
                  (!enforce_balance_hard && weightsum[i] >= goal[i] &&
                   weightsum[j] <= goal[j]) ||
                  (!enforce_balance_hard &&
                   weightsum[i] - vweight - goal[i] > -(double)((max_dev + 1) / 2) &&
                   weightsum[j] + vweight - goal[j] < (double)((max_dev + 1) / 2))) {

                /* Is it the best move seen so far? */
                if (k - maxdval > bestval) {
                  bestval = k - maxdval;
                  bestvtx = vtx;
                  bestto  = j;
                  /* DO I NEED ALL THIS DATA? Just to break ties. */
                  bestdelta = fabs(weightsum[i] - vweight - goal[i]) +
                              fabs(weightsum[j] + vweight - goal[j]);
                  beststuck1 = min(loose[i], goal[j] - locked[j]);
                  beststuck2 = max(loose[i], goal[j] - locked[j]);
                }

                else if (k - maxdval == bestval) {
                  /* Tied. Is better balanced than current best? */
                  /* If tied, move among sets with most freedom. */
                  stuck1st = min(loose[i], goal[j] - locked[j]);
                  stuck2nd = max(loose[i], goal[j] - locked[j]);
                  delta    = fabs(weightsum[i] - vweight - goal[i]) +
                          fabs(weightsum[j] + vweight - goal[j]);

                  /* NOTE: Randomization in this check isn't ideal */
                  /* if more than two guys are tied. */
                  if (delta < bestdelta ||
                      (delta == bestdelta &&
                       (stuck1st > beststuck1 ||
                        (stuck1st == beststuck1 &&
                         (stuck2nd > beststuck2 ||
                          (stuck2nd == beststuck2 && (KL_RANDOM && drandom() < .5))))))) {
                    bestval    = k - maxdval;
                    bestvtx    = vtx;
                    bestto     = j;
                    bestdelta  = delta;
                    beststuck1 = stuck1st;
                    beststuck2 = stuck2nd;
                  }
                }
              }
            }
          }
        }
      }
      if (bestval == -maxdval - 1) { /* No allowed moves */
        if (DEBUG_KL > 0) {
          printf("No KL moves at step %d. bestg = %g at step %d.\n", step, bestg, beststep);
        }
        break;
      }

      bestptr  = &(listspace[0][bestvtx]);
      bestfrom = sets[bestvtx];

      vweight = graph[bestvtx]->vwgt;
      weightsum[bestto] += vweight;
      weightsum[bestfrom] -= vweight;
      loose[bestfrom] -= vweight;
      locked[bestto] += vweight;

      if (enforce_balance) { /* Check if this partition is balanced. */
        deltaminus = deltaplus = 0;
        for (i = 0; i < nsets; i++) {
          if (weightsum[i] - goal[i] > deltaplus) {
            deltaplus = weightsum[i] - goal[i];
          }
          else if (goal[i] - weightsum[i] > deltaminus) {
            deltaminus = goal[i] - weightsum[i];
          }
        }
        balance_val   = deltaminus + deltaplus;
        temp_balanced = (balance_val <= max_dev);
        ever_balanced = (ever_balanced || temp_balanced);
      }

      gtotal += bestval;
      if (((gtotal > bestg && (!enforce_balance || temp_balanced)) ||
           (enforce_balance_hard && balance_val < balance_best)) &&
          step != nvtxs) {
        bestg    = gtotal;
        beststep = step;
        if (enforce_balance_hard) {
          balance_best = balance_val;
        }
        if (temp_balanced) {
          enforce_balance_hard = FALSE;
        }
      }

      if (DEBUG_KL > 1) {
        printf("At KL step %d, bestvtx=%d, bestval=%d (%d-> %d)\n", step, bestvtx, bestval,
               bestfrom, bestto);
      }

      /* Monitor the stopping criteria. */
      if (bestval < 0) {
        if (!enforce_balance || ever_balanced) {
          neg_steps++;
        }
        if (bestg != bestg_min) {
          neg_cost = bestg - gtotal;
        }
        else {
          neg_cost = -maxdval - 1;
        }
        if ((neg_steps > step_cutoff || neg_cost > cost_cutoff) &&
            !(enforce_balance && bestg == bestg_min) && (beststep != step)) {
          if (DEBUG_KL > 0) {
            if (neg_steps > step_cutoff) {
              printf("KL step cutoff at step %d. bestg = %g at step %d.\n", step, bestg,
                     beststep);
            }
            else if (neg_cost > cost_cutoff) {
              printf("KL cost cutoff at step %d. bestg = %g at step %d.\n", step, bestg,
                     beststep);
            }
          }
          break;
        }
      }
      else if (bestval > 0) {
        neg_steps = 0;
      }

      /* Remove vertex from its buckets, and flag it as finished. */
      l = 0;
      for (k = 0; k < nsets; k++) {
        if (k != bestfrom) {
          dval = dvals[bestvtx][l] + maxdval;
          removebilist(&listspace[l][bestvtx], &buckets[bestfrom][k][dval]);
          l++;
        }
      }

      /* Is there a better way to do this? */
      /* A moved vertex is marked by a negative set number; the original */
      /* value is recovered with the same transformation. */
      sets[bestvtx] = -sets[bestvtx] - 1;

      /* Set up the linked list of moved vertices. */
      /* The otherwise unused 'prev' field carries the destination set. */
      bestptr->next = NULL;
      bestptr->prev = (struct bilist *)(unsigned long long)bestto;
      *endlist      = bestptr;
      endlist       = &(bestptr->next);

      /* Now update the d-values of all the neighbors */
      edges = graph[bestvtx]->edges;
      if (using_ewgts) {
        ewptr = graph[bestvtx]->ewgts;
      }
      /* Pre-increment skips entry 0 of the edge (and edge weight) arrays. */
      for (j = graph[bestvtx]->nedges - 1; j; j--) {
        neighbor = *(++edges);
        if (using_ewgts) {
          eweight = *(++ewptr) * cut_cost + .5;
        }

        /* First make sure neighbor is alive. */
        if (sets[neighbor] >= 0) {
          group = sets[neighbor];

          if (dvals[neighbor][0] >= 3 * maxdval) {
            /* New vertex, not yet in buckets. */
            /* Can't be neighbor of moved vtx, so compute */
            /* initial dvals and buckets, then update. */
            bucketsort1(graph, neighbor, buckets, listspace, dvals, sets, term_wgts, maxdval,
                        nsets, hops, using_ewgts);
          }

          l = 0;
          for (k = 0; k < nsets; k++) {
            if (k != group) {
              diff = eweight * (hops[k][bestfrom] - hops[group][bestfrom] +
                                hops[group][bestto] - hops[k][bestto]);
              dval = dvals[neighbor][l] + maxdval;
              movebilist(&listspace[l][neighbor], &buckets[group][k][dval],
                         &buckets[group][k][dval + diff]);
              dvals[neighbor][l] += diff;
              dval += diff;
              if (dval > tops[group][k]) {
                tops[group][k] = dval;
              }
              l++;
            }
          }
        }
      }
      if (DEBUG_KL > 2) {
        pbuckets(buckets, listspace, maxdval, nsets);
      }
    }

    /* Done with a pass; should we actually perform any swaps? */
    bptr = movelist;
    if (bestg > 0 || (bestg != bestg_min && !balanced && enforce_balance) ||
        (bestg != bestg_min && balance_trouble)) {
      improved += bestg;
      /* Commit the first 'beststep' moves of this pass. */
      for (i = 1; i <= beststep; i++) {
        vtx    = ((int)(bptr - listspace[0])) / size;
        bestto = (int)(unsigned long long)bptr->prev;
        startweight[bestto] += graph[vtx]->vwgt;
        startweight[-sets[vtx] - 1] -= graph[vtx]->vwgt;
        sets[vtx] = bestto;
        bptr      = bptr->next;
      }

      deltaminus = deltaplus = 0;
      for (i = 0; i < nsets; i++) {
        if (startweight[i] - goal[i] > deltaplus) {
          deltaplus = startweight[i] - goal[i];
        }
        else if (goal[i] - startweight[i] > deltaminus) {
          deltaminus = goal[i] - startweight[i];
        }
      }
      /*
      printf(" deltaplus = %f, deltaminus = %f, max_dev = %d\n", deltaplus, deltaminus, max_dev);
      */
      balanced = (deltaplus + deltaminus <= max_dev);
    }
    else {
      nbad++;
    }

    /* Escalate balance enforcement if unbalanced or nothing was committed. */
    if (!balanced || bptr == movelist) {
      if (enforce_balance) {
        if (enforce_balance_hard) {
          balance_trouble = TRUE;
        }
        enforce_balance_hard = TRUE;
      }
      enforce_balance = TRUE;
      nbad++;
    }

    worth_undoing = (step < undo_frac * nvtxs);
    done          = (nbad >= nbadtries && balanced);
    if (KL_MAX_PASS > 0) {
      done = done || (npass == KL_MAX_PASS && balanced);
    }
    if (!done) { /* Prepare for next pass. */
      if (KL_UNDO_LIST && worth_undoing && !balance_trouble) {
        /* Make a list of modified vertices for next bucketsort. */
        /* Also, ensure these vertices are removed from their buckets. */
        list_length =
            make_kl_list(graph, movelist, buckets, listspace, sets, nsets, bspace, dvals, maxdval);
      }
    }
    if (done || !(KL_UNDO_LIST && worth_undoing && !balance_trouble)) {
      /* Restore set numbers of remaining, altered vertices. */
      while (bptr != NULL) {
        vtx       = ((int)(bptr - listspace[0])) / size;
        sets[vtx] = -sets[vtx] - 1;
        bptr      = bptr->next;
      }
      list_length = nvtxs;
    }

    /* Rebuild the boundary list only if the caller passed one in. */
    if (done && have_bndy) {
      make_bndy_list(graph, movelist, buckets, listspace, sets, nsets, bspace, tops, bndy_list);
    }
  }

  if (DEBUG_KL > 0) {
    printf(" KL required %d passes to improve by %d.\n", npass, improved);
  }

  sfree(loose);
  sfree(locked);
  sfree(weightsum);
  sfree(bspace);
  return (0);
}