You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
401 lines
15 KiB
401 lines
15 KiB
/*
 * Copyright(C) 1999-2021, 2023 National Technology & Engineering Solutions
 * of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
 * NTESS, the U.S. Government retains certain rights in this software.
 *
 * See packages/seacas/LICENSE for details
 */
#include "defs.h"
#include "params.h"
#include "smalloc.h"
#include "structs.h"
#include <stdio.h>
void balance(struct vtx_data **graph, /* data structure for graph */
int nvtxs, /* number of vertices in full graph */
int nedges, /* number of edges in graph */
int using_vwgts, /* are vertex weights being used? */
int using_ewgts, /* are edge weights being used? */
double *vwsqrt, /* sqrt of vertex weights (length nvtxs+1) */
int igeom, /* geometric dimension for inertial method */
float **coords, /* coordinates for inertial method */
int *assignment, /* set number of each vtx (length n) */
double *goal, /* desired set sizes */
int architecture, /* 0=> hypercube, d=> d-dimensional mesh */
int ndims_tot, /* number of cuts to make in total */
int *mesh_dims, /* shape of mesh */
int global_method, /* global partitioning algorithm */
int local_method, /* local partitioning algorithm */
int rqi_flag, /* should I use multilevel eigensolver? */
int vmax, /* if so, how many vertices to coarsen down to? */
int ndims, /* number of eigenvectors (2^d sets) */
double eigtol, /* tolerance on eigenvectors */
int (*hops)[MAXSETS] /* between-set hop cost for KL */
extern int TERM_PROP; /* invoking terminal propagation? */
extern int DEBUG_TRACE; /* trace the execution of the code */
extern int MATCH_TYPE; /* type of matching to use when coarsening */
struct vtx_data **subgraph = NULL; /* data structure for subgraph */
struct set_info *set_info = NULL; /* information about each processor subset */
struct set_info **set_buckets = NULL; /* buckets for sorting processor sets */
struct set_info *set = NULL; /* current processor set information */
int hops_special[MAXSETS][MAXSETS]; /* hop mtx for nonstandard cases */
float *term_wgts; /* net pull of terminal propagation */
float *save_term_wgts; /* saved location of term_wgts */
float *all_term_wgts[MAXSETS]; /* net pull on all sets */
int *loc2glob; /* mapping from subgraph to graph numbering */
int *glob2loc; /* mapping from graph to subgraph numbering */
int *setlists; /* space for linked lists of vertices in sets */
int *list_ptrs; /* headers of each linked list */
int *degree; /* degrees of graph vertices from a subgraph */
int subsets[MAXSETS]; /* subsets being created at current step */
int setsize[MAXSETS]; /* sizes of sets created by division */
double merged_goal[MAXSETS]; /* sizes of sets at this partition level */
double sub_vwgt_sum; /* sum of subgraph vertex weights */
int cut_dirs[MAXDIMS]; /* directions of processor cuts if mesh */
int hops_flag; /* use normal or special hops? */
int ndims_real; /* actual dimension of partitioning */
int nsets_real; /* actual # subsets to create */
int maxsize; /* size of largest subgraph */
int nsets_tot; /* total sets to divide subgraph into */
int subnvtxs; /* number of vertices in subgraph */
int subnedges; /* number of edges in subgraph */
double *subvwsqrt = NULL; /* vwsqrt array for subgraph */
int *subassign = NULL; /* set assignments for subgraph */
float **subcoords = NULL; /* coordinates for subgraph */
int striping; /* partition with parallel cuts? */
int pass; /* counts passes through loop */
int max_proc_size; /* size of largest processor set */
int old_max_proc_size; /* previous size of largest processor set */
int new_dir; /* new cut direction? => no terminal prop */
int nsets; /* typical number of sets at each cut */
int done_dir[3]; /* mesh directions already cut */
int i; /* loop counter */
if (DEBUG_TRACE > 0) {
printf("<Entering balance>\n");
if (global_method != 7) { /* Not read from file. */
for (i = 1; i <= nvtxs; i++) {
assignment[i] = 0;
if (nvtxs <= 1) {
/* Compute some simple parameters. */
save_term_wgts = NULL;
maxsize = 0;
nsets = 1 << ndims;
subsets[2] = 2; /* Needed for vertex separators */
nsets_tot = 0;
if (architecture > 0) {
nsets_tot = mesh_dims[0] * mesh_dims[1] * mesh_dims[2];
else if (architecture == 0) {
nsets_tot = 1 << ndims_tot;
/* Construct data structures for keeping track of processor sets. */
set_buckets = smalloc((nsets_tot + 1) * sizeof(struct set_info *));
set_info = smalloc(nsets_tot * sizeof(struct set_info));
for (i = 0; i < nsets_tot; i++) {
set_info[i].setnum = i;
set_info[i].ndims = -1;
set_info[i].span[0] = -1;
set_info[i].next = NULL;
set_buckets[i + 1] = NULL;
set_buckets[nsets_tot] = &(set_info[0]);
if (architecture > 0) {
set_info[0].low[0] = set_info[0].low[1] = set_info[0].low[2] = 0;
set_info[0].span[0] = mesh_dims[0];
set_info[0].span[1] = mesh_dims[1];
set_info[0].span[2] = mesh_dims[2];
else if (architecture == 0) {
set_info[0].ndims = ndims_tot;
done_dir[0] = done_dir[1] = done_dir[2] = FALSE;
pass = 0;
loc2glob = NULL;
degree = NULL;
glob2loc = NULL;
setlists = NULL;
list_ptrs = NULL;
old_max_proc_size = 2 * nsets_tot;
max_proc_size = nsets_tot;
while (max_proc_size > 1) { /* Some set still needs to be divided. */
set = set_buckets[max_proc_size];
set_buckets[max_proc_size] = set->next;
/* Divide the processors. */
hops_flag =
divide_procs(architecture, ndims, ndims_tot, set_info, set, subsets, (global_method == 3),
&ndims_real, &nsets_real, &striping, cut_dirs, mesh_dims, hops_special);
/* If new cut direction, turn off terminal propagation. */
new_dir = FALSE;
if (architecture == 0) {
if (old_max_proc_size != max_proc_size) {
new_dir = TRUE;
else if (architecture > 0) {
if (!done_dir[cut_dirs[0]]) {
new_dir = TRUE;
done_dir[cut_dirs[0]] = TRUE;
old_max_proc_size = max_proc_size;
/* Now place the new sets into the set_info data structure. */
/* This is indexed by number of processors in set. */
for (i = 0; i < nsets_real; i++) {
int j = 0;
if (architecture > 0) {
j = set_info[subsets[i]].span[0] * set_info[subsets[i]].span[1] *
else if (architecture == 0) {
j = 1 << set_info[subsets[i]].ndims;
set_info[subsets[i]].next = set_buckets[j];
set_buckets[j] = &(set_info[subsets[i]]);
/* Construct desired set sizes for this division step. */
if (pass == 1) { /* First partition. */
subgraph = graph;
subnvtxs = nvtxs;
subnedges = nedges;
subvwsqrt = vwsqrt;
subcoords = coords;
subassign = assignment;
all_term_wgts[1] = NULL;
if (!using_vwgts) {
sub_vwgt_sum = subnvtxs;
else {
sub_vwgt_sum = 0;
for (i = 1; i <= subnvtxs; i++) {
sub_vwgt_sum += subgraph[i]->vwgt;
merge_goals(goal, merged_goal, set_info, subsets, nsets_real, ndims_tot, architecture,
mesh_dims, sub_vwgt_sum);
else { /* Not the first cut. */
/* After first cut, allocate all space we'll need. */
if (pass == 2) {
glob2loc = smalloc((nvtxs + 1) * sizeof(int));
loc2glob = smalloc((maxsize + 1) * sizeof(int));
if (!using_vwgts) {
subvwsqrt = NULL;
else {
subvwsqrt = smalloc((maxsize + 1) * sizeof(double));
if (graph != NULL) {
subgraph = smalloc((maxsize + 1) * sizeof(struct vtx_data *));
degree = smalloc((maxsize + 1) * sizeof(int));
else {
subgraph = NULL;
subassign = smalloc((maxsize + 1) * sizeof(int));
if (global_method == 3 ||
(MATCH_TYPE == 5 && (global_method == 1 || (global_method == 2 && rqi_flag)))) {
subcoords = smalloc(3 * sizeof(float *));
subcoords[1] = subcoords[2] = NULL;
subcoords[0] = smalloc((maxsize + 1) * sizeof(float));
if (igeom > 1) {
subcoords[1] = smalloc((maxsize + 1) * sizeof(float));
if (igeom > 2) {
subcoords[2] = smalloc((maxsize + 1) * sizeof(float));
else {
subcoords = NULL;
if (TERM_PROP && graph != NULL) {
term_wgts = smalloc((nsets - 1) * (maxsize + 1) * sizeof(float));
save_term_wgts = term_wgts;
for (i = 1; i < nsets; i++) {
all_term_wgts[i] = term_wgts;
term_wgts += maxsize + 1;
/* Construct mappings between local and global vertex numbering */
subnvtxs = make_maps(setlists, list_ptrs, set->setnum, glob2loc, loc2glob);
if (TERM_PROP && !new_dir && graph != NULL) {
all_term_wgts[1] = save_term_wgts;
make_term_props(graph, subnvtxs, loc2glob, assignment, architecture, ndims_tot, ndims_real,
set_info, set->setnum, nsets_real, nsets_tot, subsets, all_term_wgts,
else {
all_term_wgts[1] = NULL;
/* Form the subgraph in our graph format. */
if (graph != NULL) {
make_subgraph(graph, subgraph, subnvtxs, &subnedges, assignment, set->setnum, glob2loc,
loc2glob, degree, using_ewgts);
else {
subnedges = 0; /* Otherwise some output is garbage */
if (!using_vwgts) {
sub_vwgt_sum = subnvtxs;
else {
sub_vwgt_sum = 0;
for (i = 1; i <= subnvtxs; i++) {
sub_vwgt_sum += subgraph[i]->vwgt;
merge_goals(goal, merged_goal, set_info, subsets, nsets_real, ndims_tot, architecture,
mesh_dims, sub_vwgt_sum);
/* Condense the relevant vertex weight array. */
if (using_vwgts && vwsqrt != NULL) {
make_subvector(vwsqrt, subvwsqrt, subnvtxs, loc2glob);
if (global_method == 3 ||
(MATCH_TYPE == 5 && (global_method == 1 || (global_method == 2 && rqi_flag)))) {
make_subgeom(igeom, coords, subcoords, subnvtxs, loc2glob);
if (DEBUG_TRACE > 1) {
printf("About to call divide with nvtxs = %d, nedges = %d, ", subnvtxs, subnedges);
if (!architecture) {
printf("ndims = %d\n", set->ndims);
else if (architecture == 1) {
printf("mesh = %d\n", set->span[0]);
else if (architecture == 2) {
printf("mesh = %dx%d\n", set->span[0], set->span[1]);
else if (architecture == 3) {
printf("mesh = %dx%dx%d\n", set->span[0], set->span[1], set->span[2]);
/* Perform a single division step. */
chaco_divide(subgraph, subnvtxs, subnedges, using_vwgts, using_ewgts, subvwsqrt, igeom,
subcoords, subassign, merged_goal, architecture, all_term_wgts, global_method,
local_method, rqi_flag, vmax, ndims_real, eigtol,
(hops_flag ? hops_special : hops), nsets_real, striping);
/* Undo the subgraph construction. */
if (pass != 1 && graph != NULL) {
remake_graph(subgraph, subnvtxs, loc2glob, degree, using_ewgts);
/* Prepare for next division */
while (max_proc_size > 1 && set_buckets[max_proc_size] == NULL) {
/* Merge the subgraph partitioning with the graph partitioning. */
if (pass == 1) {
subgraph = NULL;
subvwsqrt = NULL;
subcoords = NULL;
subassign = NULL;
/* Find size of largest subgraph for recursing. */
if (max_proc_size > 1) {
for (i = 0; i < nsets; i++) {
setsize[i] = 0;
for (i = 1; i <= nvtxs; i++) {
maxsize = 0;
for (i = 0; i < nsets; i++) {
if (setsize[i] > maxsize) {
maxsize = setsize[i];
for (i = 1; i <= nvtxs; i++) {
assignment[i] = subsets[assignment[i]];
/* Construct list of vertices in sets for make_maps. */
if (max_proc_size > 1) {
setlists = smalloc((nvtxs + 1) * sizeof(int));
list_ptrs = smalloc(nsets_tot * sizeof(int));
make_setlists(setlists, list_ptrs, nsets_real, subsets, assignment, loc2glob, nvtxs, TRUE);
else {
/* Construct list of vertices in sets for make_maps. */
if (max_proc_size > 1) {
make_setlists(setlists, list_ptrs, nsets_real, subsets, subassign, loc2glob, subnvtxs,
merge_assignments(assignment, subassign, subsets, subnvtxs, loc2glob);
/* Free everything allocated for subgraphs. */
if (graph != NULL) {
if (subgraph != NULL) {
if (subvwsqrt != NULL) {
if (subcoords != NULL) {
if (subcoords[0] != NULL) {
if (subcoords[1] != NULL) {
if (subcoords[2] != NULL) {