You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2851 lines
101 KiB
2851 lines
101 KiB
2 years ago
|
/*
|
||
|
* Copyright(C) 1999-2021, 2023 National Technology & Engineering Solutions
|
||
|
* of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||
|
* NTESS, the U.S. Government retains certain rights in this software.
|
||
|
*
|
||
|
* See packages/seacas/LICENSE for details
|
||
|
*/
|
||
|
|
||
|
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||
|
* Functions contained in this file:
|
||
|
* generate_loadbal()
|
||
|
* generate_maps()
|
||
|
* nodal_dist()
|
||
|
* elemental_dist()
|
||
|
* ilog2i()
|
||
|
*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
|
||
|
#include <cassert> // for assert
|
||
|
#include <cfloat> // for FLT_MAX
|
||
|
#include <climits> // for INT_MAX
|
||
|
#include <cmath> /* Needed for ZPINCH_assign */
|
||
|
#include <copy_string_cpp.h>
|
||
|
#include <cstdlib> // for malloc, free, realloc, exit, etc
|
||
|
#include <cstring> // for strcat
|
||
|
#include <fmt/ostream.h>
|
||
|
|
||
|
#include "chaco.h" // for input_assign, interface
|
||
|
#include "elb.h" // for LB_Description<INT>, etc
|
||
|
#include "elb_elem.h" // for E_Type, get_elem_info, etc
|
||
|
#include "elb_err.h" // for Gen_Error
|
||
|
#include "elb_graph.h" // for generate_graph
|
||
|
#include "elb_groups.h" // for get_group_info
|
||
|
#include "elb_loadbal.h"
|
||
|
#include "elb_util.h" // for find_inter, etc
|
||
|
#include "fix_column_partitions.h"
|
||
|
|
||
|
#ifndef M_PI
|
||
|
#define M_PI 3.14159265358979323846264338327
|
||
|
#define M_PI_2 (M_PI / 2.0)
|
||
|
#endif
|
||
|
|
||
|
#ifdef USE_ZOLTAN
|
||
|
#include "zoltan.h" // for Zoltan_Set_Param, etc
|
||
|
#include "zoltan_types.h" // for ZOLTAN_ID_PTR*, ZOLTAN_OK, etc
|
||
|
#include <mpi.h> // for MPI_Finalize, etc
|
||
|
#endif
|
||
|
|
||
|
int ilog2i(size_t n)
|
||
|
{
|
||
|
size_t i = 0;
|
||
|
size_t n1 = n;
|
||
|
while ((n1 >>= 1) != 0U) {
|
||
|
++i;
|
||
|
}
|
||
|
|
||
|
if (static_cast<size_t>(1) << i != n) {
|
||
|
return (-1);
|
||
|
}
|
||
|
return (i);
|
||
|
}
|
||
|
|
||
|
namespace {
|
||
|
template <typename INT>
|
||
|
int nodal_dist(LB_Description<INT> * /*lb*/, Machine_Description * /*machine*/,
|
||
|
Mesh_Description<INT> * /*mesh*/, Graph_Description<INT> * /*graph*/);
|
||
|
|
||
|
template <typename INT>
|
||
|
int elemental_dist(LB_Description<INT> * /*lb*/, Machine_Description * /*machine*/,
|
||
|
Mesh_Description<INT> * /*mesh*/, Graph_Description<INT> * /*graph*/,
|
||
|
Problem_Description * /*problem*/);
|
||
|
|
||
|
/* ZPINCH partitioning interface */
|
||
|
int ZPINCH_assign(Machine_Description * /*machine*/, int /*ndot*/, const float * /*x*/,
|
||
|
const float * /*y*/, const float * /*z*/, int * /*part*/);
|
||
|
|
||
|
/* BRICK partitioning interface */
|
||
|
int BRICK_assign(Machine_Description * /*machine*/, int /*ndot*/, const float * /*x*/,
|
||
|
const float * /*y*/, const float * /*z*/, int * /*part*/);
|
||
|
|
||
|
#ifdef USE_ZOLTAN
|
||
|
/* ZOLTAN_RCB partitioning interface */
|
||
|
int ZOLTAN_assign(const char * /*method*/, int /*totalproc*/, size_t /*ndot*/, int * /*vwgt*/,
|
||
|
float * /*x*/, float * /*y*/, float * /*z*/, int /*ignore_z*/, int * /*part*/,
|
||
|
int /*argc*/, char ** /*argv*/);
|
||
|
#endif
|
||
|
void BALANCE_STATS(Machine_Description * /*machine*/, const int * /*wgt*/, size_t /*ndot*/,
|
||
|
int * /*part*/);
|
||
|
|
||
|
template <typename INT>
|
||
|
int identify_mechanisms(Machine_Description *machine, Problem_Description *problem,
|
||
|
Mesh_Description<INT> *mesh, LB_Description<INT> *lb,
|
||
|
Graph_Description<INT> *graph, int check_type);
|
||
|
|
||
|
int extract_connected_lists(int nrow, const int *columns, const int *rows, int *list,
|
||
|
std::vector<int> &list_ptr);
|
||
|
} // namespace
|
||
|
|
||
|
/*****************************************************************************/
|
||
|
/*****************************************************************************/
|
||
|
/*****************************************************************************/
|
||
|
/* Function generate_loadbal() begins
|
||
|
*----------------------------------------------------------------------------
|
||
|
* This function calls Chaco to load balance the given problem.
|
||
|
*****************************************************************************/
|
||
|
template int generate_loadbal(Machine_Description *machine, Problem_Description *problem,
|
||
|
Mesh_Description<int> *mesh, LB_Description<int> *lb,
|
||
|
Solver_Description *solve, Graph_Description<int> *graph,
|
||
|
Weight_Description<int> *weight, Sphere_Info *sphere, int argc,
|
||
|
char *argv[]);
|
||
|
template int generate_loadbal(Machine_Description *machine, Problem_Description *problem,
|
||
|
Mesh_Description<int64_t> *mesh, LB_Description<int64_t> *lb,
|
||
|
Solver_Description *solve, Graph_Description<int64_t> *graph,
|
||
|
Weight_Description<int64_t> *weight, Sphere_Info *sphere, int argc,
|
||
|
char *argv[]);
|
||
|
|
||
|
/* Variables used in Chaco */
|
||
|
extern "C" {
|
||
|
extern int FREE_GRAPH;
|
||
|
extern int CONNECTED_DOMAINS;
|
||
|
extern int OUTPUT_ASSIGN;
|
||
|
}
|
||
|
|
||
|
template <typename INT>
|
||
|
int generate_loadbal(Machine_Description *machine, Problem_Description *problem,
|
||
|
Mesh_Description<INT> *mesh, LB_Description<INT> *lb,
|
||
|
Solver_Description *solve, Graph_Description<INT> *graph,
|
||
|
Weight_Description<INT> *weight, Sphere_Info *sphere, int argc, char *argv[])
|
||
|
{
|
||
|
const char *assignfile = nullptr;
|
||
|
|
||
|
int flag = 0;
|
||
|
int arch = 0;
|
||
|
int refine = 0;
|
||
|
int num_level = 0;
|
||
|
int totalproc = 0;
|
||
|
int glob_method = 0;
|
||
|
int start_proc = 0;
|
||
|
|
||
|
INT *tmp_start = nullptr;
|
||
|
INT *tmp_adj = nullptr;
|
||
|
int *tmp_vwgts = nullptr;
|
||
|
size_t tmp_nv;
|
||
|
std::vector<int> nprocg;
|
||
|
std::vector<int> nelemg;
|
||
|
std::vector<int> nadjg;
|
||
|
size_t max_vtx;
|
||
|
size_t max_adj;
|
||
|
int group;
|
||
|
std::vector<INT> elem_map;
|
||
|
int dim[1];
|
||
|
int tmpdim[3];
|
||
|
int tmp_arch;
|
||
|
int tmp_lev;
|
||
|
int *tmp_v2p = nullptr;
|
||
|
|
||
|
float *x_node_ptr = nullptr;
|
||
|
float *y_node_ptr = nullptr;
|
||
|
float *z_node_ptr = nullptr;
|
||
|
std::vector<float> x_elem_ptr;
|
||
|
std::vector<float> y_elem_ptr;
|
||
|
std::vector<float> z_elem_ptr;
|
||
|
float *tmp_x = nullptr;
|
||
|
float *tmp_y = nullptr;
|
||
|
float *tmp_z = nullptr;
|
||
|
float *tmp_ewgts = nullptr;
|
||
|
|
||
|
long seed = 1;
|
||
|
double *goal = nullptr;
|
||
|
double time1;
|
||
|
double time2;
|
||
|
FILE *fp = nullptr;
|
||
|
|
||
|
/*-----------------------------Execution Begins------------------------------*/
|
||
|
|
||
|
tmpdim[0] = 0;
|
||
|
tmpdim[1] = 0;
|
||
|
tmpdim[2] = 0;
|
||
|
|
||
|
/* if user requests, check for mechanisms in the original mesh before
|
||
|
working on the loadbalance
|
||
|
*/
|
||
|
|
||
|
if (problem->type == ELEMENTAL && problem->global_mech == 1 && problem->alloc_graph == ELB_TRUE) {
|
||
|
fmt::print("\n==============Looking For Global Issues=================\n");
|
||
|
identify_mechanisms(machine, problem, mesh, lb, graph, GLOBAL_ISSUES);
|
||
|
fmt::print("============================================================\n");
|
||
|
fmt::print("\n");
|
||
|
}
|
||
|
|
||
|
/* Allocate the graph structure as Chaco expects it */
|
||
|
if (problem->alloc_graph == ELB_TRUE) {
|
||
|
/*
|
||
|
* Increment the graph vertices to start a "1" instead of "0",
|
||
|
* as expected by Chaco.
|
||
|
*/
|
||
|
for (size_t cnt = 0; cnt < graph->nadj; cnt++) {
|
||
|
graph->adj[cnt]++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (((problem->type == NODAL) && (lb->type == INERTIAL)) ||
|
||
|
((problem->type == ELEMENTAL) &&
|
||
|
(lb->type == INERTIAL || lb->type == ZPINCH || lb->type == BRICK || lb->type == ZOLTAN_RCB ||
|
||
|
lb->type == ZOLTAN_RIB || lb->type == ZOLTAN_HSFC))) {
|
||
|
if (problem->read_coords != ELB_TRUE) {
|
||
|
Gen_Error(0, "FATAL: internal logic error. Reading coordinates, but read_coords not set");
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
switch (mesh->num_dims) {
|
||
|
case 3:
|
||
|
x_node_ptr = mesh->coords.data();
|
||
|
y_node_ptr = mesh->coords.data() + (mesh->num_nodes);
|
||
|
z_node_ptr = mesh->coords.data() + 2 * (mesh->num_nodes);
|
||
|
break;
|
||
|
|
||
|
case 2:
|
||
|
x_node_ptr = mesh->coords.data();
|
||
|
y_node_ptr = mesh->coords.data() + (mesh->num_nodes);
|
||
|
z_node_ptr = (float *)calloc(mesh->num_nodes, sizeof(float));
|
||
|
break;
|
||
|
|
||
|
case 1:
|
||
|
x_node_ptr = mesh->coords.data();
|
||
|
y_node_ptr = (float *)calloc(mesh->num_nodes, sizeof(float));
|
||
|
z_node_ptr = (float *)calloc(mesh->num_nodes, sizeof(float));
|
||
|
break;
|
||
|
|
||
|
default: Gen_Error(0, "FATAL: Invalid mesh dimension. Must be 1, 2, or 3."); return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* now set the pointers that are being sent to Chaco */
|
||
|
float *x_ptr = x_node_ptr;
|
||
|
float *y_ptr = y_node_ptr;
|
||
|
float *z_ptr = z_node_ptr;
|
||
|
|
||
|
/*
|
||
|
* For an elemental decomposition using inertial, ZPINCH, BRICK
|
||
|
* or ZOLTAN geometric load balancing,
|
||
|
* I need to come up with coordinates for the elements.
|
||
|
*/
|
||
|
if ((problem->type == ELEMENTAL) &&
|
||
|
(lb->type == INERTIAL || lb->type == ZPINCH || lb->type == BRICK || lb->type == ZOLTAN_RCB ||
|
||
|
lb->type == ZOLTAN_RIB || lb->type == ZOLTAN_HSFC)) {
|
||
|
if (problem->read_coords != ELB_TRUE) {
|
||
|
Gen_Error(0, "FATAL: internal logic error. Reading coordinates, but read_coords not set");
|
||
|
return 0;
|
||
|
}
|
||
|
/* just check to make sure that there are vertices to get coords for */
|
||
|
if (problem->num_vertices > 0) {
|
||
|
/* allocate memory for element coordinates */
|
||
|
x_elem_ptr.resize(problem->num_vertices);
|
||
|
y_elem_ptr.resize(problem->num_vertices);
|
||
|
z_elem_ptr.resize(problem->num_vertices);
|
||
|
|
||
|
size_t cnt = 0;
|
||
|
for (size_t ecnt = 0; ecnt < mesh->num_elems; ecnt++) {
|
||
|
|
||
|
if (mesh->elem_type[ecnt] != SPHERE || problem->no_sph == 1) {
|
||
|
/*
|
||
|
* for our purposes, the coordinate of the element will
|
||
|
* be the average of the coordinates of the nodes that make
|
||
|
* up that element
|
||
|
*/
|
||
|
size_t nnodes = get_elem_info(NNODES, mesh->elem_type[cnt]);
|
||
|
for (size_t ncnt = 0; ncnt < nnodes; ncnt++) {
|
||
|
x_elem_ptr[cnt] += x_ptr[(mesh->connect[ecnt][ncnt])];
|
||
|
y_elem_ptr[cnt] += y_ptr[(mesh->connect[ecnt][ncnt])];
|
||
|
z_elem_ptr[cnt] += z_ptr[(mesh->connect[ecnt][ncnt])];
|
||
|
}
|
||
|
x_elem_ptr[cnt] /= nnodes;
|
||
|
y_elem_ptr[cnt] /= nnodes;
|
||
|
z_elem_ptr[cnt] /= nnodes;
|
||
|
cnt++;
|
||
|
}
|
||
|
|
||
|
} /* End "for (cnt=0; cnt < mesh->num_elem; cnt++)" */
|
||
|
|
||
|
/* and use different pointers for Chaco */
|
||
|
x_ptr = x_elem_ptr.data();
|
||
|
y_ptr = y_elem_ptr.data();
|
||
|
z_ptr = z_elem_ptr.data();
|
||
|
|
||
|
} /* End "if (problem->num_vertices > 0)" */
|
||
|
} /* End "if ((problem->type == ELEMENTAL) &&
|
||
|
(lb->type==INERTIAL||ZPINCH||BRICK||ZOLTAN))"*/
|
||
|
|
||
|
/* Allocate memory for the vertex to processor vector */
|
||
|
if (problem->type == ELEMENTAL) {
|
||
|
lb->vertex2proc =
|
||
|
reinterpret_cast<int *>(malloc(((problem->num_vertices) + sphere->num) * sizeof(int)));
|
||
|
}
|
||
|
else {
|
||
|
lb->vertex2proc = reinterpret_cast<int *>(malloc(problem->num_vertices * sizeof(int)));
|
||
|
}
|
||
|
|
||
|
if (!(lb->vertex2proc)) {
|
||
|
Gen_Error(0, "fatal: insufficient memory");
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
if (machine->type == HCUBE) {
|
||
|
arch = 0;
|
||
|
num_level = ilog2i(static_cast<unsigned int>(machine->procs_per_box));
|
||
|
}
|
||
|
if (machine->type == MESH) {
|
||
|
arch = machine->num_dims;
|
||
|
num_level = 0;
|
||
|
}
|
||
|
|
||
|
if (lb->refine == KL_REFINE) {
|
||
|
refine = 1;
|
||
|
}
|
||
|
else {
|
||
|
refine = 2;
|
||
|
}
|
||
|
|
||
|
if (lb->type == INFILE) {
|
||
|
assignfile = lb->file.c_str();
|
||
|
fp = fopen(assignfile, "r");
|
||
|
if (fp == nullptr) {
|
||
|
Gen_Error(0, "fatal: could not open assignment file");
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (lb->outfile) {
|
||
|
assignfile = lb->file.c_str();
|
||
|
OUTPUT_ASSIGN = 1;
|
||
|
}
|
||
|
|
||
|
switch (lb->type) {
|
||
|
case MULTIKL: glob_method = 1; break;
|
||
|
case SPECTRAL: glob_method = 2; break;
|
||
|
case INERTIAL: glob_method = 3; break;
|
||
|
case ZPINCH:
|
||
|
case BRICK:
|
||
|
case ZOLTAN_RCB:
|
||
|
case ZOLTAN_RIB:
|
||
|
case ZOLTAN_HSFC:
|
||
|
glob_method = 999; /* Chaco methods don't apply to ZPINCH, BRICK
|
||
|
ZOLTAN_RCB, ZOLTAN_RIB, ZOLTAN_HSFC */
|
||
|
break;
|
||
|
case LINEAR: glob_method = 4; break;
|
||
|
case RANDOM: glob_method = 5; break;
|
||
|
case SCATTERED: glob_method = 6; break;
|
||
|
case INFILE: glob_method = 7; break;
|
||
|
}
|
||
|
|
||
|
/* check if Chaco is supposed to make sure that the domains are connected */
|
||
|
if (lb->cnctd_dom) {
|
||
|
CONNECTED_DOMAINS = 1;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* check if the data needs to be sent to Chaco in groups, and
|
||
|
* do some preliminary calculations and memory allocation
|
||
|
*
|
||
|
* Group or box designations for elements are stored in the
|
||
|
* vertex2proc array as negative numbers. So, group 1 will
|
||
|
* be designated as -1, etc.
|
||
|
*
|
||
|
* by default nloops = 1
|
||
|
*/
|
||
|
size_t nloops = 1;
|
||
|
if (problem->num_groups > 1) {
|
||
|
/* allocate space to hold number of procs and elem per group */
|
||
|
nprocg.resize(problem->num_groups);
|
||
|
nelemg.resize(problem->num_groups);
|
||
|
|
||
|
if (!get_group_info(machine, problem, mesh, graph, lb->vertex2proc, nprocg.data(),
|
||
|
nelemg.data(), &max_vtx, &max_adj)) {
|
||
|
Gen_Error(0, "fatal: Error obtaining group information.");
|
||
|
goto cleanup;
|
||
|
}
|
||
|
|
||
|
nloops = problem->num_groups;
|
||
|
}
|
||
|
|
||
|
if (machine->num_boxes > 1) {
|
||
|
/* allocate space to hold number of procs and elem per box */
|
||
|
nprocg.resize(machine->num_boxes);
|
||
|
nelemg.resize(machine->num_boxes);
|
||
|
nadjg.resize(machine->num_boxes);
|
||
|
|
||
|
/*
|
||
|
* Call Chaco to get the initial breakdown of the vertices
|
||
|
* onto the boxes. The vertex2proc array can then be used
|
||
|
* to assign elements to groups that can be used in the
|
||
|
* Chaco calls below.
|
||
|
*/
|
||
|
tmp_arch = 1;
|
||
|
tmp_lev = 0;
|
||
|
dim[0] = machine->num_boxes;
|
||
|
FREE_GRAPH = 0; /* Don't have Chaco to free the adjacency */
|
||
|
|
||
|
fmt::print("=======================Call Chaco===========================\n");
|
||
|
time1 = get_time();
|
||
|
if (lb->type == INFILE) {
|
||
|
flag =
|
||
|
input_assign(fp, const_cast<char *>(assignfile), problem->num_vertices, lb->vertex2proc);
|
||
|
}
|
||
|
if (lb->type == ZPINCH || lb->type == BRICK || lb->type == ZOLTAN_RCB ||
|
||
|
lb->type == ZOLTAN_RIB || lb->type == ZOLTAN_HSFC) {
|
||
|
fmt::print(stderr, "KDD -- ZPINCH, BRICK, ZOLTAN_RCB, ZOLTAN_RIB, and "
|
||
|
"ZOLTAN_HSFC not supported with num_boxes > 1.\n");
|
||
|
fmt::print(stderr, "KDD -- Contact Karen Devine, kddevin@sandia.gov.\n");
|
||
|
exit(-1);
|
||
|
}
|
||
|
else {
|
||
|
flag = INTER_FACE(problem->num_vertices, (int *)graph->start.data(), (int *)graph->adj.data(),
|
||
|
weight->vertices.data(), weight->edges.data(), x_ptr, y_ptr, z_ptr,
|
||
|
const_cast<char *>(assignfile), (char *)nullptr, lb->vertex2proc, tmp_arch,
|
||
|
tmp_lev, dim, goal, glob_method, refine, solve->rqi_flag, solve->vmax,
|
||
|
lb->num_sects, solve->tolerance, seed);
|
||
|
}
|
||
|
|
||
|
time2 = get_time();
|
||
|
fmt::print("============================================================\n");
|
||
|
fmt::print("Time in Chaco: {}s\n", time2 - time1);
|
||
|
|
||
|
if (flag != 0) {
|
||
|
Gen_Error(0, "fatal: Chaco returned an error");
|
||
|
goto cleanup;
|
||
|
}
|
||
|
|
||
|
nloops = machine->num_boxes;
|
||
|
for (size_t iloop = 0; iloop < nloops; iloop++) {
|
||
|
nelemg[iloop] = nadjg[iloop] = 0;
|
||
|
nprocg[iloop] = machine->procs_per_box;
|
||
|
}
|
||
|
|
||
|
for (size_t ecnt = 0; ecnt < problem->num_vertices; ecnt++) {
|
||
|
nelemg[lb->vertex2proc[ecnt]]++;
|
||
|
if (problem->alloc_graph == ELB_TRUE) {
|
||
|
nadjg[lb->vertex2proc[ecnt]] += graph->start[ecnt + 1] - graph->start[ecnt];
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* use negative numbers to specify the groups, in order to
|
||
|
* avoid having a problem with group 0, add 1 to the group
|
||
|
* number
|
||
|
*/
|
||
|
lb->vertex2proc[ecnt] = -(lb->vertex2proc[ecnt] + 1);
|
||
|
}
|
||
|
|
||
|
max_vtx = 0;
|
||
|
max_adj = 0;
|
||
|
for (size_t iloop = 0; iloop < nloops; iloop++) {
|
||
|
if (nelemg[iloop] > static_cast<int>(max_vtx)) {
|
||
|
max_vtx = nelemg[iloop];
|
||
|
}
|
||
|
if (problem->alloc_graph == ELB_TRUE) {
|
||
|
if (nadjg[iloop] > static_cast<int>(max_adj)) {
|
||
|
max_adj = nadjg[iloop];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (nloops > 1) {
|
||
|
|
||
|
/*
|
||
|
* now allocate temporary arrays to hold information
|
||
|
* to pass into Chaco
|
||
|
*/
|
||
|
if (problem->alloc_graph == ELB_TRUE) {
|
||
|
tmp_start = (INT *)malloc((max_vtx + 1) * sizeof(INT));
|
||
|
tmp_adj = (INT *)malloc(max_adj * sizeof(INT));
|
||
|
if (!tmp_start || !tmp_adj) {
|
||
|
Gen_Error(0, "fatal: insufficient memory");
|
||
|
goto cleanup;
|
||
|
}
|
||
|
/* and need a group map for the elements */
|
||
|
elem_map.resize(problem->num_vertices);
|
||
|
}
|
||
|
if (!weight->vertices.empty()) {
|
||
|
tmp_vwgts = reinterpret_cast<int *>(malloc(max_adj * sizeof(int)));
|
||
|
if (!tmp_vwgts) {
|
||
|
Gen_Error(0, "fatal: insufficient memory");
|
||
|
goto cleanup;
|
||
|
}
|
||
|
}
|
||
|
if (!weight->edges.empty()) {
|
||
|
tmp_ewgts = reinterpret_cast<float *>(malloc(max_adj * sizeof(float)));
|
||
|
if (!tmp_ewgts) {
|
||
|
Gen_Error(0, "fatal: insufficient memory");
|
||
|
goto cleanup;
|
||
|
}
|
||
|
}
|
||
|
if (x_ptr != nullptr) {
|
||
|
tmp_x = reinterpret_cast<float *>(malloc(max_vtx * sizeof(float)));
|
||
|
if (!tmp_x) {
|
||
|
Gen_Error(0, "fatal: insufficient memory");
|
||
|
goto cleanup;
|
||
|
}
|
||
|
}
|
||
|
if (y_ptr != nullptr) {
|
||
|
tmp_y = reinterpret_cast<float *>(malloc(max_vtx * sizeof(float)));
|
||
|
if (!tmp_y) {
|
||
|
Gen_Error(0, "fatal: insufficient memory");
|
||
|
goto cleanup;
|
||
|
}
|
||
|
}
|
||
|
if (z_ptr != nullptr) {
|
||
|
tmp_z = reinterpret_cast<float *>(malloc(max_vtx * sizeof(float)));
|
||
|
if (!tmp_z) {
|
||
|
Gen_Error(0, "fatal: insufficient memory");
|
||
|
goto cleanup;
|
||
|
}
|
||
|
}
|
||
|
tmp_v2p = reinterpret_cast<int *>(malloc(max_vtx * sizeof(int)));
|
||
|
if (!tmp_v2p) {
|
||
|
Gen_Error(0, "fatal: insufficient memory");
|
||
|
goto cleanup;
|
||
|
}
|
||
|
start_proc = 0; /* counter to keep track of processors for each group */
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* need to check to make sure that the mesh is not made up
|
||
|
* entirely of SPHERE elements. If it is, don't call Chaco.
|
||
|
* The spheres are linearly distributed after chaco is called.
|
||
|
*/
|
||
|
|
||
|
if (sphere->num < mesh->num_elems) {
|
||
|
|
||
|
FREE_GRAPH = 0; /* Don't have Chaco to free the adjacency */
|
||
|
/* now loop over the number of groups */
|
||
|
for (size_t iloop = 0; iloop < nloops; iloop++) {
|
||
|
|
||
|
/* group #'s 1 based */
|
||
|
group = iloop + 1;
|
||
|
|
||
|
/* Call chaco to generate the load balance */
|
||
|
|
||
|
if (nloops > 1) {
|
||
|
if (nelemg[iloop] <= 0) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
tmp_nv = nelemg[iloop];
|
||
|
|
||
|
if (problem->alloc_graph == ELB_TRUE) {
|
||
|
/*
|
||
|
* Build an element map that is local to this group. This
|
||
|
* is necessary so that the adjacency list for each element in
|
||
|
* this group can is correctly mapped. Elements that are in this
|
||
|
* group are mapped 1:n and elements that are not in the group
|
||
|
* are assigned -1.
|
||
|
*/
|
||
|
INT elemp = 1;
|
||
|
for (size_t cnt = 0; cnt < problem->num_vertices; cnt++) {
|
||
|
if (group == -lb->vertex2proc[cnt]) {
|
||
|
elem_map[cnt] = elemp++;
|
||
|
}
|
||
|
else {
|
||
|
elem_map[cnt] = -1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
tmp_start[0] = 0;
|
||
|
}
|
||
|
|
||
|
size_t adjp = 0;
|
||
|
size_t elemp = 0;
|
||
|
|
||
|
/* fill the temporary vectors */
|
||
|
for (size_t ecnt = 0; ecnt < problem->num_vertices; ecnt++) {
|
||
|
if (group == -lb->vertex2proc[ecnt]) {
|
||
|
if (problem->alloc_graph == ELB_TRUE) {
|
||
|
for (int cnt = graph->start[ecnt]; cnt < graph->start[ecnt + 1]; cnt++) {
|
||
|
if (elem_map[graph->adj[cnt] - 1] > 0) {
|
||
|
tmp_adj[adjp] = elem_map[graph->adj[cnt] - 1];
|
||
|
if (!weight->edges.empty() && tmp_ewgts != nullptr) {
|
||
|
tmp_ewgts[adjp] = weight->edges[cnt];
|
||
|
}
|
||
|
adjp++;
|
||
|
}
|
||
|
}
|
||
|
tmp_start[elemp + 1] = adjp;
|
||
|
}
|
||
|
if (!weight->vertices.empty() && tmp_vwgts != nullptr) {
|
||
|
tmp_vwgts[elemp] = weight->vertices[ecnt];
|
||
|
}
|
||
|
|
||
|
if (x_ptr) {
|
||
|
tmp_x[elemp] = x_ptr[ecnt];
|
||
|
}
|
||
|
if (y_ptr) {
|
||
|
tmp_y[elemp] = y_ptr[ecnt];
|
||
|
}
|
||
|
if (z_ptr) {
|
||
|
tmp_z[elemp] = z_ptr[ecnt];
|
||
|
}
|
||
|
|
||
|
elemp++; /* now increment the element # for the group */
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* set number of procs for this group
|
||
|
* If this is a cluster machine, then machine-dim, arch, and
|
||
|
* num_level are already set for each box.
|
||
|
*/
|
||
|
if (problem->num_groups > 1) {
|
||
|
if (machine->type == MESH) {
|
||
|
|
||
|
/*
|
||
|
* mesh and groups are in conflict. the only way to resolve
|
||
|
* this is to assume a 1-d mesh. For example if group 1 requires
|
||
|
* 7 processors, it is impossible to apply this to a 2d mesh.
|
||
|
*/
|
||
|
|
||
|
tmpdim[0] = nprocg[iloop];
|
||
|
tmpdim[1] = 1;
|
||
|
tmpdim[2] = 1;
|
||
|
}
|
||
|
else {
|
||
|
num_level = nprocg[iloop];
|
||
|
}
|
||
|
totalproc = nprocg[iloop];
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
tmp_nv = problem->num_vertices;
|
||
|
if (!graph->start.empty()) {
|
||
|
tmp_start = &graph->start[0];
|
||
|
}
|
||
|
if (!graph->adj.empty()) {
|
||
|
tmp_adj = &graph->adj[0];
|
||
|
}
|
||
|
if (!weight->vertices.empty()) {
|
||
|
tmp_vwgts = &weight->vertices[0];
|
||
|
}
|
||
|
if (!weight->edges.empty()) {
|
||
|
tmp_ewgts = &weight->edges[0];
|
||
|
}
|
||
|
tmp_x = x_ptr;
|
||
|
tmp_y = y_ptr;
|
||
|
tmp_z = z_ptr;
|
||
|
tmp_v2p = lb->vertex2proc;
|
||
|
|
||
|
int upper = machine->num_dims > 3 ? 3 : machine->num_dims;
|
||
|
for (int cnt = 0; cnt < upper; cnt++) {
|
||
|
tmpdim[cnt] = machine->dim[cnt];
|
||
|
}
|
||
|
if (machine->type == MESH) {
|
||
|
totalproc = tmpdim[0];
|
||
|
if (tmpdim[1] != 0) {
|
||
|
totalproc *= tmpdim[1];
|
||
|
}
|
||
|
if (tmpdim[2] != 0) {
|
||
|
totalproc *= tmpdim[2];
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
totalproc = num_level;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (lb->type == INFILE) {
|
||
|
flag = input_assign(fp, const_cast<char *>(assignfile), tmp_nv, tmp_v2p);
|
||
|
}
|
||
|
else if (lb->type == ZPINCH) {
|
||
|
flag = ZPINCH_assign(machine, tmp_nv, tmp_x, tmp_y, tmp_z, tmp_v2p);
|
||
|
BALANCE_STATS(machine, nullptr, tmp_nv, tmp_v2p);
|
||
|
}
|
||
|
else if (lb->type == BRICK) {
|
||
|
flag = BRICK_assign(machine, tmp_nv, tmp_x, tmp_y, tmp_z, tmp_v2p);
|
||
|
BALANCE_STATS(machine, nullptr, tmp_nv, tmp_v2p);
|
||
|
}
|
||
|
#ifdef USE_ZOLTAN
|
||
|
else if (lb->type == ZOLTAN_RCB) {
|
||
|
flag = ZOLTAN_assign("RCB", totalproc, tmp_nv, tmp_vwgts, tmp_x, tmp_y, tmp_z, lb->ignore_z,
|
||
|
tmp_v2p, argc, argv);
|
||
|
BALANCE_STATS(machine, tmp_vwgts, tmp_nv, tmp_v2p);
|
||
|
}
|
||
|
else if (lb->type == ZOLTAN_RIB) {
|
||
|
flag = ZOLTAN_assign("RIB", totalproc, tmp_nv, tmp_vwgts, tmp_x, tmp_y, tmp_z, lb->ignore_z,
|
||
|
tmp_v2p, argc, argv);
|
||
|
BALANCE_STATS(machine, tmp_vwgts, tmp_nv, tmp_v2p);
|
||
|
}
|
||
|
else if (lb->type == ZOLTAN_HSFC) {
|
||
|
flag = ZOLTAN_assign("HSFC", totalproc, tmp_nv, tmp_vwgts, tmp_x, tmp_y, tmp_z,
|
||
|
lb->ignore_z, tmp_v2p, argc, argv);
|
||
|
BALANCE_STATS(machine, tmp_vwgts, tmp_nv, tmp_v2p);
|
||
|
}
|
||
|
#endif
|
||
|
else if (lb->type == LINEAR) {
|
||
|
fmt::print(stderr, "Using internal linear decomposition\n");
|
||
|
/* assign the elements to the processors linearly */
|
||
|
size_t cnt = mesh->num_elems / machine->num_procs;
|
||
|
|
||
|
/* The left overs */
|
||
|
int extra = (mesh->num_elems) % (machine->num_procs);
|
||
|
|
||
|
size_t end = 0;
|
||
|
for (int iproc = 0; iproc < machine->num_procs; iproc++) {
|
||
|
size_t start = end;
|
||
|
end += cnt;
|
||
|
if (iproc < extra) {
|
||
|
end++;
|
||
|
}
|
||
|
|
||
|
for (size_t ecnt = start; ecnt < end; ecnt++) {
|
||
|
lb->vertex2proc[ecnt] = iproc;
|
||
|
}
|
||
|
}
|
||
|
FREE_GRAPH = 0; /* Chaco not called, */
|
||
|
|
||
|
for (size_t i = 0; i < mesh->num_elems; i++) {
|
||
|
assert(lb->vertex2proc[i] < machine->num_procs);
|
||
|
}
|
||
|
}
|
||
|
else if (lb->type == SCATTERED) {
|
||
|
// Bad decomposition, useful for maximizing communication for testing algorithms...
|
||
|
/* each element 'e' assigned to processor 'e' % 'num_proc' */
|
||
|
|
||
|
for (size_t e = 0; e < mesh->num_elems; e += machine->num_procs) {
|
||
|
size_t iproc = 0;
|
||
|
size_t elem = e;
|
||
|
size_t end = elem + machine->num_procs;
|
||
|
if (end > mesh->num_elems) {
|
||
|
end = mesh->num_elems;
|
||
|
}
|
||
|
while (elem < end) {
|
||
|
lb->vertex2proc[elem++] = iproc++;
|
||
|
}
|
||
|
}
|
||
|
FREE_GRAPH = 0; /* Chaco not called, */
|
||
|
}
|
||
|
else {
|
||
|
fmt::print("===================Call Chaco===========================\n");
|
||
|
time1 = get_time();
|
||
|
flag = INTER_FACE(tmp_nv, (int *)tmp_start, (int *)tmp_adj, tmp_vwgts, tmp_ewgts, tmp_x,
|
||
|
tmp_y, tmp_z, const_cast<char *>(assignfile), (char *)nullptr, tmp_v2p,
|
||
|
arch, num_level, tmpdim, goal, glob_method, refine, solve->rqi_flag,
|
||
|
solve->vmax, lb->num_sects, solve->tolerance, seed);
|
||
|
time2 = get_time();
|
||
|
fmt::print("========================================================\n");
|
||
|
fmt::print("Time in Chaco: {}s\n", time2 - time1);
|
||
|
}
|
||
|
|
||
|
if (flag != 0) {
|
||
|
Gen_Error(0, "fatal: Partitioner returned an error");
|
||
|
goto cleanup;
|
||
|
}
|
||
|
|
||
|
if (nloops > 1) {
|
||
|
for (int ecnt = 0; ecnt < nelemg[iloop]; ecnt++) {
|
||
|
tmp_v2p[ecnt] += start_proc;
|
||
|
}
|
||
|
start_proc += nprocg[iloop];
|
||
|
/* copy the assignment data back into assignment */
|
||
|
size_t elemp = 0;
|
||
|
for (size_t ecnt = 0; ecnt < problem->num_vertices; ecnt++) {
|
||
|
if (-lb->vertex2proc[ecnt] == group) {
|
||
|
lb->vertex2proc[ecnt] = tmp_v2p[elemp++];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
} /* End: "for (iloop = 0; iloop < nloops; iloop++)" */
|
||
|
} /* End: "if (sphere->num < mesh->num_elems)" */
|
||
|
|
||
|
/* Free up coordinates if used */
|
||
|
if (problem->read_coords == ELB_TRUE) {
|
||
|
switch (mesh->num_dims) {
|
||
|
case 1:
|
||
|
free(y_node_ptr);
|
||
|
y_node_ptr = nullptr;
|
||
|
FALL_THROUGH;
|
||
|
case 2: free(z_node_ptr); z_node_ptr = nullptr;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* free up memory for groups */
|
||
|
if (nloops > 1) {
|
||
|
if (problem->alloc_graph == ELB_TRUE) {
|
||
|
free(tmp_start);
|
||
|
free(tmp_adj);
|
||
|
}
|
||
|
free(tmp_v2p);
|
||
|
// FREE_GRAPH always 0 if nloops > 1
|
||
|
if (tmp_vwgts) {
|
||
|
free(tmp_vwgts);
|
||
|
tmp_vwgts = nullptr;
|
||
|
}
|
||
|
if (tmp_ewgts) {
|
||
|
free(tmp_ewgts);
|
||
|
tmp_ewgts = nullptr;
|
||
|
}
|
||
|
if (tmp_x) {
|
||
|
free(tmp_x);
|
||
|
}
|
||
|
if (tmp_y) {
|
||
|
free(tmp_y);
|
||
|
}
|
||
|
if (tmp_z) {
|
||
|
free(tmp_z);
|
||
|
}
|
||
|
problem->group_no.clear();
|
||
|
vec_free(mesh->eb_cnts);
|
||
|
/* since Chaco didn't free the graph, need to do it here */
|
||
|
vec_free(graph->start);
|
||
|
vec_free(graph->adj);
|
||
|
vec_free(weight->vertices);
|
||
|
vec_free(weight->edges);
|
||
|
}
|
||
|
|
||
|
if (problem->type == ELEMENTAL) {
|
||
|
/*
|
||
|
* If this is an elemental load balance and there are spheres present
|
||
|
* then adjust lb->vertex2proc accordingly. The spheres are then
|
||
|
* distributed linearly to processors.
|
||
|
*/
|
||
|
if (sphere->num > 0) {
|
||
|
// If type == LINEAR || SCATTERED, then the spheres were handled above (unless the model
|
||
|
// is
|
||
|
// all spheres...)
|
||
|
if ((lb->type != LINEAR && lb->type != SCATTERED) || sphere->num == mesh->num_elems) {
|
||
|
|
||
|
if (sphere->num != mesh->num_elems) {
|
||
|
// If all spheres, don't need to open up space...
|
||
|
for (size_t ecnt = 0; ecnt < mesh->num_elems; ecnt++) {
|
||
|
/*
|
||
|
* First generate "holes" in the vertex2proc vector where the
|
||
|
* sphere assignments will be.
|
||
|
*/
|
||
|
if (mesh->elem_type[ecnt] == SPHERE) {
|
||
|
for (size_t cnt = (problem->num_vertices); cnt > ecnt; cnt--) {
|
||
|
lb->vertex2proc[cnt] = lb->vertex2proc[cnt - 1];
|
||
|
}
|
||
|
|
||
|
lb->vertex2proc[ecnt] = -1;
|
||
|
(problem->num_vertices)++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Now assign the spheres to the processors linearly */
|
||
|
size_t cnt = sphere->num / machine->num_procs;
|
||
|
|
||
|
/* The left overs */
|
||
|
flag = (sphere->num) % (machine->num_procs);
|
||
|
|
||
|
size_t iproc = 0;
|
||
|
size_t cnt2 = 0;
|
||
|
|
||
|
for (size_t ecnt = 0; ecnt < mesh->num_elems; ecnt++) {
|
||
|
if (mesh->elem_type[ecnt] == SPHERE) {
|
||
|
lb->vertex2proc[ecnt] = iproc;
|
||
|
cnt2++;
|
||
|
|
||
|
/*
|
||
|
* If the processor ID is lower than the remainder then that
|
||
|
* processor gets an extra sphere
|
||
|
*/
|
||
|
if ((iproc + 1) <= static_cast<size_t>(flag)) {
|
||
|
if (cnt2 >= (cnt + 1)) {
|
||
|
iproc++;
|
||
|
cnt2 = 0;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
if (cnt2 >= cnt) {
|
||
|
iproc++;
|
||
|
cnt2 = 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
} /* End "if(sphere->num > 0)" */
|
||
|
|
||
|
if (problem->local_mech == 1) {
|
||
|
fmt::print("\n==============Looking For Local Issues==================\n");
|
||
|
|
||
|
if (problem->face_adj == 1 && problem->alloc_graph == ELB_TRUE) {
|
||
|
identify_mechanisms(machine, problem, mesh, lb, graph, LOCAL_ISSUES);
|
||
|
}
|
||
|
else {
|
||
|
/* need to free the bigger graph and create a face adjacent graph */
|
||
|
int tmp_alloc_graph;
|
||
|
int tmp_adjacency;
|
||
|
|
||
|
vec_free(graph->start);
|
||
|
vec_free(graph->adj);
|
||
|
vec_free(weight->vertices);
|
||
|
vec_free(weight->edges);
|
||
|
|
||
|
if (!graph->sur_elem.empty()) {
|
||
|
for (size_t cnt = 0; cnt < mesh->num_nodes; cnt++) {
|
||
|
vec_free(graph->sur_elem[cnt]);
|
||
|
}
|
||
|
vec_free(graph->sur_elem);
|
||
|
}
|
||
|
|
||
|
tmp_alloc_graph = problem->alloc_graph;
|
||
|
tmp_adjacency = problem->face_adj;
|
||
|
|
||
|
problem->alloc_graph = ELB_TRUE;
|
||
|
problem->face_adj = 1;
|
||
|
|
||
|
generate_graph(problem, mesh, graph, weight, sphere);
|
||
|
|
||
|
/*
|
||
|
* adjacancy graph sent to identify_mechanisms must be 1 based
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
for (size_t cnt = 0; cnt < graph->nadj; cnt++) {
|
||
|
graph->adj[cnt]++;
|
||
|
}
|
||
|
|
||
|
identify_mechanisms(machine, problem, mesh, lb, graph, LOCAL_ISSUES);
|
||
|
|
||
|
problem->alloc_graph = tmp_alloc_graph;
|
||
|
problem->face_adj = tmp_adjacency;
|
||
|
}
|
||
|
|
||
|
fmt::print("============================================================\n");
|
||
|
fmt::print("\n");
|
||
|
}
|
||
|
|
||
|
// If requested, try to discover vertical columnar structures in
|
||
|
// the mesh and tweak the partitioning so that elements of a
|
||
|
// column are not on multiple processors
|
||
|
if (problem->fix_columns) {
|
||
|
Gen_Error(1, "INFO: Attempting to discover columns and fix their partitioning");
|
||
|
int nmoved = fix_column_partitions(lb, mesh, graph);
|
||
|
std::string mesg;
|
||
|
mesg = fmt::format("INFO: Reassigned partitions of {} elements", nmoved);
|
||
|
Gen_Error(1, mesg);
|
||
|
}
|
||
|
|
||
|
} // problem->type == ELEMENTAL
|
||
|
|
||
|
/* since Chaco didn't free the graph, need to do it here */
|
||
|
if (!FREE_GRAPH) {
|
||
|
vec_free(graph->start);
|
||
|
vec_free(graph->adj);
|
||
|
vec_free(weight->vertices);
|
||
|
vec_free(weight->edges);
|
||
|
}
|
||
|
if (fp != nullptr) {
|
||
|
fclose(fp);
|
||
|
}
|
||
|
return 1;
|
||
|
|
||
|
cleanup:
|
||
|
if (problem->read_coords == ELB_TRUE) {
|
||
|
switch (mesh->num_dims) {
|
||
|
case 1: free(y_node_ptr); FALL_THROUGH;
|
||
|
case 2: free(z_node_ptr);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (lb->type == INFILE) {
|
||
|
fclose(fp);
|
||
|
}
|
||
|
|
||
|
if (nloops > 1) {
|
||
|
if (problem->alloc_graph == ELB_TRUE) {
|
||
|
free(tmp_start);
|
||
|
free(tmp_adj);
|
||
|
}
|
||
|
free(tmp_v2p);
|
||
|
// FREE_GRAPH always zero if nloops > 1
|
||
|
if (tmp_vwgts) {
|
||
|
free(tmp_vwgts);
|
||
|
}
|
||
|
if (tmp_ewgts) {
|
||
|
free(tmp_ewgts);
|
||
|
}
|
||
|
if (tmp_x) {
|
||
|
free(tmp_x);
|
||
|
}
|
||
|
if (tmp_y) {
|
||
|
free(tmp_y);
|
||
|
}
|
||
|
if (tmp_z) {
|
||
|
free(tmp_z);
|
||
|
}
|
||
|
problem->group_no.clear();
|
||
|
vec_free(mesh->eb_cnts);
|
||
|
/* since Chaco didn't free the graph, need to do it here */
|
||
|
vec_free(graph->start);
|
||
|
vec_free(graph->adj);
|
||
|
vec_free(weight->vertices);
|
||
|
vec_free(weight->edges);
|
||
|
}
|
||
|
return 0;
|
||
|
} /*---------------------- End generate_loadbal() ---------------------------*/
|
||
|
|
||
|
/*****************************************************************************/
|
||
|
/*****************************************************************************/
|
||
|
/*****************************************************************************/
|
||
|
/* Function assign_fem() begins:
|
||
|
*----------------------------------------------------------------------------
|
||
|
* This function takes the load balance information generated by
|
||
|
* generate_loadbal() and assigns the FEM quantities to processors based on
|
||
|
* that load balance.
|
||
|
*****************************************************************************/
|
||
|
template int generate_maps(Machine_Description *machine, Problem_Description *problem,
|
||
|
Mesh_Description<int> *mesh, LB_Description<int> *lb,
|
||
|
Graph_Description<int> *graph);
|
||
|
|
||
|
template int generate_maps(Machine_Description *machine, Problem_Description *problem,
|
||
|
Mesh_Description<int64_t> *mesh, LB_Description<int64_t> *lb,
|
||
|
Graph_Description<int64_t> *graph);
|
||
|
|
||
|
template <typename INT>
|
||
|
int generate_maps(Machine_Description *machine, Problem_Description *problem,
|
||
|
Mesh_Description<INT> *mesh, LB_Description<INT> *lb,
|
||
|
Graph_Description<INT> *graph)
|
||
|
{
|
||
|
|
||
|
/*-----------------------------Execution Begins------------------------------*/
|
||
|
|
||
|
/* Generate the map for a nodal load balance */
|
||
|
if (problem->type == NODAL) {
|
||
|
/*
|
||
|
* Generate the nodal and elemental distribution for a nodal
|
||
|
* decomposition.
|
||
|
*/
|
||
|
if (!nodal_dist(lb, machine, mesh, graph)) {
|
||
|
Gen_Error(0, "fatal: unable to find nodal distribution");
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
/*
|
||
|
* Generate the nodal and elemental distribution for an elemental
|
||
|
* decomposition.
|
||
|
*/
|
||
|
if (!elemental_dist(lb, machine, mesh, graph, problem)) {
|
||
|
Gen_Error(0, "fatal: unable to find elemental distribution");
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return 1;
|
||
|
|
||
|
} /*----------------------End generate_maps()----------------------------*/
|
||
|
|
||
|
/*****************************************************************************/
|
||
|
/*****************************************************************************/
|
||
|
/*****************************************************************************/
|
||
|
namespace {
|
||
|
/*****************************************************************************/
|
||
|
/*****************************************************************************/
|
||
|
/*****************************************************************************
|
||
|
*----------------------------------------------------------------------------
|
||
|
* This function looks for 3D elements that are connected to other elements
|
||
|
* by only a node or edge resulting in a boundary face.
|
||
|
*****************************************************************************/
|
||
|
template <typename INT>
|
||
|
int identify_mechanisms(Machine_Description *machine, Problem_Description *problem,
|
||
|
Mesh_Description<INT> *mesh, LB_Description<INT> *lb,
|
||
|
Graph_Description<INT> *graph, int check_type)
|
||
|
{
|
||
|
std::vector<int> list_ptr;
|
||
|
|
||
|
/*
|
||
|
* look for discontinuities in the graph
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
if (problem->find_cnt_domains == 1) {
|
||
|
if (check_type == GLOBAL_ISSUES) {
|
||
|
size_t nrow = mesh->num_elems;
|
||
|
std::vector<int> rows(nrow + 1);
|
||
|
std::vector<int> list(nrow);
|
||
|
|
||
|
rows[0] = 1;
|
||
|
for (size_t ecnt = 1; ecnt < mesh->num_elems; ecnt++) {
|
||
|
auto distance = graph->start[ecnt] - graph->start[ecnt - 1] + 1;
|
||
|
rows[ecnt] = rows[ecnt - 1] + distance;
|
||
|
}
|
||
|
rows[nrow] = graph->nadj + mesh->num_elems + 1;
|
||
|
size_t nedges = rows[nrow] - 1;
|
||
|
std::vector<int> columns(nedges);
|
||
|
|
||
|
size_t ki = 0;
|
||
|
size_t kf = 0;
|
||
|
for (size_t ecnt = 0; ecnt < mesh->num_elems; ecnt++) {
|
||
|
columns[kf++] = ecnt + 1;
|
||
|
size_t distance = rows[ecnt + 1] - rows[ecnt] - 1;
|
||
|
for (size_t i = 0; i < distance; i++) {
|
||
|
columns[kf++] = graph->adj[ki++] + 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
size_t components =
|
||
|
extract_connected_lists(nrow, columns.data(), rows.data(), list.data(), list_ptr);
|
||
|
|
||
|
if (components) {
|
||
|
fmt::print("There are {} connected components.\n", components);
|
||
|
for (size_t i = 0; i < components; i++) {
|
||
|
ki = list_ptr[i];
|
||
|
kf = list_ptr[i + 1] - 1;
|
||
|
size_t distance = kf - ki + 1;
|
||
|
fmt::print("Connection {} #elements {}\n", i + 1, distance);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (check_type == LOCAL_ISSUES) {
|
||
|
|
||
|
std::vector<int> proc_cnt(machine->num_procs);
|
||
|
std::vector<INT> local_number(mesh->num_elems);
|
||
|
|
||
|
/*
|
||
|
* after running through chaco, the adjacency graph entries have
|
||
|
* been changed to 1 based
|
||
|
*/
|
||
|
for (size_t ecnt = 0; ecnt < mesh->num_elems; ecnt++) {
|
||
|
int proc = lb->vertex2proc[ecnt];
|
||
|
proc_cnt[proc]++;
|
||
|
local_number[ecnt] = proc_cnt[proc];
|
||
|
}
|
||
|
|
||
|
int tmp_procs = machine->num_procs;
|
||
|
for (int pcnt = 0; pcnt < tmp_procs; pcnt++) {
|
||
|
if (proc_cnt[pcnt]) {
|
||
|
int nrow = proc_cnt[pcnt];
|
||
|
std::vector<int> rows(nrow + 1);
|
||
|
std::vector<int> global_index(nrow);
|
||
|
std::vector<int> list(nrow);
|
||
|
|
||
|
rows[0] = 1;
|
||
|
size_t cnt = 0;
|
||
|
size_t tcnt = 0;
|
||
|
for (size_t ecnt = 0; ecnt < mesh->num_elems; ecnt++) {
|
||
|
int proc = lb->vertex2proc[ecnt];
|
||
|
assert(proc < machine->num_procs);
|
||
|
if (proc == pcnt) {
|
||
|
size_t end = 0;
|
||
|
if (ecnt < (mesh->num_elems - 1)) {
|
||
|
end = graph->start[ecnt + 1];
|
||
|
}
|
||
|
else {
|
||
|
end = graph->nadj;
|
||
|
}
|
||
|
size_t distance = 1;
|
||
|
for (size_t i = graph->start[ecnt]; i < end; i++) {
|
||
|
int proc2 = lb->vertex2proc[graph->adj[i] - 1];
|
||
|
assert(proc2 < machine->num_procs);
|
||
|
if (proc2 == proc) {
|
||
|
distance++;
|
||
|
}
|
||
|
}
|
||
|
cnt++;
|
||
|
rows[cnt] = rows[cnt - 1] + distance;
|
||
|
tcnt += distance;
|
||
|
}
|
||
|
}
|
||
|
rows[nrow] = tcnt + 1;
|
||
|
size_t nedges = rows[nrow] - 1;
|
||
|
std::vector<int> columns(nedges);
|
||
|
{
|
||
|
size_t kf = 0;
|
||
|
size_t ki = 0;
|
||
|
for (size_t ecnt = 0; ecnt < mesh->num_elems; ecnt++) {
|
||
|
int proc = lb->vertex2proc[ecnt];
|
||
|
assert(proc < machine->num_procs);
|
||
|
if (proc == pcnt) {
|
||
|
global_index[ki++] = ecnt;
|
||
|
columns[kf++] = local_number[ecnt];
|
||
|
size_t end = 0;
|
||
|
if (ecnt < (mesh->num_elems - 1)) {
|
||
|
end = graph->start[ecnt + 1];
|
||
|
}
|
||
|
else {
|
||
|
end = graph->nadj;
|
||
|
}
|
||
|
for (size_t i = graph->start[ecnt]; i < end; i++) {
|
||
|
int proc2 = lb->vertex2proc[graph->adj[i] - 1];
|
||
|
assert(proc2 < machine->num_procs);
|
||
|
if (proc2 == proc) {
|
||
|
columns[kf++] = local_number[graph->adj[i] - 1];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
int components =
|
||
|
extract_connected_lists(nrow, columns.data(), rows.data(), list.data(), list_ptr);
|
||
|
|
||
|
if (components > 0) {
|
||
|
fmt::print("For Processor {} there are {} connected components.\n", pcnt, components);
|
||
|
for (int i = 0; i < components; i++) {
|
||
|
int ki = list_ptr[i];
|
||
|
int kf = list_ptr[i + 1] - 1;
|
||
|
int distance = kf - ki + 1;
|
||
|
fmt::print("Connection {} #elements {}\n", i + 1, distance);
|
||
|
}
|
||
|
if (problem->dsd_add_procs == 1) {
|
||
|
for (int i = 1; i < components; i++) {
|
||
|
int ki = list_ptr[i];
|
||
|
int kf = list_ptr[i + 1] - 1;
|
||
|
for (int k = ki; k <= kf; k++) {
|
||
|
lb->vertex2proc[global_index[list[k] - 1]] = machine->num_procs;
|
||
|
}
|
||
|
machine->num_procs++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if (tmp_procs != machine->num_procs) {
|
||
|
fmt::print("\n!!! Processor count increased to {} processors\n", machine->num_procs);
|
||
|
fmt::print("!!! in order to make connected subdomains\n\n");
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* determine local element numbers for diagnostic output
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
if (problem->global_mech == 1 || problem->local_mech == 1) {
|
||
|
|
||
|
std::vector<INT> pt_list(graph->max_nsur);
|
||
|
std::vector<INT> hold_elem(graph->max_nsur);
|
||
|
std::vector<int> problems(mesh->num_nodes);
|
||
|
std::vector<int> proc_cnt(machine->num_procs);
|
||
|
std::vector<INT> local_number(mesh->num_elems);
|
||
|
|
||
|
if (check_type == LOCAL_ISSUES) {
|
||
|
for (size_t ecnt = 0; ecnt < mesh->num_elems; ecnt++) {
|
||
|
int proc = lb->vertex2proc[ecnt];
|
||
|
assert(proc < machine->num_procs);
|
||
|
proc_cnt[proc]++;
|
||
|
local_number[ecnt] = proc_cnt[proc];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
size_t num_found = 0;
|
||
|
for (size_t ecnt = 0; ecnt < mesh->num_elems; ecnt++) {
|
||
|
E_Type etype = mesh->elem_type[ecnt];
|
||
|
|
||
|
/* need to check for volume elements */
|
||
|
if (is_3d_element(etype)) {
|
||
|
|
||
|
int nnodes = get_elem_info(NNODES, mesh->elem_type[ecnt]);
|
||
|
|
||
|
for (int ncnt = 0; ncnt < nnodes; ncnt++) {
|
||
|
size_t node = mesh->connect[ecnt][ncnt];
|
||
|
problems[node] = 0;
|
||
|
}
|
||
|
|
||
|
int nsides = get_elem_info(NSIDES, etype);
|
||
|
int proc = 0;
|
||
|
if (check_type == LOCAL_ISSUES) {
|
||
|
proc = lb->vertex2proc[ecnt];
|
||
|
}
|
||
|
else {
|
||
|
proc = 0;
|
||
|
}
|
||
|
assert(proc < machine->num_procs);
|
||
|
|
||
|
/* check each side of this element */
|
||
|
|
||
|
INT side_nodes[MAX_SIDE_NODES];
|
||
|
INT side_nodes2[MAX_SIDE_NODES];
|
||
|
for (int nscnt = 0; nscnt < nsides; nscnt++) {
|
||
|
|
||
|
/* get the list of nodes on this side set */
|
||
|
int side_cnt = ss_to_node_list(etype, mesh->connect[ecnt], (nscnt + 1), side_nodes);
|
||
|
|
||
|
for (int ncnt = 0; ncnt < side_cnt; ncnt++) {
|
||
|
|
||
|
size_t node = side_nodes[ncnt];
|
||
|
size_t nhold = graph->sur_elem[node].size();
|
||
|
|
||
|
/*
|
||
|
* look for the following cases
|
||
|
* 1) bar elements connected to a volume element
|
||
|
* 2) shell element connected to a edge or point of a volume
|
||
|
* element and both its faces are boundaries
|
||
|
*
|
||
|
*/
|
||
|
for (size_t pcnt = 0; pcnt < nhold; pcnt++) {
|
||
|
|
||
|
size_t el2 = graph->sur_elem[node][pcnt];
|
||
|
E_Type etype2 = mesh->elem_type[el2];
|
||
|
|
||
|
int proc2 = 0;
|
||
|
if (check_type == LOCAL_ISSUES) {
|
||
|
proc2 = lb->vertex2proc[el2];
|
||
|
}
|
||
|
assert(proc2 < machine->num_procs);
|
||
|
|
||
|
if (ecnt != el2 && proc == proc2) {
|
||
|
if (etype2 == BAR2 || etype2 == BAR3 || etype2 == SHELL2 || etype2 == SHELL3) {
|
||
|
problems[node] = el2 + 1;
|
||
|
}
|
||
|
else if (etype2 == SHELL4 || etype2 == SHELL8 || etype2 == TSHELL3 ||
|
||
|
etype2 == TSHELL6) {
|
||
|
/*
|
||
|
* look for an element connect to one of the shells faces (not edges)
|
||
|
* one can look at elements connected to 3 of the nodes - cannot use
|
||
|
* diagonals due to triangular shells
|
||
|
*/
|
||
|
|
||
|
int nsides2 = get_elem_info(NSIDES, etype2);
|
||
|
|
||
|
size_t count = 0;
|
||
|
for (int cnt = 0; cnt < nsides2; cnt++) {
|
||
|
|
||
|
ss_to_node_list(etype2, mesh->connect[el2], (cnt + 1), side_nodes2);
|
||
|
|
||
|
int nhold2 =
|
||
|
find_inter(graph->sur_elem[side_nodes2[0]].data(),
|
||
|
graph->sur_elem[side_nodes2[1]].data(),
|
||
|
graph->sur_elem[side_nodes2[0]].size(),
|
||
|
graph->sur_elem[side_nodes2[1]].size(), pt_list.data());
|
||
|
|
||
|
for (int i = 0; i < nhold2; i++) {
|
||
|
hold_elem[i] = graph->sur_elem[side_nodes2[0]][pt_list[i]];
|
||
|
}
|
||
|
|
||
|
size_t nelem = find_inter(
|
||
|
hold_elem.data(), graph->sur_elem[side_nodes2[2]].data(), nhold2,
|
||
|
graph->sur_elem[side_nodes2[2]].size(), pt_list.data());
|
||
|
|
||
|
if (nelem >= 1) {
|
||
|
count++;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* at this point, a shell was connected to this
|
||
|
* volume element. if count, then the shell has a
|
||
|
* element connect to one of its faces and thus
|
||
|
* this is not a mechanism. If !count, then this
|
||
|
* is a mechanism.
|
||
|
*/
|
||
|
|
||
|
if (!count) {
|
||
|
problems[node] = el2 + 1;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (int ncnt = 0; ncnt < nnodes; ncnt++) {
|
||
|
size_t node = mesh->connect[ecnt][ncnt];
|
||
|
if (problems[node]) {
|
||
|
size_t el2 = problems[node] - 1;
|
||
|
E_Type etype2 = mesh->elem_type[el2];
|
||
|
|
||
|
if (check_type == LOCAL_ISSUES) {
|
||
|
fmt::print("WARNING: On Processor {} Local Element {}"
|
||
|
" ({}) has a mechanism through Global Node {}"
|
||
|
" with Local Element {} ({})\n",
|
||
|
proc, (size_t)local_number[ecnt], elem_name_from_enum(etype), node,
|
||
|
(size_t)local_number[el2], elem_name_from_enum(etype2));
|
||
|
if (problem->mech_add_procs == 1) {
|
||
|
lb->vertex2proc[el2] = machine->num_procs;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
fmt::print("WARNING: Element {} ({}) has a mechanism through Node {} with Element "
|
||
|
"{} ({})\n",
|
||
|
ecnt + 1, elem_name_from_enum(etype), node, el2 + 1,
|
||
|
elem_name_from_enum(etype2));
|
||
|
}
|
||
|
num_found++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (num_found) {
|
||
|
fmt::print("Total mechanisms found = {}\n", num_found);
|
||
|
if (check_type == LOCAL_ISSUES) {
|
||
|
if (problem->mech_add_procs == 1) {
|
||
|
machine->num_procs++;
|
||
|
fmt::print("\n!!! Processor count increased to {} processors\n", machine->num_procs);
|
||
|
fmt::print("!!! to move mechanisms to another processor\n\n");
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
fmt::print("NO mechanisms found\n");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
template <typename INT>
|
||
|
int nodal_dist(LB_Description<INT> *lb, Machine_Description *machine, Mesh_Description<INT> *mesh,
|
||
|
Graph_Description<INT> *graph)
|
||
|
{
|
||
|
double time1 = get_time();
|
||
|
lb->int_nodes.resize(machine->num_procs);
|
||
|
lb->bor_nodes.resize(machine->num_procs);
|
||
|
lb->ext_nodes.resize(machine->num_procs);
|
||
|
lb->int_elems.resize(machine->num_procs);
|
||
|
lb->bor_elems.resize(machine->num_procs); // Not used in nodal dist.
|
||
|
lb->ext_procs.resize(machine->num_procs);
|
||
|
|
||
|
double time2 = get_time();
|
||
|
fmt::print("Allocation time: {}s\n", time2 - time1);
|
||
|
|
||
|
/* Find the internal, border and external nodes */
|
||
|
time1 = get_time();
|
||
|
for (size_t ncnt = 0; ncnt < mesh->num_nodes; ncnt++) {
|
||
|
int proc = lb->vertex2proc[ncnt];
|
||
|
assert(proc < machine->num_procs);
|
||
|
int internal = 1;
|
||
|
int flag = 0;
|
||
|
for (size_t ecnt = 0; ecnt < graph->sur_elem[ncnt].size(); ecnt++) {
|
||
|
int elem = graph->sur_elem[ncnt][ecnt];
|
||
|
E_Type etype = mesh->elem_type[elem];
|
||
|
int nnodes = get_elem_info(NNODES, etype);
|
||
|
for (size_t i = 0; i < static_cast<size_t>(nnodes); i++) {
|
||
|
int proc_n = lb->vertex2proc[mesh->connect[elem][i]];
|
||
|
assert(proc_n < machine->num_procs);
|
||
|
if (proc_n != proc) {
|
||
|
/* "ncnt" is a border node and is an external node to proc_n */
|
||
|
internal = 0;
|
||
|
if (!flag) {
|
||
|
flag = 1;
|
||
|
lb->bor_nodes[proc].push_back(ncnt);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
** to make sure that this node has not already been put
|
||
|
** in the external node list for proc_n I need to check
|
||
|
** only the last element in the current list
|
||
|
*/
|
||
|
if (lb->ext_nodes[proc_n].empty() || (lb->ext_nodes[proc_n].back() != (INT)ncnt)) {
|
||
|
lb->ext_nodes[proc_n].push_back(ncnt);
|
||
|
lb->ext_procs[proc_n].push_back(proc);
|
||
|
}
|
||
|
}
|
||
|
} /* End "for(i=0; i < nnodes; i++)" */
|
||
|
} /* End "for(ecnt=0; ecnt < graph->nsur_elem[ncnt]; ecnt++)" */
|
||
|
|
||
|
if (internal) {
|
||
|
/* "ncnt" is an internal node */
|
||
|
lb->int_nodes[proc].push_back(ncnt);
|
||
|
}
|
||
|
} /* End "for(ncnt=0; ncnt < mesh->num_nodes; ncnt++)" */
|
||
|
time2 = get_time();
|
||
|
fmt::print("Time for nodal categorization: {}s\n", time2 - time1);
|
||
|
|
||
|
/* Find the internal elements */
|
||
|
time1 = get_time();
|
||
|
for (size_t ecnt = 0; ecnt < mesh->num_elems; ecnt++) {
|
||
|
E_Type etype = mesh->elem_type[ecnt];
|
||
|
int nnodes = get_elem_info(NNODES, etype);
|
||
|
for (size_t ncnt = 0; ncnt < static_cast<size_t>(nnodes); ncnt++) {
|
||
|
int node = mesh->connect[ecnt][ncnt];
|
||
|
int proc = lb->vertex2proc[node];
|
||
|
assert(proc < machine->num_procs);
|
||
|
/*
|
||
|
** since the outer loop is on the elements, I don't need to
|
||
|
** search over the entire list to find out if this element is
|
||
|
** already in it. If the element is in the processors list,
|
||
|
** then it must be the last element.
|
||
|
*/
|
||
|
if ((lb->int_elems[proc].empty()) || (lb->int_elems[proc].back() != (INT)ecnt)) {
|
||
|
lb->int_elems[proc].push_back(ecnt);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
time2 = get_time();
|
||
|
fmt::print("Elemental categorization: {}s\n", time2 - time1);
|
||
|
return 1;
|
||
|
} /*-----------------------------End nodal_dist()----------------------------*/
|
||
|
|
||
|
/*****************************************************************************/
|
||
|
/*****************************************************************************/
|
||
|
/*****************************************************************************/
|
||
|
template <typename INT>
|
||
|
int elemental_dist(LB_Description<INT> *lb, Machine_Description *machine,
|
||
|
Mesh_Description<INT> *mesh, Graph_Description<INT> *graph,
|
||
|
Problem_Description *problem)
|
||
|
{
|
||
|
int sid;
|
||
|
int hflag1;
|
||
|
int hflag2;
|
||
|
int tflag1;
|
||
|
int tflag2;
|
||
|
int dflag;
|
||
|
|
||
|
INT nelem;
|
||
|
INT side_nodes[MAX_SIDE_NODES];
|
||
|
INT mirror_nodes[MAX_SIDE_NODES];
|
||
|
int side_cnt;
|
||
|
|
||
|
double time2;
|
||
|
|
||
|
std::string cmesg;
|
||
|
std::string tmpstr;
|
||
|
|
||
|
/*-----------------------------Execution Begins------------------------------*/
|
||
|
|
||
|
/* Allocate memory */
|
||
|
lb->int_nodes.resize(machine->num_procs);
|
||
|
lb->bor_nodes.resize(machine->num_procs);
|
||
|
lb->ext_nodes.resize(machine->num_procs); /* Not used in elemental dist */
|
||
|
|
||
|
lb->int_elems.resize(machine->num_procs);
|
||
|
lb->bor_elems.resize(machine->num_procs);
|
||
|
|
||
|
lb->ext_procs.resize(machine->num_procs);
|
||
|
lb->born_procs.resize(machine->num_procs);
|
||
|
|
||
|
lb->e_cmap_elems.resize(machine->num_procs);
|
||
|
lb->e_cmap_sides.resize(machine->num_procs);
|
||
|
lb->e_cmap_procs.resize(machine->num_procs);
|
||
|
lb->e_cmap_neigh.resize(machine->num_procs);
|
||
|
|
||
|
/* allocate space to hold info about surrounding elements */
|
||
|
std::vector<INT> pt_list(graph->max_nsur);
|
||
|
std::vector<INT> hold_elem(graph->max_nsur);
|
||
|
|
||
|
/* Find the internal and border elements */
|
||
|
double time1 = get_time();
|
||
|
|
||
|
for (size_t ecnt = 0; ecnt < mesh->num_elems; ecnt++) {
|
||
|
int proc = lb->vertex2proc[ecnt];
|
||
|
assert(proc < machine->num_procs);
|
||
|
bool internal = true;
|
||
|
int flag = 0;
|
||
|
E_Type etype = mesh->elem_type[ecnt];
|
||
|
int dim1 = get_elem_info(NDIM, etype);
|
||
|
|
||
|
/* need to check for hex's or tet's */
|
||
|
hflag1 = is_hex(etype);
|
||
|
|
||
|
/* a TET10 cannot connect to a HEX */
|
||
|
tflag1 = is_tet(etype);
|
||
|
|
||
|
int nsides = get_elem_info(NSIDES, etype);
|
||
|
|
||
|
/* check each side of this element */
|
||
|
for (int nscnt = 0; nscnt < nsides; nscnt++) {
|
||
|
|
||
|
/* get the list of nodes on this element side */
|
||
|
side_cnt = ss_to_node_list(etype, mesh->connect[ecnt], (nscnt + 1), side_nodes);
|
||
|
|
||
|
/*
|
||
|
* now determine how many side set nodes are needed to
|
||
|
* determine if there is an element connected to this side.
|
||
|
*
|
||
|
* 2-D - need two nodes, so find one intersection
|
||
|
* 3-D - need three nodes, so find two intersections
|
||
|
* NOTE: must check to make sure that this number is not
|
||
|
* larger than the number of nodes on the sides (ie - SHELL).
|
||
|
*/
|
||
|
|
||
|
int nnodes = mesh->num_dims;
|
||
|
if (side_cnt < nnodes) {
|
||
|
nnodes = side_cnt;
|
||
|
}
|
||
|
nnodes--; /* decrement to find the number of intersections needed */
|
||
|
|
||
|
nelem = 0; /* reset this in case no intersections are needed */
|
||
|
|
||
|
/*
|
||
|
* need to handle hex's differently because of
|
||
|
* the tet/hex combination
|
||
|
*/
|
||
|
|
||
|
if (!hflag1) { /* Not a hex */
|
||
|
|
||
|
/* ignore degenerate bars */
|
||
|
|
||
|
if (!((etype == BAR2 || etype == SHELL2) && side_nodes[0] == side_nodes[1])) {
|
||
|
|
||
|
size_t nhold = graph->sur_elem[side_nodes[0]].size();
|
||
|
for (size_t ncnt = 0; ncnt < nhold; ncnt++) {
|
||
|
hold_elem[ncnt] = graph->sur_elem[side_nodes[0]][ncnt];
|
||
|
}
|
||
|
|
||
|
for (int ncnt = 0; ncnt < nnodes; ncnt++) {
|
||
|
/* Find elements connected to both node '0' and node 'ncnt+1' */
|
||
|
nelem =
|
||
|
find_inter(hold_elem.data(), graph->sur_elem[side_nodes[(ncnt + 1)]].data(),
|
||
|
nhold, graph->sur_elem[side_nodes[(ncnt + 1)]].size(), pt_list.data());
|
||
|
|
||
|
if (nelem < 2) {
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
nhold = nelem;
|
||
|
for (int ncnt2 = 0; ncnt2 < nelem; ncnt2++) {
|
||
|
hold_elem[ncnt2] = hold_elem[pt_list[ncnt2]];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
fmt::print("WARNING: Element = {} is a DEGENERATE BAR\n", ecnt + 1);
|
||
|
}
|
||
|
}
|
||
|
else { /* Is a hex */
|
||
|
|
||
|
/*
|
||
|
* Hex faces are fairly complicated now. There are two
|
||
|
* exceptions to the standard case:
|
||
|
* 1. it is a degenerate hex (mimics a wedge). this has
|
||
|
* two special faces, the first is a triangle, and the
|
||
|
* second is a 2d line
|
||
|
* 2. two tets are connected to this hex face
|
||
|
*/
|
||
|
|
||
|
/* first need to check for a degenerate element */
|
||
|
dflag = 0;
|
||
|
if (side_nodes[0] == side_nodes[1] || side_nodes[0] == side_nodes[3]) {
|
||
|
dflag++;
|
||
|
}
|
||
|
if (side_nodes[2] == side_nodes[1] || side_nodes[2] == side_nodes[3]) {
|
||
|
dflag++;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* if both flags are set, then this face is the 2d line,
|
||
|
* and should be ignored with respect to elemental
|
||
|
* communication maps
|
||
|
*/
|
||
|
if (dflag == 2) {
|
||
|
nelem = 1;
|
||
|
}
|
||
|
else {
|
||
|
/*
|
||
|
* In order to check for two tets connected to this face,
|
||
|
* check the intersection of opposite corners of this face.
|
||
|
* Both tets should show up in the intersection of one of the
|
||
|
* sets of opposite corners (nothing should show up in the
|
||
|
* other).
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* Initial check is side nodes 0 and 2 which are
|
||
|
* diagonally opposite
|
||
|
*/
|
||
|
int inode = 0;
|
||
|
int node = 2;
|
||
|
size_t nhold = 0;
|
||
|
for (int ncnt = 0; ncnt < nnodes; ncnt++) {
|
||
|
/* Find elements connected to both node 'inode' and node 'node' */
|
||
|
nelem = find_inter(graph->sur_elem[side_nodes[inode]].data(),
|
||
|
graph->sur_elem[side_nodes[node]].data(),
|
||
|
graph->sur_elem[side_nodes[inode]].size(),
|
||
|
graph->sur_elem[side_nodes[node]].size(), pt_list.data());
|
||
|
|
||
|
if (nelem > 1) {
|
||
|
if (ncnt == 0) {
|
||
|
nhold = nelem;
|
||
|
for (int ncnt2 = 0; ncnt2 < nelem; ncnt2++) {
|
||
|
hold_elem[ncnt2] = graph->sur_elem[side_nodes[inode]][pt_list[ncnt2]];
|
||
|
}
|
||
|
|
||
|
if (dflag) {
|
||
|
/*
|
||
|
* in this case, need to get an intersection with
|
||
|
* another (unique) point since nodes 0 and 2
|
||
|
* may represent an edge and not the diagonal
|
||
|
*/
|
||
|
if (side_nodes[1] != side_nodes[0] && side_nodes[1] != side_nodes[2]) {
|
||
|
node = 1;
|
||
|
}
|
||
|
else {
|
||
|
node = 3;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
/*
|
||
|
* in the non-degenerate case, if an element is connected
|
||
|
* to two opposite nodes, then it must share a face.
|
||
|
*/
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
/* This is the second or later time through this
|
||
|
loop and each time through, there have been two
|
||
|
or more elements that are connected to 'node'
|
||
|
(which changes) and 'inode'. We want to make
|
||
|
sure that the elements matched this time through
|
||
|
were also in the list the first time through so
|
||
|
that the elements contain nodes 0 1 2 of the face
|
||
|
and not just 0 1 and 0 2...
|
||
|
So, this time, only put an element in the list if
|
||
|
it was in the list before.
|
||
|
*/
|
||
|
for (size_t ncnt2 = 0; ncnt2 < nhold; ncnt2++) {
|
||
|
hold_elem[ncnt2] = -hold_elem[ncnt2];
|
||
|
}
|
||
|
for (int ncnt3 = 0; ncnt3 < nelem; ncnt3++) {
|
||
|
for (size_t ncnt2 = 0; ncnt2 < nhold; ncnt2++) {
|
||
|
if (-hold_elem[ncnt2] == graph->sur_elem[side_nodes[inode]][pt_list[ncnt3]]) {
|
||
|
hold_elem[ncnt2] = graph->sur_elem[side_nodes[inode]][pt_list[ncnt3]];
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
/* Now, go through list and cull out element < 0 */
|
||
|
size_t ncnt3 = 0;
|
||
|
for (size_t ncnt2 = 0; ncnt2 < nhold; ncnt2++) {
|
||
|
if (hold_elem[ncnt2] >= 0) {
|
||
|
hold_elem[ncnt3] = hold_elem[ncnt2];
|
||
|
ncnt3++;
|
||
|
}
|
||
|
}
|
||
|
nelem = ncnt3;
|
||
|
if (!dflag && nelem > 2) {
|
||
|
fmt::print(
|
||
|
stderr,
|
||
|
"Possible corrupted mesh detected at element {}, strange connectivity.\n",
|
||
|
ecnt);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else { /* nelem == 1 or 0 */
|
||
|
if (!dflag) {
|
||
|
nhold = graph->sur_elem[side_nodes[1]].size();
|
||
|
for (size_t ncnt2 = 0; ncnt2 < nhold; ncnt2++) {
|
||
|
hold_elem[ncnt2] = graph->sur_elem[side_nodes[1]][ncnt2];
|
||
|
}
|
||
|
}
|
||
|
inode = 1;
|
||
|
node = 3; /* The node diagonally opposite node 1 */
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
} /* "if (!hflag1)" */
|
||
|
|
||
|
/*
|
||
|
* if there is an element on this side of ecnt, then there
|
||
|
* will be at least two elements in the intersection (one
|
||
|
* will be ecnt)
|
||
|
*/
|
||
|
if (nelem > 1) {
|
||
|
|
||
|
/*
|
||
|
* now go through and check each element in the list to see
|
||
|
* if it on a different processor than ecnt. Don't need to
|
||
|
* worry about ecnt (which is in the list) since it is on
|
||
|
* the same processor as itself. Note that due to filtering
|
||
|
* done above, we are guaranteed to either have an element
|
||
|
* on a different processor or elem==ecnt.
|
||
|
*/
|
||
|
for (int ncnt = 0; ncnt < nelem; ncnt++) {
|
||
|
|
||
|
INT elem = hold_elem[ncnt];
|
||
|
int proc2 = lb->vertex2proc[elem];
|
||
|
assert(proc2 < machine->num_procs);
|
||
|
|
||
|
if (proc != proc2) {
|
||
|
|
||
|
E_Type etype2 = mesh->elem_type[elem];
|
||
|
|
||
|
int dim2 = get_elem_info(NDIM, etype2);
|
||
|
|
||
|
int diff = abs(dim1 - dim2);
|
||
|
|
||
|
/*
|
||
|
* hex's to shells - ok
|
||
|
* shells to bar - ok
|
||
|
* hex to bar - BAD since a BAR will see a HEX but a HEX will not
|
||
|
* see a BAR
|
||
|
*/
|
||
|
|
||
|
if (diff < 2) {
|
||
|
|
||
|
/* need to check for hex's */
|
||
|
hflag2 = is_hex(etype2);
|
||
|
tflag2 = is_tet(etype2);
|
||
|
|
||
|
/* check here for tet/hex combinations */
|
||
|
if ((tflag1 && hflag2) || (hflag1 && tflag2)) {
|
||
|
/*
|
||
|
* have to call a special function to get the side id
|
||
|
* in these cases. In both cases, the number of side
|
||
|
* nodes for the element will not be consistent with
|
||
|
* side_cnt, and:
|
||
|
*
|
||
|
* TET/HEX - side_nodes only contains three of the
|
||
|
* the side nodes of the hex.
|
||
|
*
|
||
|
* HEX/TET - Have to check that this tet shares a side
|
||
|
* with the hex.
|
||
|
*/
|
||
|
sid = get_side_id_hex_tet(mesh->elem_type[elem], mesh->connect[elem], side_cnt,
|
||
|
side_nodes);
|
||
|
}
|
||
|
else {
|
||
|
/*
|
||
|
* get the side id of elem. Make sure that ecnt is
|
||
|
* trying to communicate to a valid side of elem
|
||
|
*/
|
||
|
side_cnt = get_ss_mirror(etype, side_nodes, (nscnt + 1), mirror_nodes);
|
||
|
|
||
|
/*
|
||
|
* small kludge to handle 6 node faces butted up against
|
||
|
* 4 node faces
|
||
|
*/
|
||
|
if (etype == HEXSHELL && side_cnt == 6) {
|
||
|
side_cnt = 4;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* in order to get the correct side order for elem,
|
||
|
* get the mirror of the side of ecnt
|
||
|
*/
|
||
|
sid = get_side_id(mesh->elem_type[elem], mesh->connect[elem], side_cnt,
|
||
|
mirror_nodes, problem->skip_checks, problem->partial_adj);
|
||
|
}
|
||
|
|
||
|
if (sid > 0) {
|
||
|
/* Element is a border element */
|
||
|
internal = false;
|
||
|
if (!flag) {
|
||
|
flag = 1;
|
||
|
lb->bor_elems[proc].push_back(ecnt);
|
||
|
}
|
||
|
|
||
|
/* now put ecnt into proc2's communications map */
|
||
|
lb->e_cmap_elems[proc2].push_back(elem);
|
||
|
lb->e_cmap_sides[proc2].push_back(sid);
|
||
|
lb->e_cmap_procs[proc2].push_back(proc);
|
||
|
lb->e_cmap_neigh[proc2].push_back(ecnt);
|
||
|
}
|
||
|
else if ((sid < 0) && (!problem->skip_checks)) {
|
||
|
/*
|
||
|
* too many errors with bad meshes, print out
|
||
|
* more information here for diagnostics
|
||
|
*/
|
||
|
cmesg =
|
||
|
fmt::format("Error returned while getting side id for communication map.");
|
||
|
Gen_Error(0, cmesg);
|
||
|
cmesg = fmt::format("Element 1: {}", (ecnt + 1));
|
||
|
Gen_Error(0, cmesg);
|
||
|
nnodes = get_elem_info(NNODES, etype);
|
||
|
cmesg = "connect table:";
|
||
|
for (int i = 0; i < nnodes; i++) {
|
||
|
tmpstr = fmt::format(" {}", (size_t)(mesh->connect[ecnt][i] + 1));
|
||
|
cmesg += tmpstr;
|
||
|
}
|
||
|
Gen_Error(0, cmesg);
|
||
|
cmesg = fmt::format("side id: {}", static_cast<size_t>(nscnt + 1));
|
||
|
Gen_Error(0, cmesg);
|
||
|
cmesg = "side nodes:";
|
||
|
for (int i = 0; i < side_cnt; i++) {
|
||
|
tmpstr = fmt::format(" {}", (size_t)(side_nodes[i] + 1));
|
||
|
cmesg += tmpstr;
|
||
|
}
|
||
|
Gen_Error(0, cmesg);
|
||
|
cmesg = fmt::format("Element 2: {}", (size_t)(elem + 1));
|
||
|
Gen_Error(0, cmesg);
|
||
|
nnodes = get_elem_info(NNODES, etype2);
|
||
|
cmesg = "connect table:";
|
||
|
for (int i = 0; i < nnodes; i++) {
|
||
|
tmpstr = fmt::format(" {}", (size_t)(mesh->connect[elem][i] + 1));
|
||
|
cmesg += tmpstr;
|
||
|
}
|
||
|
Gen_Error(0, cmesg);
|
||
|
return 0; /* and get out of here */
|
||
|
|
||
|
} /* End "if sid < 0 && !problem>skip_checks" */
|
||
|
} /* End "if (sid > 0)" */
|
||
|
} /* End "if (proc != proc2)" */
|
||
|
} /* End "for (ncnt = 0; ncnt < nelem; ncnt++)" */
|
||
|
} /* End "if (nelem > 1)" */
|
||
|
} /* End "for (nscnt = 0; nscnt < nsides; nscnt++)" */
|
||
|
|
||
|
if (internal) {
|
||
|
lb->int_elems[proc].push_back(ecnt);
|
||
|
}
|
||
|
} /* End "for(ecnt=0; ecnt < mesh->num_elems; ecnt++)" */
|
||
|
|
||
|
time2 = get_time();
|
||
|
fmt::print("Time for elemental categorization: {}s\n", time2 - time1);
|
||
|
|
||
|
/* Find the internal and border nodes */
|
||
|
|
||
|
time1 = get_time();
|
||
|
for (size_t ncnt = 0; ncnt < mesh->num_nodes; ncnt++) {
|
||
|
bool internal = true;
|
||
|
int proc = 0;
|
||
|
|
||
|
/* If a node is not connected to any elements (graph->nsur_elem[ncnt] == 0),
|
||
|
then it will be assigned to processor 0 and treated as internal.
|
||
|
*/
|
||
|
|
||
|
if (!graph->sur_elem[ncnt].empty()) {
|
||
|
size_t elem = graph->sur_elem[ncnt][0];
|
||
|
|
||
|
proc = lb->vertex2proc[elem];
|
||
|
assert(proc < machine->num_procs);
|
||
|
int flag = 0;
|
||
|
for (size_t ecnt = 1; ecnt < graph->sur_elem[ncnt].size(); ecnt++) {
|
||
|
int proc2 = lb->vertex2proc[graph->sur_elem[ncnt][ecnt]];
|
||
|
assert(proc2 < machine->num_procs);
|
||
|
/* check if the processor for any two surrounding elems differ */
|
||
|
if (proc != proc2) {
|
||
|
/* ncnt is a border node of proc */
|
||
|
internal = false;
|
||
|
/* first, I have to deal with node being border for proc */
|
||
|
if (!flag) {
|
||
|
flag = 1; /* only want to do this once */
|
||
|
lb->bor_nodes[proc].push_back(ncnt);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* now I have to put ncnt in the border list for proc2
|
||
|
* I need to check to make sure that this node has not
|
||
|
* already been added to this list. If it has, then it
|
||
|
* is in the last position in the array
|
||
|
*/
|
||
|
if ((lb->bor_nodes[proc2].empty()) || ((INT)ncnt != lb->bor_nodes[proc2].back())) {
|
||
|
lb->bor_nodes[proc2].push_back(ncnt);
|
||
|
}
|
||
|
} /* if (proc != lb->vertex2proc[graph->sur_elem[ncnt][ecnt]]) */
|
||
|
} /* for(ecnt=1; ecnt < graph->nsur_elem[ncnt]; ecnt++) */
|
||
|
} /* if(graph->nsur_elem[ncnt]) */
|
||
|
|
||
|
if (internal) {
|
||
|
/*
|
||
|
* NOTE: if all of the processors above were the same, then
|
||
|
* the one held in proc is the correct one
|
||
|
*/
|
||
|
lb->int_nodes[proc].push_back(ncnt);
|
||
|
}
|
||
|
} /* for(ncnt=0; ncnt < machine->num_nodes; ncnt++) */
|
||
|
|
||
|
time2 = get_time();
|
||
|
fmt::print("Nodal categorization: {}s\n", time2 - time1);
|
||
|
|
||
|
/* Allocate memory for the border node processor IDs */
|
||
|
for (int proc = 0; proc < machine->num_procs; proc++) {
|
||
|
if (!lb->bor_nodes[proc].empty()) {
|
||
|
lb->born_procs[proc].resize(lb->bor_nodes[proc].size());
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Now find the processor(s) associated with each border node */
|
||
|
time1 = get_time();
|
||
|
for (int pcnt = 0; pcnt < machine->num_procs; pcnt++) {
|
||
|
for (size_t ncnt = 0; ncnt < lb->bor_nodes[pcnt].size(); ncnt++) {
|
||
|
size_t node = lb->bor_nodes[pcnt][ncnt];
|
||
|
|
||
|
for (size_t ecnt = 0; ecnt < graph->sur_elem[node].size(); ecnt++) {
|
||
|
size_t elem = graph->sur_elem[node][ecnt];
|
||
|
INT proc = lb->vertex2proc[elem];
|
||
|
assert(proc < machine->num_procs);
|
||
|
if (proc != pcnt) {
|
||
|
if (in_list(proc, lb->born_procs[pcnt][ncnt]) < 0) {
|
||
|
lb->born_procs[pcnt][ncnt].push_back(proc);
|
||
|
}
|
||
|
} /* End "if(proc != pcnt)" */
|
||
|
} /* End "for(ecnt=0; ecnt < graph->nsur_elems[node]; ecnt++)" */
|
||
|
} /* End "for(ncnt=0; ncnt < lb->num_bor_nodes[pcnt]; ncnt++)" */
|
||
|
} /* End "for(pcnt=0; pcnt < machine->num_procs; pcnt++)" */
|
||
|
|
||
|
time2 = get_time();
|
||
|
fmt::print("Find procs for border nodes: {}s\n", time2 - time1);
|
||
|
|
||
|
/* Order the element communication maps by processor */
|
||
|
time1 = get_time();
|
||
|
for (int pcnt = 0; pcnt < machine->num_procs; pcnt++) {
|
||
|
/* Note that this sort is multi-key */
|
||
|
qsort4(&lb->e_cmap_procs[pcnt][0], /* 1st key */
|
||
|
&lb->e_cmap_elems[pcnt][0], /* 2nd key */
|
||
|
&lb->e_cmap_neigh[pcnt][0], /* 3rd key */
|
||
|
&lb->e_cmap_sides[pcnt][0], /* 4th key */
|
||
|
lb->e_cmap_procs[pcnt].size()); /* Size */
|
||
|
}
|
||
|
/*
|
||
|
* At this point, each processors arrays are sorted on three keys:
|
||
|
* [processor, element, neighbor]
|
||
|
*/
|
||
|
|
||
|
time2 = get_time();
|
||
|
fmt::print("Order elem cmaps: {}s\n", time2 - time1);
|
||
|
|
||
|
/*
|
||
|
* Now order the elemental communication maps so that they are
|
||
|
* consistent between processors.
|
||
|
*/
|
||
|
time1 = get_time();
|
||
|
for (INT pcnt = 1; pcnt < machine->num_procs; pcnt++) {
|
||
|
int save_fv1 = 0;
|
||
|
int64_t size =
|
||
|
static_cast<int64_t>(lb->e_cmap_procs[pcnt].size()); /* Define shortcuts size and procs */
|
||
|
std::vector<INT> procs = lb->e_cmap_procs[pcnt];
|
||
|
|
||
|
INT fv1 = -1;
|
||
|
INT lv1 = -1;
|
||
|
|
||
|
for (int pcnt2 = 0; pcnt2 < pcnt; pcnt2++) {
|
||
|
|
||
|
/*
|
||
|
* Find the first and last entries for processor "pcnt2" in
|
||
|
* the list of processor "pcnt".
|
||
|
*/
|
||
|
|
||
|
/* lb->c_cmap_procs[pcnt] is sorted based on processor.
|
||
|
* From point 'save_fv1' search for 'pcnt2'
|
||
|
* If not found, value is -1; else search for !pcnt2 from
|
||
|
* that point forward.
|
||
|
*/
|
||
|
int64_t i = save_fv1;
|
||
|
while (i < size && procs[i] < pcnt2) {
|
||
|
i++;
|
||
|
}
|
||
|
if (i >= size || procs[i] != pcnt2) {
|
||
|
fv1 = -1;
|
||
|
lv1 = -1;
|
||
|
}
|
||
|
else {
|
||
|
fv1 = i;
|
||
|
assert(procs[i] == pcnt2);
|
||
|
for (lv1 = fv1; lv1 < size; lv1++) {
|
||
|
if (procs[lv1] != pcnt2) {
|
||
|
lv1 = lv1 - 1;
|
||
|
assert(procs[lv1] == pcnt2);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (lv1 >= size) {
|
||
|
lv1 = size - 1;
|
||
|
}
|
||
|
|
||
|
if (lv1 != -1) {
|
||
|
save_fv1 = lv1 + 1;
|
||
|
}
|
||
|
|
||
|
#if 0
|
||
|
/* Old method -- can use for verification by uncommenting this if block */
|
||
|
{
|
||
|
int tst_fv1, tst_lv1;
|
||
|
find_first_last(pcnt2, size, procs, &tst_fv1, &tst_lv1);
|
||
|
assert(tst_fv1 == fv1);
|
||
|
assert(tst_lv1 == lv1);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
if (fv1 >= 0) {
|
||
|
/* Sort based on neighbor element */
|
||
|
sort3(lv1 - fv1 + 1, (&lb->e_cmap_neigh[pcnt][fv1]), (&lb->e_cmap_elems[pcnt][fv1]),
|
||
|
(&lb->e_cmap_sides[pcnt][fv1]));
|
||
|
/*
|
||
|
* Find the first and last entries for processor "pcnt" in
|
||
|
* the list of processor "pcnt2".
|
||
|
*/
|
||
|
INT fv2 = -1;
|
||
|
INT lv2 = -1;
|
||
|
find_first_last(pcnt, lb->e_cmap_procs[pcnt2].size(), &lb->e_cmap_procs[pcnt2][0], &fv2,
|
||
|
&lv2);
|
||
|
#if 1
|
||
|
if (lv2 - fv2 != lv1 - fv1) {
|
||
|
fmt::print(stderr, "{}: {} to {}\n", static_cast<size_t>(pcnt2), (size_t)fv1,
|
||
|
(size_t)lv1);
|
||
|
for (i = fv1; i <= lv1; i++) {
|
||
|
fmt::print(stderr, "{}: {}\t{}\t{}\t{}\n", static_cast<size_t>(i),
|
||
|
(size_t)lb->e_cmap_elems[pcnt][i], (size_t)lb->e_cmap_neigh[pcnt][i],
|
||
|
(size_t)lb->e_cmap_procs[pcnt][i], (size_t)lb->e_cmap_sides[pcnt][i]);
|
||
|
}
|
||
|
fmt::print(stderr, "{}: {} to {}\n", (size_t)pcnt, (size_t)fv2, (size_t)lv2);
|
||
|
for (i = fv2; i <= lv2; i++) {
|
||
|
fmt::print(stderr, "{}: {}\t{}\t{}\t{}\n", static_cast<size_t>(i),
|
||
|
(size_t)lb->e_cmap_elems[pcnt2][i], (size_t)lb->e_cmap_neigh[pcnt2][i],
|
||
|
(size_t)lb->e_cmap_procs[pcnt2][i], (size_t)lb->e_cmap_sides[pcnt2][i]);
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
assert(lv2 - fv2 == lv1 - fv1);
|
||
|
|
||
|
/* Sort based on element -- This will then match order of
|
||
|
* the fv1->lv1 arrays.
|
||
|
*/
|
||
|
sort3(lv2 - fv2 + 1, (&lb->e_cmap_elems[pcnt2][fv2]), (&lb->e_cmap_neigh[pcnt2][fv2]),
|
||
|
(&lb->e_cmap_sides[pcnt2][fv2]));
|
||
|
|
||
|
} /* End "if(fv1 >= 0)" */
|
||
|
} /* End "for(pcnt2=0; pcnt2 < pcnt; pcnt2++)" */
|
||
|
} /* End "for(pcnt=0; pcnt < machine->num_procs; pcnt++)" */
|
||
|
|
||
|
time2 = get_time();
|
||
|
fmt::print("Make cmaps consistent: {}s\n", time2 - time1);
|
||
|
|
||
|
return 1;
|
||
|
} /*--------------------------End elemental_dist()---------------------------*/
|
||
|
|
||
|
/*****************************************************************************/
|
||
|
/*****************************************************************************/
|
||
|
/*****************************************************************************/
|
||
|
/*
|
||
|
* Determine a graph's connected components
|
||
|
* Input:
|
||
|
* nrow = number of elements in the problem
|
||
|
* rows = the pointer into columns. 1 based and of dimension nrow+1
|
||
|
* The last value=1+dimension of columns.
|
||
|
* columns = the element to element connectivity. Also 1 based
|
||
|
* and includes the self connectivity.
|
||
|
* Output:
|
||
|
* return value = number of pieces
|
||
|
* list = all the elements grouped by connectivity, allocated
|
||
|
* before calling extract_connected_lists.
|
||
|
* list_ptr = pointers to the start of each connected component in
|
||
|
* list[]. The dimension of *list_ptr is the number of
|
||
|
* connected components. Memory is allocated within the
|
||
|
* routine. Memory must be freed using free(). List_ptr
|
||
|
* is zero based.
|
||
|
* Example:
|
||
|
*
|
||
|
* 1 2 3 4 5
|
||
|
* 1 * * *
|
||
|
* 2 *
|
||
|
* 3 * *
|
||
|
* 4 * *
|
||
|
* 5 *
|
||
|
* There are three connected components: { 1 3 4 }, { 2 } and { 5 }
|
||
|
*
|
||
|
* Input:
|
||
|
* nrow=5
|
||
|
* rows={ 1 4 5 7 9 10}
|
||
|
* columns={1 3 4 2 1 3 1 4 5}
|
||
|
*
|
||
|
* Output:
|
||
|
* returns 3
|
||
|
* list= { 1 3 4 2 5 }
|
||
|
* list_ptr={ 0 3 4 }
|
||
|
*/
|
||
|
|
||
|
int extract_connected_lists(int nrow, const int *columns, const int *rows, int *list,
|
||
|
std::vector<int> &list_ptr)
|
||
|
{
|
||
|
int root;
|
||
|
int nordered;
|
||
|
int ni;
|
||
|
int nf;
|
||
|
int nni;
|
||
|
int nnf;
|
||
|
int i;
|
||
|
int ki;
|
||
|
int kf;
|
||
|
int k;
|
||
|
int j;
|
||
|
int components;
|
||
|
int too_many_components = 0;
|
||
|
if (nrow == 0) {
|
||
|
return (0);
|
||
|
}
|
||
|
|
||
|
/* Reasonable guess for number of components */
|
||
|
int pieces = 10 + (nrow / 10);
|
||
|
list_ptr.resize(pieces);
|
||
|
std::vector<int> mask(nrow, 1);
|
||
|
root = 1;
|
||
|
|
||
|
components = 1;
|
||
|
nordered = 1;
|
||
|
list_ptr[components - 1] = nordered - 1;
|
||
|
list[nordered - 1] = root;
|
||
|
mask[root - 1] = 0;
|
||
|
ni = 1;
|
||
|
nf = 1;
|
||
|
while (nordered < nrow) {
|
||
|
if (nf == ni - 1) {
|
||
|
++components;
|
||
|
if (components < pieces) {
|
||
|
list_ptr[components - 1] = nordered;
|
||
|
}
|
||
|
else {
|
||
|
too_many_components = 1;
|
||
|
}
|
||
|
for (i = 0; i < nrow; i++) {
|
||
|
if (mask[i] == 1) {
|
||
|
++nordered;
|
||
|
list[nordered - 1] = i + 1;
|
||
|
mask[i] = 0;
|
||
|
nf = ni;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
nni = nf + 1;
|
||
|
nnf = nni - 1;
|
||
|
for (i = ni; i <= nf; i++) {
|
||
|
ki = rows[list[i - 1] - 1] - 1;
|
||
|
kf = rows[list[i - 1]] - 2;
|
||
|
for (k = ki; k <= kf; k++) {
|
||
|
j = columns[k];
|
||
|
if (mask[j - 1] == 1) {
|
||
|
++nnf;
|
||
|
++nordered;
|
||
|
mask[j - 1] = 0;
|
||
|
list[nordered - 1] = j;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
ni = nni;
|
||
|
nf = nnf;
|
||
|
}
|
||
|
if (too_many_components == 0) {
|
||
|
list_ptr[components] = nordered;
|
||
|
return (components);
|
||
|
}
|
||
|
|
||
|
/* Start over with list_ptr correctly allocated */
|
||
|
list_ptr.resize(components);
|
||
|
list_ptr.shrink_to_fit();
|
||
|
for (i = 0; i < nrow; i++) {
|
||
|
mask[i] = 1;
|
||
|
}
|
||
|
|
||
|
components = 1;
|
||
|
nordered = 1;
|
||
|
list_ptr[components - 1] = nordered - 1;
|
||
|
list[nordered - 1] = root;
|
||
|
mask[root - 1] = 0;
|
||
|
ni = 1;
|
||
|
nf = 1;
|
||
|
while (nordered < nrow) {
|
||
|
if (nf == ni - 1) {
|
||
|
++components;
|
||
|
list_ptr[components - 1] = nordered;
|
||
|
for (i = 0; i < nrow; i++) {
|
||
|
if (mask[i] == 1) {
|
||
|
++nordered;
|
||
|
list[nordered - 1] = i + 1;
|
||
|
mask[i] = 0;
|
||
|
nf = ni;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
nni = nf + 1;
|
||
|
nnf = nni - 1;
|
||
|
for (i = ni; i <= nf; i++) {
|
||
|
ki = rows[list[i - 1] - 1] - 1;
|
||
|
kf = rows[list[i - 1]] - 2;
|
||
|
for (k = ki; k <= kf; k++) {
|
||
|
j = columns[k];
|
||
|
if (mask[j - 1] == 1) {
|
||
|
++nnf;
|
||
|
++nordered;
|
||
|
mask[j - 1] = 0;
|
||
|
list[nordered - 1] = j;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
ni = nni;
|
||
|
nf = nnf;
|
||
|
}
|
||
|
list_ptr[components] = nordered;
|
||
|
return (components);
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************/
|
||
|
int ZPINCH_assign(Machine_Description *machine, /* Machine MESH = nwedge * nslice */
|
||
|
int ndot, /* Length of x, y, z, and part (== # of elements) */
|
||
|
const float *x, /* x-coordinates */
|
||
|
const float *y, /* y-coordinates */
|
||
|
const float *z, /* z-coordinates */
|
||
|
int *part /* Output: partition assignments for each element */
|
||
|
)
|
||
|
{
|
||
|
/* Routine to generate a partition of a cylinder.
|
||
|
* Assumptions:
|
||
|
* Height of cylinder is aligned with z-axis.
|
||
|
* Face of cylinder is centered at (x,y) = (0,0).
|
||
|
* The x,y-plane is partitioned into wedge-shaped slices
|
||
|
* (just like cutting a pie!).
|
||
|
* The z-directon is partitioned into cylindrical slices
|
||
|
* (just like cutting a hot dog!).
|
||
|
*
|
||
|
* Decomposition requested by Chris Garasi for Z-Pinch simulations,
|
||
|
* NNSA Level-1 milestone for Petaflop Computer justification.
|
||
|
* Decomposition implemented by Karen Devine, kddevin@sandia.gov.
|
||
|
* March 18, 2004
|
||
|
*/
|
||
|
|
||
|
double theta; /* angle of (x,y) (polar coordinates) */
|
||
|
|
||
|
if (ndot > 0 && (x == nullptr || y == nullptr || z == nullptr || part == nullptr)) {
|
||
|
fmt::print(stderr, "KDD -- Bad input to ZPINCH_assign.\n");
|
||
|
fmt::print(stderr, "KDD -- Contact Karen Devine, kddevin@sandia.gov.\n");
|
||
|
exit(-1);
|
||
|
}
|
||
|
|
||
|
if (machine->type != MESH) {
|
||
|
fmt::print(stderr, "KDD -- Machine must be a MESH "
|
||
|
"with # wedges * # slices processors.\n");
|
||
|
fmt::print(stderr, "KDD -- Use nem_slice argument -m mesh=AxB, \n");
|
||
|
fmt::print(stderr, "KDD -- where A = # wedges in x,y-plane and \n");
|
||
|
fmt::print(stderr, "KDD -- B = # slices along z-axis.\n");
|
||
|
exit(-1);
|
||
|
}
|
||
|
int nwedge = machine->dim[0];
|
||
|
int nslice = machine->dim[1];
|
||
|
fmt::print("ZPINCH: Computing\n"
|
||
|
" {} slices in the z-direction\n"
|
||
|
" {} wedges in the x,y-plane\n"
|
||
|
" {} partitions total\n",
|
||
|
nslice, nwedge, nslice * nwedge);
|
||
|
|
||
|
/* Compute the maximum values of z */
|
||
|
float zmin = FLT_MAX; /* Minimum and maximum z-coordinate values */
|
||
|
float zmax = -FLT_MAX;
|
||
|
for (int i = 0; i < ndot; i++) {
|
||
|
if (z[i] > zmax) {
|
||
|
zmax = z[i];
|
||
|
}
|
||
|
if (z[i] < zmin) {
|
||
|
zmin = z[i];
|
||
|
}
|
||
|
}
|
||
|
double dz = zmax - zmin;
|
||
|
|
||
|
/* Compute maximum z for each slice, using uniform partition of zmin - zmax */
|
||
|
std::vector<double> slice_max_z(nslice);
|
||
|
for (int i = 0; i < nslice; i++) {
|
||
|
slice_max_z[i] =
|
||
|
static_cast<double>(zmin) + static_cast<double>(i + 1) * dz / static_cast<double>(nslice);
|
||
|
}
|
||
|
|
||
|
/* Compute maximum angle for each wedge, using uniform partition of 2*M_PI */
|
||
|
std::vector<double> wedge_max_theta(nwedge);
|
||
|
for (int i = 0; i < nwedge; i++) {
|
||
|
wedge_max_theta[i] = static_cast<double>(i + 1) * (2. * M_PI) / static_cast<double>(nwedge);
|
||
|
}
|
||
|
|
||
|
/* Compute the partition assignment for each set of coordinates */
|
||
|
double epsilon = 1e-07; /* tolerance that allows a point to be in wedge */
|
||
|
for (int i = 0; i < ndot; i++) {
|
||
|
|
||
|
/* Compute the z slice that the element is in. */
|
||
|
int slice = 0;
|
||
|
if (dz > 0.) {
|
||
|
slice =
|
||
|
static_cast<int>(nslice * (static_cast<double>(z[i]) - static_cast<double>(zmin)) / dz);
|
||
|
if (slice == nslice) {
|
||
|
slice--; /* Handles z[i] == zmax correctly */
|
||
|
}
|
||
|
|
||
|
/* Move dots within epsilon of upper end of slice into next slice */
|
||
|
/* This step reduces jagged edges due to roundoff in coordinate values */
|
||
|
if (slice != nslice - 1 && static_cast<double>(z[i]) > (slice_max_z[slice] - epsilon)) {
|
||
|
slice++;
|
||
|
}
|
||
|
}
|
||
|
else { /* 2D problem */
|
||
|
slice = 0;
|
||
|
}
|
||
|
|
||
|
/* Compute polar coordinate theta in x,y-plane for the element. */
|
||
|
if (x[i] == 0.0F) {
|
||
|
if (y[i] >= 0.0F) {
|
||
|
theta = M_PI_2;
|
||
|
}
|
||
|
else {
|
||
|
theta = 3. * M_PI_2;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
theta = std::atan2(y[i], x[i]); /* In range -M_PI_2 to M_PI_2 */
|
||
|
|
||
|
/* Convert to range 0 to 2*M_PI */
|
||
|
if (x[i] < 0.0F) {
|
||
|
theta += M_PI;
|
||
|
}
|
||
|
else if (y[i] < 0.0F) {
|
||
|
theta += 2 * M_PI;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Compute the wedge that the element is in. */
|
||
|
int wedge = static_cast<int>(nwedge * theta / (2 * M_PI));
|
||
|
if (wedge == nwedge) {
|
||
|
wedge--; /* Handles theta == 2*M_PI correctly */
|
||
|
}
|
||
|
|
||
|
/* Move dots within epsilon of upper angle of wedge into next wedge */
|
||
|
/* This step reduces jagged edges due to roundoff in coordinate values */
|
||
|
if (theta > wedge_max_theta[wedge] - epsilon) {
|
||
|
wedge = (wedge + 1) % nwedge;
|
||
|
}
|
||
|
|
||
|
/* Compute the part that the element is in. */
|
||
|
part[i] = (slice * nwedge + wedge);
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************/
|
||
|
void BRICK_slices(int nslices_d, /* # of subdomains in this dimension */
|
||
|
int ndot, /* # of dots */
|
||
|
const float *d, /* Array of ndot coordinates in this dimension */
|
||
|
float *dmin, /* Output: Smallest value in d[] */
|
||
|
float *dmax, /* Output: Largest value in d[] */
|
||
|
double *delta, /* Output: dmax - dmin */
|
||
|
std::vector<double> &slices_d /* Output: maximum d for each slice in dimension
|
||
|
using uniform partition of dmax - dmin */
|
||
|
)
|
||
|
{
|
||
|
/* Compute the min, max, delta and slices values for a single dimension. */
|
||
|
/* KDDKDD Note: This routine could also be used by ZPINCH in the z-direction,
|
||
|
* KDDKDD Note: but I don't want to tamper with code that already works. */
|
||
|
int i;
|
||
|
|
||
|
*dmin = FLT_MAX; /* Minimum and maximum coordinate values */
|
||
|
*dmax = -FLT_MAX;
|
||
|
|
||
|
/* Compute the minimum and maximum coordinate values */
|
||
|
for (i = 0; i < ndot; i++) {
|
||
|
if (d[i] > *dmax) {
|
||
|
*dmax = d[i];
|
||
|
}
|
||
|
if (d[i] < *dmin) {
|
||
|
*dmin = d[i];
|
||
|
}
|
||
|
}
|
||
|
*delta = *dmax - *dmin;
|
||
|
|
||
|
/* Compute maximum coordinate value for each slice,
|
||
|
using uniform partition of dmax - dmin */
|
||
|
slices_d.reserve(nslices_d);
|
||
|
for (i = 0; i < nslices_d; i++) {
|
||
|
slices_d.push_back(static_cast<double>(*dmin) +
|
||
|
static_cast<double>(i + 1) * *delta / static_cast<double>(nslices_d));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************/
|
||
|
int BRICK_which_slice(int nslices_d, /* # of subdomains in this dimension */
|
||
|
float d, /* Coordinate of dot in this dimension */
|
||
|
float dmin, /* Smallest value in d[] */
|
||
|
double delta, /* dmax - dmin */
|
||
|
std::vector<double> &slices_d /* Maximum d for each slice in dimension using
|
||
|
uniform partition of dmax - dmin */
|
||
|
)
|
||
|
{
|
||
|
/* Function returning in which slice a coordinate d lies. */
|
||
|
/* KDDKDD Note: This routine could also be used by ZPINCH in the z-direction,
|
||
|
* KDDKDD Note: but I don't want to tamper with code that already works. */
|
||
|
|
||
|
int d_slice; /* Return value: The slice to which the coordinate is assigned */
|
||
|
if (delta > 0.) {
|
||
|
d_slice = static_cast<int>(nslices_d * static_cast<double>((d - dmin)) / delta);
|
||
|
if (d_slice == nslices_d) {
|
||
|
d_slice--; /* Handles d == dmax correctly */
|
||
|
}
|
||
|
|
||
|
/* Move dots within epsilon of upper end of slice into next slice */
|
||
|
/* This step reduces jagged edges due to roundoff in coordinate values */
|
||
|
double epsilon = 5e-06; /* tolerance that allows a point to be in subdomain*/
|
||
|
if (d_slice != nslices_d - 1 && static_cast<double>(d) > (slices_d[d_slice] - epsilon)) {
|
||
|
d_slice++;
|
||
|
}
|
||
|
}
|
||
|
else { /* not a 3D problem */
|
||
|
d_slice = 0;
|
||
|
}
|
||
|
|
||
|
return d_slice;
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************/
|
||
|
int BRICK_assign(Machine_Description *machine, /* Machine MESH = nx * ny * nz */
|
||
|
int ndot, /* Length of x, y, z, and part (== # of elements) */
|
||
|
const float *x, /* x-coordinates */
|
||
|
const float *y, /* y-coordinates */
|
||
|
const float *z, /* z-coordinates */
|
||
|
int *part /* Output: partition assignments for each element */
|
||
|
)
|
||
|
{
|
||
|
/* Routine to generate a partition of an axis-aligned hexahedral domain.
|
||
|
* Assumptions:
|
||
|
* Hexahedral domain is axis-aligned.
|
||
|
* For good balance, the number of elements in each dimension should
|
||
|
* be divisible by the number of processors used in that dimension.
|
||
|
* Each dimension is partitioned into equally sized regions, generating
|
||
|
* subdomains that are axis-aligned hexahedra.
|
||
|
*
|
||
|
* Decomposition requested by Tom Brunner and Chris Garasi for
|
||
|
* NNSA Level-1 milestone for Petaflop Computer justification.
|
||
|
* Decomposition implemented by Karen Devine, kddevin@sandia.gov.
|
||
|
* June 10, 2005
|
||
|
*/
|
||
|
|
||
|
if (ndot > 0 && (x == nullptr || y == nullptr || z == nullptr || part == nullptr)) {
|
||
|
fmt::print(stderr, "KDD -- Bad input to BRICK_assign.\n");
|
||
|
fmt::print(stderr, "KDD -- Contact Karen Devine, kddevin@sandia.gov.\n");
|
||
|
exit(-1);
|
||
|
}
|
||
|
|
||
|
if (machine->type != MESH) {
|
||
|
fmt::print(stderr, "KDD -- Machine must be a MESH "
|
||
|
"with nx * ny * nz processors.\n");
|
||
|
fmt::print(stderr, "KDD -- Use nem_slice argument -m mesh=AxBxC, \n");
|
||
|
fmt::print(stderr, "KDD -- where A = nx, B = ny, and "
|
||
|
"C = nz\n");
|
||
|
exit(-1);
|
||
|
}
|
||
|
int nx = machine->dim[0];
|
||
|
int ny = machine->dim[1];
|
||
|
int nz = machine->dim[2];
|
||
|
|
||
|
fmt::print("BRICK: Computing\n"
|
||
|
" {} subdomains in the x direction\n"
|
||
|
" {} subdomains in the y direction\n"
|
||
|
" {} subdomains in the z direction\n"
|
||
|
" {} partitions total\n",
|
||
|
nx, ny, nz, nx * ny * nz);
|
||
|
|
||
|
float xmin;
|
||
|
float xmax; /* Minimum and maximum x-coordinate values */
|
||
|
float ymin;
|
||
|
float ymax; /* Minimum and maximum y-coordinate values */
|
||
|
float zmin;
|
||
|
float zmax; /* Minimum and maximum z-coordinate values */
|
||
|
double dx; /* xmax - xmin */
|
||
|
double dy; /* ymax - ymin */
|
||
|
double dz; /* zmax - zmin */
|
||
|
std::vector<double> slices_x; /* max x value in each slice. */
|
||
|
std::vector<double> slices_y; /* max y value in each slice. */
|
||
|
std::vector<double> slices_z; /* max z value in each slice. */
|
||
|
|
||
|
/* Compute bounds of subdomains in each dimension */
|
||
|
BRICK_slices(nx, ndot, x, &xmin, &xmax, &dx, slices_x);
|
||
|
BRICK_slices(ny, ndot, y, &ymin, &ymax, &dy, slices_y);
|
||
|
BRICK_slices(nz, ndot, z, &zmin, &zmax, &dz, slices_z);
|
||
|
|
||
|
/* Compute the partition assignment for each set of coordinates */
|
||
|
for (int i = 0; i < ndot; i++) {
|
||
|
|
||
|
/* Compute the slice in each dimension that the element is in */
|
||
|
int x_slice = BRICK_which_slice(nx, x[i], xmin, dx, slices_x);
|
||
|
int y_slice = BRICK_which_slice(ny, y[i], ymin, dy, slices_y);
|
||
|
int z_slice = BRICK_which_slice(nz, z[i], zmin, dz, slices_z);
|
||
|
|
||
|
/* Compute the part that the element is in. */
|
||
|
part[i] = (z_slice * (nx * ny) + y_slice * nx + x_slice);
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
#ifdef USE_ZOLTAN
|
||
|
/*****************************************************************************/
|
||
|
/***** Global data structure used by Zoltan callbacks. *****/
|
||
|
/***** Could implement Zoltan callbacks without global data structure, *****/
|
||
|
/***** but using the global data structure makes implementation quick. *****/
|
||
|
|
||
|
struct
|
||
|
{
|
||
|
size_t ndot; /* Length of x, y, z, and part (== # of elements) */
|
||
|
int *vwgt; /* vertex weights */
|
||
|
float *x; /* x-coordinates */
|
||
|
float *y; /* y-coordinates */
|
||
|
float *z; /* z-coordinates */
|
||
|
} Zoltan_Data;
|
||
|
|
||
|
/*****************************************************************************/
|
||
|
/***** ZOLTAN CALLBACK FUNCTIONS *****/
|
||
|
|
||
|
int zoltan_num_dim(void * /*data*/, int *ierr)
|
||
|
{
|
||
|
/* Return dimensionality of coordinate data.
|
||
|
* Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign.
|
||
|
*/
|
||
|
*ierr = ZOLTAN_OK;
|
||
|
if (Zoltan_Data.z != nullptr) {
|
||
|
return 3;
|
||
|
}
|
||
|
if (Zoltan_Data.y != nullptr) {
|
||
|
return 2;
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
int zoltan_num_obj(void * /*data*/, int *ierr)
|
||
|
{
|
||
|
/* Return number of objects.
|
||
|
* Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign.
|
||
|
*/
|
||
|
*ierr = ZOLTAN_OK;
|
||
|
return Zoltan_Data.ndot;
|
||
|
}
|
||
|
|
||
|
void zoltan_obj_list(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, ZOLTAN_ID_PTR gids,
|
||
|
ZOLTAN_ID_PTR /*lids*/, int wdim, float *wgts, int *ierr)
|
||
|
{
|
||
|
/* Return list of object IDs.
|
||
|
* Return only global IDs; don't need local IDs since running in serial.
|
||
|
* gids are array indices for coordinate and vwgts arrays.
|
||
|
* Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign.
|
||
|
*/
|
||
|
for (size_t i = 0; i < Zoltan_Data.ndot; i++) {
|
||
|
gids[i] = i;
|
||
|
if (wdim != 0) {
|
||
|
wgts[i] = static_cast<float>(Zoltan_Data.vwgt[i]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
*ierr = ZOLTAN_OK;
|
||
|
}
|
||
|
|
||
|
void zoltan_geom(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, int nobj,
|
||
|
const ZOLTAN_ID_PTR gids, ZOLTAN_ID_PTR /*lids*/, int ndim, double *geom,
|
||
|
int *ierr)
|
||
|
{
|
||
|
/* Return coordinates for objects.
|
||
|
* gids are array indices for coordinate arrays.
|
||
|
* Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign.
|
||
|
*/
|
||
|
|
||
|
for (int i = 0; i < nobj; i++) {
|
||
|
size_t j = gids[i];
|
||
|
geom[i * ndim] = Zoltan_Data.x[j];
|
||
|
if (ndim > 1) {
|
||
|
geom[i * ndim + 1] = Zoltan_Data.y[j];
|
||
|
}
|
||
|
if (ndim > 2) {
|
||
|
geom[i * ndim + 2] = Zoltan_Data.z[j];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
*ierr = ZOLTAN_OK;
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************/
|
||
|
int ZOLTAN_assign(const char *method, /* Zoltan LB_METHOD to use. */
|
||
|
int totalproc, /* # of processors for which to partition */
|
||
|
size_t ndot, /* Length of x, y, z, and part (== # of elements) */
|
||
|
int *vwgt, /* vertex weights */
|
||
|
float *x, /* x-coordinates */
|
||
|
float *y, /* y-coordinates */
|
||
|
float *z, /* z-coordinates */
|
||
|
int ignore_z, /* flag indicating whether to use z-coords*/
|
||
|
int *part, /* Output: partition assignments for each element */
|
||
|
int argc, /* Fields needed by MPI_Init */
|
||
|
char *argv[] /* Fields needed by MPI_Init */
|
||
|
)
|
||
|
{
|
||
|
/* Function to allow Zoltan to compute decomposition using RCB.
|
||
|
* Assuming running Zoltan in serial (as nem_slice is serial).
|
||
|
* Return PARTITION_ASSIGNMENTS from Zoltan_LB_Partition; they should
|
||
|
* match what is needed in part array above.
|
||
|
*/
|
||
|
|
||
|
/* Copy mesh data and pointers into structure accessible from callback fns. */
|
||
|
Zoltan_Data.ndot = ndot;
|
||
|
Zoltan_Data.vwgt = vwgt;
|
||
|
Zoltan_Data.x = x;
|
||
|
Zoltan_Data.y = y;
|
||
|
Zoltan_Data.z = (ignore_z != 0 ? nullptr : z);
|
||
|
|
||
|
/* Initialize Zoltan */
|
||
|
float ver;
|
||
|
int ierr = Zoltan_Initialize(argc, argv, &ver);
|
||
|
if (ierr == ZOLTAN_FATAL) {
|
||
|
fmt::print(stderr, "Error returned from Zoltan_Initialize ({}:{})\n", __FILE__, __LINE__);
|
||
|
exit(-1);
|
||
|
}
|
||
|
struct Zoltan_Struct *zz = Zoltan_Create(MPI_COMM_WORLD);
|
||
|
|
||
|
/* Register Callback functions */
|
||
|
/* Using global Zoltan_Data; could register it here instead as data field. */
|
||
|
ierr = Zoltan_Set_Fn(zz, ZOLTAN_NUM_GEOM_FN_TYPE,
|
||
|
reinterpret_cast<ZOLTAN_VOID_FN *>(zoltan_num_dim), nullptr);
|
||
|
if (ierr == ZOLTAN_FATAL) {
|
||
|
fmt::print(stderr, "Error returned from Zoltan_Set_Fn ({}:{})\n", __FILE__, __LINE__);
|
||
|
goto End;
|
||
|
}
|
||
|
ierr = Zoltan_Set_Fn(zz, ZOLTAN_NUM_OBJ_FN_TYPE,
|
||
|
reinterpret_cast<ZOLTAN_VOID_FN *>(zoltan_num_obj), nullptr);
|
||
|
if (ierr == ZOLTAN_FATAL) {
|
||
|
fmt::print(stderr, "Error returned from Zoltan_Set_Fn ({}:{})\n", __FILE__, __LINE__);
|
||
|
goto End;
|
||
|
}
|
||
|
ierr = Zoltan_Set_Fn(zz, ZOLTAN_OBJ_LIST_FN_TYPE,
|
||
|
reinterpret_cast<ZOLTAN_VOID_FN *>(zoltan_obj_list), nullptr);
|
||
|
if (ierr == ZOLTAN_FATAL) {
|
||
|
fmt::print(stderr, "Error returned from Zoltan_Set_Fn ({}:{})\n", __FILE__, __LINE__);
|
||
|
goto End;
|
||
|
}
|
||
|
ierr = Zoltan_Set_Fn(zz, ZOLTAN_GEOM_MULTI_FN_TYPE,
|
||
|
reinterpret_cast<ZOLTAN_VOID_FN *>(zoltan_geom), nullptr);
|
||
|
if (ierr == ZOLTAN_FATAL) {
|
||
|
fmt::print(stderr, "Error returned from Zoltan_Set_Fn ({}:{})\n", __FILE__, __LINE__);
|
||
|
goto End;
|
||
|
}
|
||
|
|
||
|
/* Set parameters for Zoltan */
|
||
|
{
|
||
|
std::string str = fmt::format("{}", totalproc);
|
||
|
ierr = Zoltan_Set_Param(zz, "NUM_GLOBAL_PARTITIONS", str.c_str());
|
||
|
}
|
||
|
if (ierr == ZOLTAN_FATAL) {
|
||
|
fmt::print(stderr, "Error returned from Zoltan_Set_Param ({}:{})\n", __FILE__, __LINE__);
|
||
|
goto End;
|
||
|
}
|
||
|
ierr = Zoltan_Set_Param(zz, "NUM_LID_ENTRIES", "0");
|
||
|
if (ierr == ZOLTAN_FATAL) {
|
||
|
fmt::print(stderr, "Error returned from Zoltan_Set_Param ({}:{})\n", __FILE__, __LINE__);
|
||
|
goto End;
|
||
|
}
|
||
|
ierr = Zoltan_Set_Param(zz, "LB_METHOD", method);
|
||
|
if (ierr == ZOLTAN_FATAL) {
|
||
|
fmt::print(stderr, "Error returned from Zoltan_Set_Param ({}:{})\n", __FILE__, __LINE__);
|
||
|
goto End;
|
||
|
}
|
||
|
ierr = Zoltan_Set_Param(zz, "REMAP", "0");
|
||
|
if (ierr == ZOLTAN_FATAL) {
|
||
|
fmt::print(stderr, "Error returned from Zoltan_Set_Param ({}:{})\n", __FILE__, __LINE__);
|
||
|
goto End;
|
||
|
}
|
||
|
ierr = Zoltan_Set_Param(zz, "RETURN_LISTS", "PARTITION_ASSIGNMENTS");
|
||
|
if (ierr == ZOLTAN_FATAL) {
|
||
|
fmt::print(stderr, "Error returned from Zoltan_Set_Param ({}:{})\n", __FILE__, __LINE__);
|
||
|
goto End;
|
||
|
}
|
||
|
if (vwgt != nullptr) {
|
||
|
ierr = Zoltan_Set_Param(zz, "OBJ_WEIGHT_DIM", "1");
|
||
|
if (ierr == ZOLTAN_FATAL) {
|
||
|
fmt::print(stderr, "Error returned from Zoltan_Set_Param ({}:{})\n", __FILE__, __LINE__);
|
||
|
goto End;
|
||
|
}
|
||
|
}
|
||
|
if (ignore_z != 0) {
|
||
|
ierr = Zoltan_Set_Param(zz, "RCB_RECTILINEAR_BLOCKS", "1");
|
||
|
if (ierr == ZOLTAN_FATAL) {
|
||
|
fmt::print(stderr, "Error returned from Zoltan_Set_Param ({}:{})\n", __FILE__, __LINE__);
|
||
|
goto End;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Call partitioner */
|
||
|
fmt::print("Using Zoltan version {}, method {}\n", static_cast<double>(ver), method);
|
||
|
ZOLTAN_ID_PTR dummy1;
|
||
|
ZOLTAN_ID_PTR dummy2; /* Empty output from Zoltan_LB_Partition */
|
||
|
int dummy0;
|
||
|
int *dummy3;
|
||
|
int *dummy4; /* Empty output from Zoltan_LB_Partition */
|
||
|
int zngid_ent;
|
||
|
int znlid_ent; /* Useful output from Zoltan_LB_Partition */
|
||
|
int znobj;
|
||
|
ZOLTAN_ID_PTR zgids;
|
||
|
ZOLTAN_ID_PTR zlids; /* Useful output from Zoltan_LB_Partition */
|
||
|
int *zprocs, *zparts; /* Useful output from Zoltan_LB_Partition */
|
||
|
int changes;
|
||
|
ierr = Zoltan_LB_Partition(zz, &changes, &zngid_ent, &znlid_ent, &dummy0, &dummy1, &dummy2,
|
||
|
&dummy3, &dummy4, &znobj, &zgids, &zlids, &zprocs, &zparts);
|
||
|
if (ierr != 0) {
|
||
|
fmt::print(stderr, "Error returned from Zoltan_LB_Partition ({}:{})\n", __FILE__, __LINE__);
|
||
|
goto End;
|
||
|
}
|
||
|
|
||
|
/* Sanity check */
|
||
|
if (ndot != static_cast<size_t>(znobj)) {
|
||
|
fmt::print(stderr, "Sanity check failed; ndot {} != znobj {}.\n", ndot,
|
||
|
static_cast<size_t>(znobj));
|
||
|
goto End;
|
||
|
}
|
||
|
|
||
|
/* Convert data types from int to int. */
|
||
|
for (size_t i = 0; i < ndot; i++) {
|
||
|
part[zgids[i]] = zparts[i];
|
||
|
}
|
||
|
|
||
|
End:
|
||
|
/* Clean up */
|
||
|
Zoltan_LB_Free_Part(&zgids, &zlids, &zprocs, &zparts);
|
||
|
Zoltan_Destroy(&zz);
|
||
|
if (ierr != 0) {
|
||
|
MPI_Finalize();
|
||
|
exit(-1);
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
/*****************************************************************************/
|
||
|
void BALANCE_STATS(Machine_Description *machine, /* Machine MESH = nwedge * nslice */
|
||
|
const int *wgt, /* element weights; can be nullptr if no weights */
|
||
|
size_t ndot, /* Length of x, y, z, and part (== # of elements) */
|
||
|
int *part /* Partition assignments for each element */
|
||
|
)
|
||
|
{
|
||
|
/* Routine to print some info about the ZPINCH, BRICK or ZOLTAN_RCB
|
||
|
decompositions */
|
||
|
std::vector<int> cntwgt;
|
||
|
|
||
|
int minwgt = INT_MAX;
|
||
|
int maxwgt = 0;
|
||
|
int sumwgt = 0;
|
||
|
|
||
|
int npart = machine->dim[0] * machine->dim[1] * machine->dim[2];
|
||
|
std::vector<int> cnts(npart);
|
||
|
if (wgt != nullptr) {
|
||
|
cntwgt.resize(npart);
|
||
|
}
|
||
|
for (size_t i = 0; i < ndot; i++) {
|
||
|
cnts[part[i]]++;
|
||
|
if (wgt != nullptr) {
|
||
|
cntwgt[part[i]] += wgt[i];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
size_t sum = 0;
|
||
|
int max = 0;
|
||
|
int min = ndot;
|
||
|
for (int i = 0; i < npart; i++) {
|
||
|
if (cnts[i] > max) {
|
||
|
max = cnts[i];
|
||
|
}
|
||
|
if (cnts[i] < min) {
|
||
|
min = cnts[i];
|
||
|
}
|
||
|
sum += cnts[i];
|
||
|
if (wgt != nullptr) {
|
||
|
if (cntwgt[i] > maxwgt) {
|
||
|
maxwgt = cntwgt[i];
|
||
|
}
|
||
|
if (cntwgt[i] < minwgt) {
|
||
|
minwgt = cntwgt[i];
|
||
|
}
|
||
|
sumwgt += cntwgt[i];
|
||
|
}
|
||
|
if (cnts[i] == 0) {
|
||
|
fmt::print("ZERO on {}\n", i);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fmt::print("CNT STATS: min = {} max = {} avg = {}\n", min, max,
|
||
|
static_cast<double>(sum) / static_cast<double>(npart));
|
||
|
if (wgt != nullptr) {
|
||
|
fmt::print("WGT STATS: min = {} max = {} avg = {}\n", minwgt, maxwgt,
|
||
|
static_cast<double>(sumwgt) / static_cast<double>(npart));
|
||
|
}
|
||
|
}
|
||
|
} // namespace
|