// Copyright(C) 1999-2023 National Technology & Engineering Solutions // of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with // NTESS, the U.S. Government retains certain rights in this software. // // See packages/seacas/LICENSE for details #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if USE_METIS #include #else using idx_t = int; #endif #include #ifdef SEACAS_HAVE_MPI #include #endif // ======================================================================== // TODO(gdsjaar): // * Sideset distribution factors // * Variables // * All entity types // * More efficient border-node-processor communication map. // ======================================================================== extern double seacas_timer(); int debug_level = 0; // size_t partial_count = 1'00'000; size_t partial_count = 1'000'000'000; namespace { void progress(const std::string &output) { static auto start = std::chrono::steady_clock::now(); if ((debug_level & 1) != 0) { auto now = std::chrono::steady_clock::now(); std::chrono::duration diff = now - start; fmt::print(stderr, " [{:.2f} - {}]\t{}\n", diff.count(), fmt::group_digits(Ioss::MemoryUtils::get_memory_info()), output); } } void proc_progress(int p, int proc_count) { if (((debug_level & 8) != 0) && ((proc_count <= 20) || ((p + 1) % (proc_count / 20) == 0))) { progress("\t\tProcessor " + std::to_string(p + 1)); } } // Add the chain maps for file-per-rank output... template void output_chain_maps(std::vector &proc_region, const Ioss::chain_t &chains, const std::vector &elem_to_proc, size_t proc_begin, size_t proc_size, INT /* dummy */) { progress(__func__); size_t block_count = proc_region[0]->get_property("element_block_count").get_int(); size_t offset = 0; for (size_t b = 0; b < block_count; b++) { if (debug_level & 4) { progress("\tBlock " + std::to_string(b + 1)); } size_t proc_count = proc_region.size(); std::vector> map(proc_count); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { const auto &proc_ebs = proc_region[p]->get_element_blocks(); size_t proc_element_count = proc_ebs[b]->entity_count(); map[p].reserve(proc_element_count * 2); } size_t global_element_count = elem_to_proc.size(); for (size_t j = 0; j < global_element_count; j++) { size_t p = elem_to_proc[offset + j]; if (p >= proc_begin && p < proc_begin + proc_size) { auto &chain_entry = chains[j + offset]; // TODO: Map this from global to local element number... size_t loc_elem = proc_region[p]->get_database()->element_global_to_local(chain_entry.element); map[p].push_back(loc_elem); map[p].push_back(chain_entry.link); } } offset += global_element_count; for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { const auto &proc_ebs = proc_region[p]->get_element_blocks(); proc_ebs[b]->put_field_data("chain", map[p]); map[p].clear(); proc_progress(p, proc_count); } } } void add_chain_maps(Ioss::Region ®ion) { ex_put_map_param(region.get_database()->get_file_pointer(), 0, 2); ex_put_name(region.get_database()->get_file_pointer(), EX_ELEM_MAP, 1, "chain:root_element_id"); ex_put_name(region.get_database()->get_file_pointer(), EX_ELEM_MAP, 2, "chain:depth_from_root"); // The chain / line data will be stored as an element map... const auto &blocks = region.get_element_blocks(); for (const auto &block : blocks) { auto field = Ioss::Field("chain", region.field_int_type(), "Real[2]", Ioss::Field::MAP).set_index(1); block->field_add(field); } } void add_decomp_map(Ioss::Region ®ion, const std::string &decomp_variable_name, bool add_chain_info) { ex_opts(EX_VERBOSE); if (add_chain_info) { ex_put_map_param(region.get_database()->get_file_pointer(), 0, 3); ex_put_name(region.get_database()->get_file_pointer(), EX_ELEM_MAP, 1, decomp_variable_name.c_str()); ex_put_name(region.get_database()->get_file_pointer(), EX_ELEM_MAP, 2, "chain:root_element_id"); ex_put_name(region.get_database()->get_file_pointer(), EX_ELEM_MAP, 3, "chain:depth_from_root"); } else { ex_put_map_param(region.get_database()->get_file_pointer(), 0, 1); ex_put_name(region.get_database()->get_file_pointer(), EX_ELEM_MAP, 1, decomp_variable_name.c_str()); } // The chain / line data will be stored as an element map... const auto &blocks = region.get_element_blocks(); for (const auto &block : blocks) { auto field = Ioss::Field(decomp_variable_name, Ioss::Field::INT32, IOSS_SCALAR(), Ioss::Field::MAP) .set_index(1); block->field_add(field); if (add_chain_info) { auto ch_field = Ioss::Field("chain", region.field_int_type(), "Real[2]", Ioss::Field::MAP).set_index(2); block->field_add(ch_field); } } } template void output_decomp_map(Ioss::Region ®ion, const std::vector &elem_to_proc, const Ioss::chain_t &chains, const std::string &decomp_variable_name, bool add_chain_info) { const auto &blocks = region.get_element_blocks(); size_t offset = 0; for (const auto &block : blocks) { size_t num_elem = block->entity_count(); block->put_field_data(decomp_variable_name, (void *)&elem_to_proc[offset], -1); if (add_chain_info) { std::vector chain; chain.reserve(num_elem * 2); for (size_t i = 0; i < num_elem; i++) { auto &chain_entry = chains[i + offset]; chain.push_back(chain_entry.element); chain.push_back(chain_entry.link); } block->put_field_data("chain", chain); } offset += num_elem; } } void add_decomp_field(Ioss::Region ®ion, const std::string &decomp_variable_name, bool add_chain_info) { region.begin_mode(Ioss::STATE_DEFINE_TRANSIENT); const auto &blocks = region.get_element_blocks(); for (const auto &block : blocks) { block->field_add(Ioss::Field(decomp_variable_name, region.field_int_type(), IOSS_SCALAR(), Ioss::Field::TRANSIENT)); if (add_chain_info) { block->field_add( Ioss::Field("chain", region.field_int_type(), "Real[2]", Ioss::Field::TRANSIENT)); } } region.end_mode(Ioss::STATE_DEFINE_TRANSIENT); } template void output_decomp_field(Ioss::Region ®ion, const std::vector &elem_to_proc, const Ioss::chain_t &chains, const std::string &decomp_variable_name, bool add_chain_info) { region.begin_mode(Ioss::STATE_TRANSIENT); auto step = region.add_state(0.0); region.begin_state(step); output_decomp_map(region, elem_to_proc, chains, decomp_variable_name, add_chain_info); region.end_state(step); region.end_mode(Ioss::STATE_TRANSIENT); } template void line_decomp_modify(const Ioss::chain_t &element_chains, const std::vector &elem_to_proc, int proc_count, INT dummy); int case_compare(const char *s1, const char *s2) { const char *c1 = s1; const char *c2 = s2; for (;;) { if (::toupper(*c1) != ::toupper(*c2)) { return (::toupper(*c1) - ::toupper(*c2)); } if (*c1 == '\0') { return 0; } c1++; c2++; } } void exodus_error(int lineno) { std::ostringstream errmsg; fmt::print( errmsg, "Exodus error ({}) {} at line {} in file Slice.C. Please report to gdsjaar@sandia.gov " "if you need help.", exerrval, ex_strerror(exerrval), lineno); ex_err(nullptr, nullptr, EX_PRTLASTMSG); throw std::runtime_error(errmsg.str()); } template void populate_proc_node(size_t count, size_t offset, size_t element_nodes, const std::vector &elem_to_proc, const std::vector &glob_conn, std::vector> &proc_node, std::vector &on_proc_count) { // Determine which processor(s) each node is present on. // Also count number of nodes on each processor. size_t el = 0; for (size_t j = 0; j < count; j++) { auto p = elem_to_proc[offset + j]; for (size_t k = 0; k < element_nodes; k++) { INT node = glob_conn[el++] - 1; bool exists = std::find(std::begin(proc_node[node]), std::end(proc_node[node]), p) != std::end(proc_node[node]); if (!exists) { proc_node[node].push_back(p); on_proc_count[p]++; } } } } void filename_substitution(std::string &filename, const SystemInterface &interFace); template void slice(Ioss::Region ®ion, const std::string &nemfile, SystemInterface &interFace, INT dummy); template bool is_sequential(const std::vector &map) { progress(__func__); for (size_t i = 0; i < map.size(); i++) { if (map[i] != i + 1) { return false; } } return true; } #if USE_METIS int get_common_node_count(const Ioss::Region ®ion) { progress(__func__); // Determine number of nodes that elements must share to be // considered connected. A 8-node hex-only mesh would have 4 // A 3D shell mesh should have 2. Basically, use the minimum // number of nodes per side for all element blocks... Omit sphere // elements; ignore bars(?)... int common_nodes = 999; const auto &ebs = region.get_element_blocks(); for (const auto &eb : ebs) { const Ioss::ElementTopology *topology = eb->topology(); const Ioss::ElementTopology *boundary = topology->boundary_type(0); if (boundary != nullptr) { common_nodes = std::min(common_nodes, boundary->number_boundaries()); } else { // Different topologies on some element faces... size_t nb = topology->number_boundaries(); for (size_t bb = 1; bb <= nb; bb++) { boundary = topology->boundary_type(bb); if (boundary != nullptr) { common_nodes = std::min(common_nodes, boundary->number_boundaries()); } } } } common_nodes = std::max(1, common_nodes); fmt::print(stderr, "Setting common_nodes to {}\n", common_nodes); return common_nodes; } #endif } // namespace // ======================================================================== int main(int argc, char *argv[]) { #ifdef SEACAS_HAVE_MPI MPI_Init(&argc, &argv); #endif double begin = seacas_timer(); Ioss::Init::Initializer io; SystemInterface::show_version(); SystemInterface interFace; bool ok = interFace.parse_options(argc, argv); if (!ok) { fmt::print(stderr, "\nERROR: Problem parsing command line options.\n\n"); exit(EXIT_FAILURE); } std::string nem_file = interFace.nemesisFile_; std::string path = interFace.output_path(); if (!path.empty()) { filename_substitution(path, interFace); // See if specified path exists. Ioss::FileInfo output_path(path); if (!output_path.exists()) { // Try to create the directory... Ioss::FileInfo::create_path(path); } else if (!output_path.is_dir()) { fmt::print(stderr, "ERROR: Path '{}' is not a directory.\n", path); exit(EXIT_FAILURE); } // See if the nem_file already has a path prepended to the // filename and if so, extract the basename. Ioss::FileInfo nemesis(nem_file); std::string sep = "/"; if (path[path.length() - 1] == '/') { sep = ""; } nem_file = path + sep + nemesis.tailname(); } if (interFace.outputDecompMap_ || interFace.outputDecompField_) { // Then not creating split files, just adding map or field to a single output file // Need to check that not overwriting input file... if (interFace.inputFile_ == nem_file) { nem_file += "-decomp"; } } fmt::print(stderr, "\nInput: '{}'\n", interFace.inputFile_); fmt::print(stderr, "Output: '{}'\n", nem_file); debug_level = interFace.debug(); partial_count = interFace.partial(); //======================================================================== // INPUT ... // NOTE: The "READ_RESTART" mode ensures that the node and element ids will be mapped. //======================================================================== Ioss::DatabaseIO *dbi = Ioss::IOFactory::create(interFace.inputFormat_, interFace.inputFile_, Ioss::READ_RESTART, Ioss::ParallelUtils::comm_world()); if (dbi == nullptr || !dbi->ok(true)) { std::exit(EXIT_FAILURE); } if (interFace.ints64Bit_) { dbi->set_int_byte_size_api(Ioss::USE_INT64_API); } dbi->set_surface_split_type(Ioss::SPLIT_BY_DONT_SPLIT); dbi->set_field_separator(0); // NOTE: 'region' owns 'db' pointer at this time... Ioss::Region region(dbi, "region_1"); region.output_summary(std::cerr, true); try { if (dbi->int_byte_size_api() == 4) { progress("4-byte slice"); slice(region, nem_file, interFace, 1); } else { progress("8-byte slice"); slice(region, nem_file, interFace, static_cast(1)); } } catch (std::exception &e) { fmt::print(stderr, "\n{}\n\nSlice terminated due to exception\n", e.what()); exit(EXIT_FAILURE); } #ifdef SEACAS_HAVE_MPI MPI_Finalize(); #endif fmt::print(stderr, "\nHigh-Water Memory Use: {} bytes\n", fmt::group_digits(Ioss::MemoryUtils::get_hwm_memory_info())); fmt::print(stderr, "Total execution time = {:.5}\n", seacas_timer() - begin); fmt::print(stderr, "\nSlice execution successful.\n"); return EXIT_SUCCESS; } namespace { template void create_adjacency_list(const Ioss::Region ®ion, std::vector &pointer, std::vector &adjacency, INT) { progress(__func__); // Size of pointer list is element count + 1; // Size of adjacency list is sum of nodes-per-element for each element. size_t sum = 0; size_t count = 0; const auto &ebs = region.get_element_blocks(); for (const auto &eb : ebs) { size_t element_count = eb->entity_count(); size_t element_nodes = eb->topology()->number_nodes(); sum += element_count * element_nodes; count += element_count; } pointer.reserve(count + 1); adjacency.reserve(sum); fmt::print(stderr, "\tAdjacency Size = {} for {} elements.\n", fmt::group_digits(sum), fmt::group_digits(count)); // Now, iterate the blocks again, get connectivity and build adjacency structure. std::vector connectivity; for (const auto &eb : ebs) { eb->get_field_data("connectivity_raw", connectivity); size_t element_count = eb->entity_count(); size_t element_nodes = eb->topology()->number_nodes(); size_t el = 0; for (size_t j = 0; j < element_count; j++) { pointer.push_back(adjacency.size()); for (size_t k = 0; k < element_nodes; k++) { INT node = connectivity[el++] - 1; adjacency.push_back(node); } } } pointer.push_back(adjacency.size()); assert(pointer.size() == count + 1); assert(adjacency.size() == sum); } template void decompose_elements(const Ioss::Region ®ion, SystemInterface &interFace, std::vector &elem_to_proc, IOSS_MAYBE_UNUSED INT dummy) { progress(__func__); // Populate the 'elem_to_proc' vector with a mapping from element to processor. size_t element_count = region.get_property("element_count").get_int(); size_t elem_per_proc = element_count / interFace.processor_count(); size_t extra = element_count % interFace.processor_count(); elem_to_proc.reserve(element_count); fmt::print(stderr, "\nDecomposing {} elements across {} processors using method '{}'.\n", fmt::group_digits(element_count), fmt::group_digits(interFace.processor_count()), interFace.decomposition_method()); if (interFace.lineDecomp_) { fmt::print(stderr, "\tDecomposition will be modified to put element lines/chains/columns on " "same processor rank\n"); } if (interFace.outputDecompMap_) { fmt::print(stderr, "\tDecomposition will be output to an element map named '{}'.\n", interFace.decomposition_variable()); } if (interFace.outputDecompField_) { fmt::print(stderr, "\tDecomposition will be output to an element field named '{}'.\n", interFace.decomposition_variable()); } fmt::print(stderr, "\n"); if (interFace.decomposition_method() == "linear") { size_t elem_beg = 0; for (size_t proc = 0; proc < interFace.processor_count(); proc++) { size_t add = (proc < extra) ? 1 : 0; size_t elem_end = elem_beg + elem_per_proc + add; for (size_t elem = elem_beg; elem < elem_end; elem++) { elem_to_proc.push_back(proc); } elem_beg = elem_end; } } else if (interFace.decomposition_method() == "scattered") { // Scattered... size_t proc = 0; for (size_t elem = 0; elem < element_count; elem++) { elem_to_proc.push_back(proc++); if (proc >= interFace.processor_count()) { proc = 0; } } } else if (interFace.decomposition_method() == "rb" || interFace.decomposition_method() == "kway") { #if USE_METIS std::vector pointer; std::vector adjacency; double start = seacas_timer(); create_adjacency_list(region, pointer, adjacency, dummy); double end = seacas_timer(); fmt::print(stderr, "\tCreate Adjacency List = {:.5}\n", end - start); // Call Metis to get the partition... { start = seacas_timer(); idx_t elem_count = element_count; idx_t common = get_common_node_count(region); idx_t proc_count = interFace.processor_count(); idx_t obj_val = 0; std::vector options((METIS_NOPTIONS)); METIS_SetDefaultOptions(&options[0]); if (interFace.decomposition_method() == "kway") { options[METIS_OPTION_PTYPE] = METIS_PTYPE_KWAY; } else { options[METIS_OPTION_PTYPE] = METIS_PTYPE_RB; } options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT; if (interFace.contiguous_decomposition()) { options[METIS_OPTION_CONTIG] = 1; } options[METIS_OPTION_DBGLVL] = 2; options[METIS_OPTION_MINCONN] = 1; idx_t node_count = region.get_property("node_count").get_int(); std::vector node_partition(node_count); std::vector elem_partition(element_count); fmt::print(stderr, "\tCalling METIS Decomposition routine.\n"); METIS_PartMeshDual(&elem_count, &node_count, &pointer[0], &adjacency[0], nullptr, nullptr, &common, &proc_count, nullptr, &options[0], &obj_val, &elem_partition[0], &node_partition[0]); Ioss::Utils::clear(node_partition); elem_to_proc.reserve(element_count); std::copy(elem_partition.begin(), elem_partition.end(), std::back_inserter(elem_to_proc)); end = seacas_timer(); fmt::print(stderr, "\tMETIS Partition = {:.5}\n", end - start); fmt::print(stderr, "Objective value = {}\n", obj_val); // TODO Check Error... } #else fmt::print(stderr, "ERROR: Metis library not enabled in this version of slice.\n" " The 'rb' and 'kway' methods are not available.\n\n"); std::exit(1); #endif } else if (interFace.decomposition_method() == "random") { // Random... Use scattered method and then random_shuffle() the vector. // Ensures that each processor has correct number of elements, but // they are randomly distributed. size_t proc = 0; for (size_t elem = 0; elem < element_count; elem++) { elem_to_proc.push_back(proc++); if (proc >= interFace.processor_count()) { proc = 0; } } std::random_device rd; std::mt19937 g(rd()); std::shuffle(elem_to_proc.begin(), elem_to_proc.end(), g); } else if (interFace.decomposition_method() == "variable") { const std::string &elem_variable = interFace.decomposition_variable(); if (elem_variable.empty()) { fmt::print(stderr, "\nERROR: No element decomposition variable specified.\n"); exit(EXIT_FAILURE); } // Get all element blocks and cycle through each reading the // values for the processor... const auto &blocks = region.get_element_blocks(); auto c_region = (Ioss::Region *)(®ion); c_region->begin_state(1); for (const auto &block : blocks) { if (!block->field_exists(elem_variable)) { fmt::print(stderr, "\nERROR: Element variable '{}' does not exist on block {}.\n", elem_variable, block->name()); exit(EXIT_FAILURE); } std::vector tmp_vals; block->get_field_data(elem_variable, tmp_vals); auto block_count = block->entity_count(); for (int64_t i = 0; i < block_count; i++) { elem_to_proc.push_back((int)tmp_vals[i]); } } } else if (interFace.decomposition_method() == "map") { std::string map_name = interFace.decomposition_variable(); if (map_name.empty()) { fmt::print(stderr, "\nERROR: No element decomposition map specified.\n"); exit(EXIT_FAILURE); } // If the "map_name" string contains a comma, then the value // following the comma is either an integer "scale" which is // divided into each entry in `elem_to_proc`, or it is the // string "auto" which will automatically scale all values by // the *integer* "max/processorCount" // // NOTE: integer division with *no* rounding is used. int iscale = 1; auto pos = map_name.find(","); if (pos != std::string::npos) { // Extract the string following the comma... auto scale = map_name.substr(pos + 1); if (scale == "AUTO" || scale == "auto") { iscale = 0; } else { iscale = std::stoi(scale); } } map_name = map_name.substr(0, pos); Ioss::DatabaseIO *db = region.get_database(); int exoid = db->get_file_pointer(); bool map_read = false; int map_count = ex_inquire_int(exoid, EX_INQ_ELEM_MAP); if (map_count > 0) { int max_name_length = ex_inquire_int(exoid, EX_INQ_DB_MAX_USED_NAME_LENGTH); max_name_length = max_name_length < 32 ? 32 : max_name_length; char **names = Ioss::Utils::get_name_array(map_count, max_name_length); int error = ex_get_names(exoid, EX_ELEM_MAP, names); if (error < 0) { exodus_error(__LINE__); } for (int i = 0; i < map_count; i++) { if (case_compare(names[i], map_name.c_str()) == 0) { elem_to_proc.resize(element_count); error = ex_get_num_map(exoid, EX_ELEM_MAP, i + 1, elem_to_proc.data()); if (error < 0) { exodus_error(__LINE__); } map_read = true; break; } } Ioss::Utils::delete_name_array(names, map_count); } if (!map_read) { fmt::print(stderr, "\nERROR: Element decomposition map '{}' could not be read from file.\n", map_name); exit(EXIT_FAILURE); } // Do the scaling (integer division...) if (iscale == 0) { // Auto scaling was asked for. Determine max entry in `elem_to_proc` and // set the scale factor. auto max_proc = *std::max_element(elem_to_proc.begin(), elem_to_proc.end()); iscale = (max_proc + 1) / interFace.processor_count(); fmt::print(" Element Processor Map automatic scaling factor = {}\n", iscale); if (iscale == 0) { fmt::print(stderr, "ERROR: Max value in element processor map is {} which is\n" "\tless than the processor count ({}). Scaling values is not possible.", max_proc, interFace.processor_count()); exit(EXIT_FAILURE); } } std::transform(elem_to_proc.begin(), elem_to_proc.end(), elem_to_proc.begin(), [iscale](int p) { return p / iscale; }); } else if (interFace.decomposition_method() == "file") { // Read the element decomposition mapping from a file. The // syntax of the file is an optional element count followed by // the processor for this range. If the element range is // omitted, then the processor applies to the next element in // the sequence. All elements must be specified or an error will // be raised. // // Example: // 0 // 100 1 // 0 // // Will assign: // * element 1 to processor 0; // * followed by the next 100 elements (2 to 101) to processor 1; // * followed by the next element (102) to processor 0. // // The resulting decomposition will have 2 elements (1, 102) on // processor 0 and 100 elements (2..101) on processor 1. const std::string &filename = interFace.decomposition_file(); if (filename.empty()) { fmt::print(stderr, "\nERROR: No element decomposition file specified.\n"); exit(EXIT_FAILURE); } std::ifstream decomp_file(filename, std::ios::in); if (!decomp_file.good()) { fmt::print( stderr, "\nERROR: Element decomposition file '{}' does not exist or could not be opened.\n", filename); exit(EXIT_FAILURE); } std::string line; size_t line_num = 0; while (std::getline(decomp_file, line)) { line_num++; // See if 1 or 2 tokens on line... std::vector tokens; tokens = SLIB::tokenize(line, ", \t"); size_t proc = 0; size_t count = 1; if (tokens.empty()) { break; } else if (tokens.size() == 1) { // Just a processor specification for the next element... proc = std::stoi(tokens[0]); elem_to_proc.push_back(proc); } else { // Count and processor specified. count = std::stoi(tokens[0]); proc = std::stoi(tokens[1]); } if (proc > interFace.processor_count()) { fmt::print(stderr, "\nERROR: Invalid processor {} specified on line {} of decomposition file.\n" "\tValid range is 0..{}\n", fmt::group_digits(proc), fmt::group_digits(line_num), fmt::group_digits(interFace.processor_count() - 1)); exit(EXIT_FAILURE); } if (elem_to_proc.size() + count > element_count) { fmt::print(stderr, "\nERROR: The processor specification on line {}" " of the decomposition file results in too many elements being specified.\n" "\tThe total number of elements in the model is {}\n" "\tPrior to this line, {} elements were specified.\n" "\tIncluding this line, {} elements will be specified.\n", fmt::group_digits(line_num), fmt::group_digits(element_count), fmt::group_digits(elem_to_proc.size()), fmt::group_digits(elem_to_proc.size() + count)); exit(EXIT_FAILURE); } for (size_t i = 0; i < count; i++) { elem_to_proc.push_back(proc); } } } assert(elem_to_proc.size() == element_count); } template void line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, int proc_count, INT /* dummy */) { // Get a map of all chains and the elements in the chains. Map key will be root. std::map> chains; for (size_t i = 0; i < element_chains.size(); i++) { auto &chain_entry = element_chains[i]; chains[chain_entry.element].push_back(i + 1); if ((debug_level & 16) != 0) { fmt::print("[{}]: element {}, link {}, processor {}\n", i + 1, chain_entry.element, chain_entry.link, elem_to_proc[i]); } } // Delta: elements added/removed from each processor... std::vector delta(proc_count); // Now, for each chain... for (auto &chain : chains) { if ((debug_level & 16) != 0) { fmt::print("Chain Root: {} contains: {}\n", chain.first, fmt::join(chain.second, ", ")); } std::vector chain_proc_count(proc_count); const auto &chain_elements = chain.second; // * get processors used by elements in the chain... for (const auto &element : chain_elements) { auto proc = elem_to_proc[element - 1]; chain_proc_count[proc]++; } // * Now, subtract the `delta` from each count for (int i = 0; i < proc_count; i++) { chain_proc_count[i] -= delta[i]; } // * Find the maximum value in `chain_proc_count` auto max = std::max_element(chain_proc_count.begin(), chain_proc_count.end()); auto max_proc = std::distance(chain_proc_count.begin(), max); // * Assign all elements in the chain to `max_proc`. // * Update the deltas for all processors that gain/lose elements... for (const auto &element : chain_elements) { if (elem_to_proc[element - 1] != max_proc) { auto old_proc = elem_to_proc[element - 1]; elem_to_proc[element - 1] = max_proc; delta[max_proc]++; delta[old_proc]--; } } } std::vector proc_element_count(proc_count); for (auto proc : elem_to_proc) { proc_element_count[proc]++; } if ((debug_level & 16) != 0) { fmt::print("\nElements/Processor: {}\n", fmt::join(proc_element_count, ", ")); fmt::print("Delta/Processor: {}\n", fmt::join(delta, ", ")); } } template void free_connectivity_storage(std::vector>> &connectivity, size_t proc_begin, size_t proc_size) { progress(__func__); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { size_t block_count = connectivity[p].size(); for (size_t b = 0; b < block_count; b++) { Ioss::Utils::clear(connectivity[p][b]); } Ioss::Utils::clear(connectivity[p]); } size_t processor_count = connectivity.size(); if (proc_begin + proc_size == processor_count) { Ioss::Utils::clear(connectivity); } } template void get_sidesets(const Ioss::Region ®ion, std::vector &proc_region, const std::vector &elem_to_proc, INT /*dummy*/) { progress(__func__); // This routine reads the sidesets in the global database; // and defines corresponding sidesets on each processor... size_t proc_count = proc_region.size(); auto &ss = region.get_sidesets(); size_t set_count = ss.size(); for (size_t s = 0; s < set_count; s++) { auto *gss = ss[s]; auto &ss_name = gss->name(); std::vector sset(proc_count); for (size_t p = 0; p < proc_count; p++) { sset[p] = new Ioss::SideSet(proc_region[p]->get_database(), ss_name); proc_region[p]->add(sset[p]); } auto &side_blocks = gss->get_side_blocks(); for (auto &gsb : side_blocks) { std::vector ss_elems; gsb->get_field_data("element_side_raw", ss_elems); std::vector pss(proc_count); for (size_t i = 0; i < ss_elems.size(); i += 2 /* elem,side pairs */) { int64_t elem = ss_elems[i] - 1; int p = elem_to_proc[elem]; pss[p]++; } auto &name = gsb->name(); auto &side_type = gsb->topology()->name(); auto &elem_type = gsb->parent_element_topology()->name(); for (size_t p = 0; p < proc_count; p++) { auto *side_block = new Ioss::SideBlock(proc_region[p]->get_database(), name, side_type, elem_type, pss[p]); sset[p]->add(side_block); } } } } template void output_sidesets(const Ioss::Region ®ion, std::vector &proc_region, const std::vector &elem_to_proc, size_t proc_begin, size_t proc_size, INT /*dummy*/) { progress(__func__); // This routine reads the sidesets in the global database; // and outputs the sidesets on each processor... size_t proc_count = proc_region.size(); auto &ss = region.get_sidesets(); size_t set_count = ss.size(); for (size_t s = 0; s < set_count; s++) { if (debug_level & 4) { progress("\tSideset " + std::to_string(s + 1)); } Ioss::SideSet *gss = ss[s]; auto &ss_name = gss->name(); std::vector proc_ss(proc_count); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { proc_ss[p] = proc_region[p]->get_sideset(ss_name); } auto &side_blocks = gss->get_side_blocks(); for (auto &gsb : side_blocks) { auto &sb_name = gsb->name(); std::vector proc_sb(proc_count); std::vector> psb_elems(proc_count); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { proc_sb[p] = proc_ss[p]->get_side_block(sb_name); size_t elem_count = proc_sb[p]->entity_count(); psb_elems[p].reserve(elem_count * 2); } std::vector ss_elems; gsb->get_field_data("element_side_raw", ss_elems); for (size_t i = 0; i < ss_elems.size(); i += 2 /* elem,side pairs */) { int64_t elem = ss_elems[i] - 1; int p = elem_to_proc[elem]; psb_elems[p].push_back(elem + 1); psb_elems[p].push_back(ss_elems[i + 1]); } for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { Ioss::SideBlock *psb = proc_sb[p]; psb->put_field_data("element_side", psb_elems[p]); proc_progress(p, proc_count); } } } if (set_count > 0) { static bool output = false; if (!output) { fmt::print(stderr, "WARNING: Sideset distribution factors not yet handled correctly.\n"); output = true; } } } template void output_communication_map(const Ioss::Region &global_region, std::vector &proc_region, const std::vector &node_to_proc, const std::vector &node_to_proc_pointer, size_t proc_begin, size_t proc_size) { progress(__func__); std::vector> border_node_proc_map(proc_size); INT global_node_count = global_region.get_property("node_count").get_int(); // Iterate all nodes and count the number of processors it is on: for (INT i = 0; i < global_node_count; i++) { size_t node_proc_count = node_to_proc_pointer[i + 1] - node_to_proc_pointer[i]; if (node_proc_count > 1) { // Get the pairs for all border nodes on this processor... // Not efficient at this time... size_t beg = node_to_proc_pointer[i]; size_t end = node_to_proc_pointer[i + 1]; for (size_t j = beg; j < end; j++) { size_t node = i + 1; size_t proc = node_to_proc[j]; for (size_t k = beg; k < end; k++) { if (j == k) { continue; } size_t p = node_to_proc[k]; if (p >= proc_begin && p < proc_begin + proc_size) { border_node_proc_map[p - proc_begin].push_back(node); border_node_proc_map[p - proc_begin].push_back(proc); } } } } } progress("border_node_proc_map fully populated"); size_t proc_count = proc_region.size(); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { auto &commset = proc_region[p]->get_commsets()[0]; commset->put_field_data("entity_processor", border_node_proc_map[p - proc_begin]); border_node_proc_map[p - proc_begin].clear(); proc_progress(p, proc_count); } } template void define_communication_data(const Ioss::Region &global_region, std::vector &proc_region, const std::vector &node_to_proc, const std::vector &node_to_proc_pointer) { progress(__func__); // This routine categorizes the nodes on a processor as interior // or border. // TODO(gdsjaar): Categorize elements also. For now, all treated as // interior which works for sierra-based applications // The node_to_proc_pointer has information about the number of // processors that a node is shared with. If the count is 1, then // the node is interior; otherwise, it is border. // Allocates: // * interior_nodes INT size - #interior nodes // * border_nodes_proc_map INT size - (proc-node) pair for each border node INT global_node_count = global_region.get_property("node_count").get_int(); size_t proc_count = proc_region.size(); std::vector interior_nodes(proc_count); std::vector border_nodes(proc_count); // Iterate all nodes and count the number of processors it is on: for (INT i = 0; i < global_node_count; i++) { size_t node_proc_count = node_to_proc_pointer[i + 1] - node_to_proc_pointer[i]; if (node_proc_count == 1) { size_t proc = node_to_proc[node_to_proc_pointer[i]]; interior_nodes[proc]++; } else { // Get the pairs for all border nodes on this processor... // Not efficient at this time... size_t beg = node_to_proc_pointer[i]; size_t end = node_to_proc_pointer[i + 1]; for (size_t j = beg; j < end; j++) { for (size_t k = beg; k < end; k++) { if (j == k) { continue; } size_t p = node_to_proc[k]; border_nodes[p]++; } } } } INT global_element_count = global_region.get_property("element_count").get_int(); // Categorize each element as interior... // Categorize the remaining nodes as border... for (size_t p = 0; p < proc_count; p++) { Ioss::Region *region = proc_region[p]; INT element_count = region->get_property("element_count").get_int(); INT node_count = region->get_property("node_count").get_int(); INT border_node_cnt = node_count - interior_nodes[p]; region->property_add(Ioss::Property("global_node_count", global_node_count)); region->property_add(Ioss::Property("global_element_count", global_element_count)); region->property_add(Ioss::Property("processor_count", static_cast(proc_count))); region->property_add(Ioss::Property("my_processor", static_cast(p))); region->property_add(Ioss::Property("internal_node_count", interior_nodes[p])); region->property_add(Ioss::Property("border_node_count", border_node_cnt)); region->property_add(Ioss::Property("internal_element_count", element_count)); region->property_add(Ioss::Property("border_element_count", 0)); // Add commset data... The length of the commset is the number // of pairs for all border nodes. // // For each node on this processor that isn't an interior node, // create the pair... auto *commset = new Ioss::CommSet(region->get_database(), "commset_node", "node", border_nodes[p]); commset->property_add(Ioss::Property("id", 1)); region->add(commset); if (debug_level & 2) { fmt::print(stderr, "Commset for processor {} has {} entries.\n", p, border_nodes[p]); } } } template void get_nodesets(const Ioss::Region ®ion, std::vector &proc_region, const std::vector &node_to_proc, const std::vector &node_to_proc_pointer) { progress(__func__); // This routine reads the nodesets in the global database; // and defines corresponding nodesets on each processor... size_t proc_count = proc_region.size(); auto &ns = region.get_nodesets(); size_t set_count = ns.size(); for (size_t s = 0; s < set_count; s++) { std::vector pns(proc_count); Ioss::NodeSet *gns = ns[s]; std::vector ns_nodes; gns->get_field_data("ids_raw", ns_nodes); for (size_t i = 0; i < ns_nodes.size(); i++) { int64_t node = ns_nodes[i] - 1; size_t p_beg = node_to_proc_pointer[node]; size_t p_end = node_to_proc_pointer[node + 1]; for (size_t j = p_beg; j < p_end; j++) { size_t p = node_to_proc[j]; pns[p]++; } } auto &name = ns[s]->name(); if (debug_level & 2) { fmt::print(stderr, "\tNodeset {}--", name); } for (size_t p = 0; p < proc_count; p++) { auto *node_set = new Ioss::NodeSet(proc_region[p]->get_database(), name, pns[p]); proc_region[p]->add(node_set); if (debug_level & 2) { fmt::print(stderr, "{}:{}, ", p, pns[p]); } } if (debug_level & 2) { fmt::print(stderr, "\n"); } } } template void output_nodesets(const Ioss::Region ®ion, std::vector &proc_region, const std::vector &node_to_proc, const std::vector &node_to_proc_pointer, size_t proc_begin, size_t proc_size) { progress(__func__); // This routine reads the nodesets in the global database; // and defines corresponding nodesets on each processor... size_t proc_count = proc_region.size(); auto &ns = region.get_nodesets(); size_t set_count = ns.size(); for (size_t s = 0; s < set_count; s++) { if (debug_level & 4) { progress("\tNodeSet " + std::to_string(s + 1)); } Ioss::NodeSet *gns = ns[s]; std::vector ns_nodes; gns->get_field_data("ids_raw", ns_nodes); std::vector ns_df; gns->get_field_data("distribution_factors", ns_df); std::vector> pns_nodes(proc_count); std::vector> pns_df(proc_count); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { size_t node_count = proc_region[p]->get_nodesets()[s]->entity_count(); pns_nodes[p].reserve(node_count); pns_df[p].reserve(node_count); } for (size_t i = 0; i < ns_nodes.size(); i++) { int64_t node = ns_nodes[i] - 1; size_t p_beg = node_to_proc_pointer[node]; size_t p_end = node_to_proc_pointer[node + 1]; for (size_t j = p_beg; j < p_end; j++) { size_t p = node_to_proc[j]; pns_nodes[p].push_back(node + 1); pns_df[p].push_back(ns_df[i]); } } for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { Ioss::NodeSet *proc_ns = proc_region[p]->get_nodesets()[s]; proc_ns->put_field_data("ids", pns_nodes[p]); proc_ns->put_field_data("distribution_factors", pns_df[p]); proc_progress(p, proc_count); } } } template void output_node_map(const Ioss::Region ®ion, std::vector &proc_region, const std::vector &node_to_proc, const std::vector &node_to_proc_pointer, size_t proc_begin, size_t proc_size) { progress(__func__); // This is the processor-local to global-implicit node map... // This maps the 1..#node in the global mesh to each processor... size_t node_count = region.get_property("node_count").get_int(); size_t proc_count = proc_region.size(); std::vector> proc_map(proc_count); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { size_t pnode_count = proc_region[p]->get_property("node_count").get_int(); proc_map[p].reserve(pnode_count); } for (size_t i = 0; i < node_count; i++) { size_t p_beg = node_to_proc_pointer[i]; size_t p_end = node_to_proc_pointer[i + 1]; for (size_t j = p_beg; j < p_end; j++) { size_t p = node_to_proc[j]; if (p >= proc_begin && p < proc_begin + proc_size) { proc_map[p].push_back(i + 1); } } } for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { Ioss::NodeBlock *nb = proc_region[p]->get_node_blocks()[0]; nb->put_field_data("ids", proc_map[p]); proc_map[p].clear(); proc_progress(p, proc_count); } } template void output_global_node_map(const Ioss::Region ®ion, std::vector &proc_region, const std::vector &node_to_proc, const std::vector &node_to_proc_pointer, size_t proc_begin, size_t proc_size) { progress(__func__); // This is the processor-local to global-implicit node map... // This maps the node_number map (if it exists) in the global mesh // to each processor... std::vector ids; Ioss::NodeBlock *gnb = region.get_node_blocks()[0]; gnb->get_field_data("ids", ids); // Check whether the map is sequential (X maps to X); bool sequential = is_sequential(ids); if (!sequential) { fmt::print(stderr, "Node map is not sequential...\n"); } size_t node_count = region.get_property("node_count").get_int(); size_t proc_count = proc_region.size(); std::vector> proc_map(proc_count); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { size_t pnode_count = proc_region[p]->get_property("node_count").get_int(); proc_map[p].reserve(pnode_count); } for (size_t i = 0; i < node_count; i++) { size_t p_beg = node_to_proc_pointer[i]; size_t p_end = node_to_proc_pointer[i + 1]; for (size_t j = p_beg; j < p_end; j++) { size_t p = node_to_proc[j]; if (p >= proc_begin && p < proc_begin + proc_size) { proc_map[p].push_back(ids[i]); } } } for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { Ioss::NodeBlock *nb = proc_region[p]->get_node_blocks()[0]; nb->put_field_data("ids", proc_map[p]); proc_map[p].clear(); proc_progress(p, proc_count); } } template void output_element_map(const Ioss::Region ®ion, std::vector &proc_region, const std::vector &elem_to_proc, size_t proc_begin, size_t proc_size, INT /* dummy */) { progress(__func__); // map[p][b] = map for block b on processor p size_t proc_count = proc_region.size(); const auto &ebs = region.get_element_blocks(); size_t block_count = ebs.size(); size_t offset = 0; for (size_t b = 0; b < block_count; b++) { if (debug_level & 4) { progress("\tBlock " + std::to_string(b + 1)); } #if 0 std::vector ids; ebs[b]->get_field_data("ids", ids); #endif std::vector> map(proc_count); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { const auto &proc_ebs = proc_region[p]->get_element_blocks(); size_t proc_element_count = proc_ebs[b]->entity_count(); map[p].reserve(proc_element_count); } size_t element_count = ebs[b]->entity_count(); for (size_t j = 0; j < element_count; j++) { size_t p = elem_to_proc[offset + j]; if (p >= proc_begin && p < proc_begin + proc_size) { #if 0 map[p].push_back(ids[j]); #else map[p].push_back(offset + j + 1); #endif } } offset += element_count; for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { const auto &proc_ebs = proc_region[p]->get_element_blocks(); proc_ebs[b]->put_field_data("ids", map[p]); map[p].clear(); proc_progress(p, proc_count); } } } template void output_coordinates(const Ioss::Region ®ion, std::vector &proc_region, const std::vector &node_to_proc, const std::vector &node_to_proc_pointer, size_t proc_begin, size_t proc_size) { progress(__func__); std::vector glob_coord_x; std::vector glob_coord_y; std::vector glob_coord_z; Ioss::NodeBlock *gnb = region.get_node_blocks()[0]; // Distribute nodal coordinates to each processor... // coordinates[p][i] = x,y,z coordinates on processor p size_t processor_count = proc_region.size(); std::vector> coordinates_x(processor_count); std::vector> coordinates_y(processor_count); std::vector> coordinates_z(processor_count); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { size_t pnode_count = proc_region[p]->get_property("node_count").get_int(); coordinates_x[p].reserve(pnode_count); coordinates_y[p].reserve(pnode_count); coordinates_z[p].reserve(pnode_count); } progress("\tReserve processor coordinate vectors"); Ioss::DatabaseIO *db = region.get_database(); size_t node_count = region.get_property("node_count").get_int(); if (node_count > partial_count) { int exoid = db->get_file_pointer(); glob_coord_x.resize(partial_count); glob_coord_y.resize(partial_count); glob_coord_z.resize(partial_count); for (size_t beg = 1; beg <= node_count; beg += partial_count) { size_t count = partial_count; if (beg + count - 1 > node_count) { count = node_count - beg + 1; } ex_get_partial_coord(exoid, beg, count, glob_coord_x.data(), glob_coord_y.data(), glob_coord_z.data()); progress("\tpartial_coord: " + std::to_string(beg) + " " + std::to_string(count)); for (size_t i = 0; i < count; i++) { size_t ii = beg + i - 1; size_t p_beg = node_to_proc_pointer[ii]; size_t p_end = node_to_proc_pointer[ii + 1]; for (size_t j = p_beg; j < p_end; j++) { size_t p = node_to_proc[j]; if (p >= proc_begin && p < proc_begin + proc_size) { coordinates_x[p].push_back(glob_coord_x[i]); coordinates_y[p].push_back(glob_coord_y[i]); coordinates_z[p].push_back(glob_coord_z[i]); } } } } } else { gnb->get_field_data("mesh_model_coordinates_x", glob_coord_x); gnb->get_field_data("mesh_model_coordinates_y", glob_coord_y); gnb->get_field_data("mesh_model_coordinates_z", glob_coord_z); progress("\tRead global mesh_model_coordinates"); for (size_t i = 0; i < node_count; i++) { size_t p_beg = node_to_proc_pointer[i]; size_t p_end = node_to_proc_pointer[i + 1]; for (size_t j = p_beg; j < p_end; j++) { size_t p = node_to_proc[j]; if (p >= proc_begin && p < proc_begin + proc_size) { coordinates_x[p].push_back(glob_coord_x[i]); coordinates_y[p].push_back(glob_coord_y[i]); coordinates_z[p].push_back(glob_coord_z[i]); } } } } progress("\tPopulate processor coordinate vectors"); Ioss::Utils::clear(glob_coord_x); Ioss::Utils::clear(glob_coord_y); Ioss::Utils::clear(glob_coord_z); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { Ioss::NodeBlock *nb = proc_region[p]->get_node_blocks()[0]; nb->put_field_data("mesh_model_coordinates_x", coordinates_x[p]); nb->put_field_data("mesh_model_coordinates_y", coordinates_y[p]); nb->put_field_data("mesh_model_coordinates_z", coordinates_z[p]); proc_progress(p, processor_count); } progress("\tOutput processor coordinate vectors"); } // Output a component at a time... template void output_coordinates_c(const Ioss::Region ®ion, std::vector &proc_region, const std::vector &node_to_proc, const std::vector &node_to_proc_pointer, size_t proc_begin, size_t proc_size) { progress(__func__); std::vector glob_coord; Ioss::NodeBlock *gnb = region.get_node_blocks()[0]; std::array field_name{"mesh_model_coordinates_x", "mesh_model_coordinates_y", "mesh_model_coordinates_z"}; // Distribute nodal coordinates to each processor... // coordinates[p][i] = x,y,z coordinates on processor p size_t processor_count = proc_region.size(); std::vector> coordinates(processor_count); Ioss::DatabaseIO *db = region.get_database(); size_t node_count = region.get_property("node_count").get_int(); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { size_t pnode_count = proc_region[p]->get_property("node_count").get_int(); coordinates[p].reserve(pnode_count); } progress("\tReserve processor coordinate vectors"); for (size_t comp = 0; comp < 3; comp++) { for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { coordinates[p].resize(0); } if (node_count > partial_count) { int exoid = db->get_file_pointer(); glob_coord.resize(partial_count); for (size_t beg = 1; beg <= node_count; beg += partial_count) { size_t count = partial_count; if (beg + count - 1 > node_count) { count = node_count - beg + 1; } switch (comp) { case 0: ex_get_partial_coord(exoid, beg, count, glob_coord.data(), nullptr, nullptr); break; case 1: ex_get_partial_coord(exoid, beg, count, nullptr, glob_coord.data(), nullptr); break; case 2: ex_get_partial_coord(exoid, beg, count, nullptr, nullptr, glob_coord.data()); break; } progress("\tpartial_coord: " + std::to_string(beg) + " " + std::to_string(count)); for (size_t i = 0; i < count; i++) { size_t ii = beg + i - 1; size_t p_beg = node_to_proc_pointer[ii]; size_t p_end = node_to_proc_pointer[ii + 1]; for (size_t j = p_beg; j < p_end; j++) { size_t p = node_to_proc[j]; if (p >= proc_begin && p < proc_begin + proc_size) { coordinates[p].push_back(glob_coord[i]); } } } } } else { gnb->get_field_data(field_name[comp], glob_coord); progress("\tRead global mesh_model_coordinates"); for (size_t i = 0; i < node_count; i++) { size_t p_beg = node_to_proc_pointer[i]; size_t p_end = node_to_proc_pointer[i + 1]; for (size_t j = p_beg; j < p_end; j++) { size_t p = node_to_proc[j]; if (p >= proc_begin && p < proc_begin + proc_size) { coordinates[p].push_back(glob_coord[i]); } } } } progress("\tPopulate processor coordinate vectors"); Ioss::Utils::clear(glob_coord); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { Ioss::NodeBlock *nb = proc_region[p]->get_node_blocks()[0]; nb->put_field_data(field_name[comp], coordinates[p]); proc_progress(p, processor_count); } } progress("\tOutput processor coordinate vectors"); } template void output_connectivity(const Ioss::Region ®ion, std::vector &proc_region, const std::vector &elem_to_proc, size_t proc_begin, size_t proc_size, INT /*dummy*/) { // Read connectivity and partition to each processor/block. // connectvity[p][b] = connectivity for block b on processor p progress(__func__); const auto &ebs = region.get_element_blocks(); size_t block_count = ebs.size(); size_t processor_count = proc_region.size(); Ioss::DatabaseIO *db = region.get_database(); std::vector glob_conn; size_t offset = 0; for (size_t b = 0; b < block_count; b++) { std::vector> connectivity(processor_count); size_t element_count = ebs[b]->entity_count(); size_t element_nodes = ebs[b]->topology()->number_nodes(); size_t block_id = ebs[b]->get_property("id").get_int(); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { const auto &pebs = proc_region[p]->get_element_blocks(); size_t pelement_count = pebs[b]->entity_count(); size_t pelement_nodes = pebs[b]->topology()->number_nodes(); connectivity[p].reserve(pelement_count * pelement_nodes); // Use reserve, not resize } // Do a 'partial_count' elements at a time... if (element_count >= partial_count) { int exoid = db->get_file_pointer(); glob_conn.resize(partial_count * element_nodes); for (size_t beg = 1; beg <= element_count; beg += partial_count) { size_t count = partial_count; if (beg + count - 1 > element_count) { count = element_count - beg + 1; } ex_get_partial_conn(exoid, EX_ELEM_BLOCK, block_id, beg, count, glob_conn.data(), nullptr, nullptr); progress(fmt::format("\tpartial_conn-- start: {}\tcount: {}", fmt::group_digits(beg), fmt::group_digits(count))); size_t el = 0; for (size_t j = 0; j < count; j++) { size_t p = elem_to_proc[offset + j]; if (p >= proc_begin && p < proc_begin + proc_size) { for (size_t k = 0; k < element_nodes; k++) { connectivity[p].push_back(glob_conn[el++]); } } else { el += element_nodes; } } offset += count; } } else { ebs[b]->get_field_data("connectivity_raw", glob_conn); size_t el = 0; for (size_t j = 0; j < element_count; j++) { size_t p = elem_to_proc[offset + j]; if (p >= proc_begin && p < proc_begin + proc_size) { for (size_t k = 0; k < element_nodes; k++) { connectivity[p].push_back(glob_conn[el++]); } } else { el += element_nodes; } } offset += element_count; } for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { const auto &proc_ebs = proc_region[p]->get_element_blocks(); Ioss::ElementBlock *eb = proc_ebs[b]; eb->put_field_data("connectivity", connectivity[p]); } } } template void get_proc_elem_block_count(const Ioss::Region ®ion, std::vector &elem_to_proc, std::vector> &proc_elem_block_cnt) { progress(__func__); const auto &ebs = region.get_element_blocks(); size_t block_count = ebs.size(); size_t begin = 0; for (size_t i = 0; i < block_count; i++) { size_t end = begin + ebs[i]->entity_count(); for (size_t j = begin; j < end; j++) { size_t processor = elem_to_proc[j]; proc_elem_block_cnt[i][processor]++; } begin = end; } size_t processor_count = proc_elem_block_cnt[0].size(); for (size_t i = 0; i < processor_count; i++) { size_t sum = 0; for (size_t j = 0; j < block_count; j++) { sum += proc_elem_block_cnt[j][i]; } proc_elem_block_cnt[block_count][i] = sum; if (debug_level & 2) { fmt::print(stderr, "\tProcessor {} has {} elements.\n", fmt::group_digits(i), fmt::group_digits(sum)); } } } template void get_node_to_proc(Ioss::Region ®ion, std::vector &proc_region, const std::vector &elem_to_proc, std::vector &node_to_proc, std::vector &node_to_proc_pointer) { progress(__func__); // Process each element block connectivity to get the node_to_proc mapping. // The 'node_to_proc_pointer' vector maps the processor span in the // node_to_proc vector. The processors that node 'node' (0-based) // is on are: // * begin = node_to_proc_pointer[node] // * end = node_to_proc_pointer[node+1] // * proc_list = node_to_proc[begin] .. node_to_proc[end-1] // size_t proc_count = proc_region.size(); size_t node_count = region.get_property("node_count").get_int(); if (node_count == 0) { // Should never happen, but makes static analyzers happy... return; } std::vector> proc_node(node_count); // Assume that the majority of nodes will be on 2 or less // processors (hopefully, most are on 1). // Preallocate the proc_node[node] vector to 2 to minimize // resizes... Use 'reserve' instead of 'resize' for (size_t i = 0; i < node_count; i++) { proc_node[i].reserve(2); } progress("\tProc_node reserved"); IOSS_MAYBE_UNUSED size_t sum_on_proc_count = 0; Ioss::DatabaseIO *db = region.get_database(); const auto &ebs = region.get_element_blocks(); size_t block_count = ebs.size(); size_t offset = 0; std::vector on_proc_count(proc_count); for (size_t b = 0; b < block_count; b++) { std::vector glob_conn; size_t element_count = ebs[b]->entity_count(); size_t element_nodes = ebs[b]->topology()->number_nodes(); size_t block_id = ebs[b]->get_property("id").get_int(); // Do a 'partial_count' elements at a time... if (element_count >= partial_count) { int exoid = db->get_file_pointer(); glob_conn.resize(partial_count * element_nodes); for (size_t beg = 1; beg <= element_count; beg += partial_count) { size_t count = partial_count; if (beg + count - 1 > element_count) { count = element_count - beg + 1; } ex_get_partial_conn(exoid, EX_ELEM_BLOCK, block_id, beg, count, glob_conn.data(), nullptr, nullptr); progress(fmt::format("\tpartial_conn-- start: {}\tcount: {}", fmt::group_digits(beg), fmt::group_digits(count))); populate_proc_node(count, offset, element_nodes, elem_to_proc, glob_conn, proc_node, on_proc_count); offset += count; } } else { ebs[b]->get_field_data("connectivity_raw", glob_conn); populate_proc_node(element_count, offset, element_nodes, elem_to_proc, glob_conn, proc_node, on_proc_count); offset += element_count; } } for (size_t p = 0; p < proc_count; p++) { Ioss::NodeBlock *nb = new Ioss::NodeBlock(proc_region[p]->get_database(), "node_block1", on_proc_count[p], 3); proc_region[p]->add(nb); if (debug_level & 2) { fmt::print(stderr, "\tProcessor {} has {} nodes.\n", fmt::group_digits(p), fmt::group_digits(on_proc_count[p])); } sum_on_proc_count += on_proc_count[p]; } progress("\tProc_node populated"); // Have data for each node showing which processors it is on... // proc_node[node].size() is number of processors for this node... node_to_proc_pointer.reserve(node_count + 1); std::vector proc_histo(17); size_t node_to_proc_pointer_size = 0; for (size_t i = 0; i < node_count; i++) { size_t num_procs = proc_node[i].size(); if (num_procs == 0) { fmt::print(stderr, "WARNING: Node {} is not connected to any elements.\n", fmt::group_digits(i + 1)); } else if (num_procs < proc_histo.size()) { proc_histo[num_procs]++; } else { proc_histo[0]++; } node_to_proc_pointer.push_back(node_to_proc_pointer_size); node_to_proc_pointer_size += num_procs; } // Output histogram.. fmt::print(stderr, "Processor count per node histogram:\n"); for (size_t i = 1; i < proc_histo.size(); i++) { if (proc_histo[i] > 0) { fmt::print(stderr, "\tNodes on {:2} processors = {:12}\t({:2})%\n", fmt::group_digits(i), fmt::group_digits(proc_histo[i]), (proc_histo[i] * 100 + node_count / 2) / node_count); } } if (proc_histo[0] > 0) { fmt::print(stderr, "\tNodes on {} or more processors = {}\t({:2})%\n", fmt::group_digits(proc_histo.size()), fmt::group_digits(proc_histo[0]), (proc_histo[0] * 100 + node_count / 2) / node_count); } fmt::print(stderr, "\n"); node_to_proc_pointer.push_back(node_to_proc_pointer_size); node_to_proc.reserve(node_to_proc_pointer_size); progress("\tNode_to_proc reserved"); assert(sum_on_proc_count == node_to_proc_pointer_size); for (auto &pn : proc_node) { size_t num_procs = pn.size(); for (size_t p = 0; p < num_procs; p++) { node_to_proc.push_back(pn[p]); } } assert(node_to_proc.size() == node_to_proc_pointer_size); progress("\tNode_to_proc populated"); } template void slice(Ioss::Region ®ion, const std::string &nemfile, SystemInterface &interFace, INT dummy) { progress(__func__); bool create_split_files = !interFace.outputDecompMap_ && !interFace.outputDecompField_; std::vector proc_region; if (create_split_files) { proc_region.resize(interFace.processor_count()); } bool ints64 = (sizeof(INT) == 8); Ioss::PropertyManager properties; if (interFace.netcdf4_) { properties.add(Ioss::Property("FILE_TYPE", "netcdf4")); } if (interFace.netcdf5_) { properties.add(Ioss::Property("FILE_TYPE", "netcdf5")); } if (interFace.compressionLevel_ > 0 || interFace.shuffle_ || interFace.szip_) { properties.add(Ioss::Property("FILE_TYPE", "netcdf4")); properties.add(Ioss::Property("COMPRESSION_LEVEL", interFace.compressionLevel_)); properties.add(Ioss::Property("COMPRESSION_SHUFFLE", static_cast(interFace.shuffle_))); if (interFace.szip_) { properties.add(Ioss::Property("COMPRESSION_METHOD", "szip")); } else if (interFace.zlib_) { properties.add(Ioss::Property("COMPRESSION_METHOD", "zlib")); } } if (interFace.ints64Bit_) { properties.add(Ioss::Property("INTEGER_SIZE_DB", 8)); properties.add(Ioss::Property("INTEGER_SIZE_API", 8)); } double start = seacas_timer(); std::vector elem_to_proc; decompose_elements(region, interFace, elem_to_proc, dummy); double end = seacas_timer(); fmt::print(stderr, "Decompose elements = {:.5}\n", end - start); Ioss::chain_t element_chains; if (interFace.lineDecomp_) { element_chains = Ioss::generate_element_chains(region, interFace.lineSurfaceList_, debug_level, dummy); line_decomp_modify(element_chains, elem_to_proc, interFace.processor_count(), dummy); } if (!create_split_files) { Ioss::DatabaseIO *dbo = Ioss::IOFactory::create( "exodus", nemfile, Ioss::WRITE_RESTART, Ioss::ParallelUtils::comm_world(), properties); if (dbo == nullptr || !dbo->ok(true)) { std::exit(EXIT_FAILURE); } // NOTE: 'output_region' owns 'dbo' pointer at this time Ioss::Region output_region(dbo, "region_2"); // Set the qa information... output_region.property_add(Ioss::Property(std::string("code_name"), qainfo[0])); output_region.property_add(Ioss::Property(std::string("code_version"), qainfo[2])); Ioss::MeshCopyOptions options{}; options.ints_64_bit = sizeof(INT) == 64; options.delete_timesteps = true; options.data_storage_type = 2; options.verbose = true; // Copy mesh portion of input region to the output region Ioss::copy_database(region, output_region, options); // KLUGE: The metadata has already been written on // output_region, but we couldn't define the maps until now, so // need to update the metadata with map information and hope // that no other maps exist on the database... if (interFace.outputDecompMap_) { bool line_decomp = interFace.lineDecomp_; add_decomp_map(output_region, interFace.decomposition_variable(), line_decomp); output_decomp_map(output_region, elem_to_proc, element_chains, interFace.decomposition_variable(), line_decomp); } if (interFace.outputDecompField_) { bool line_decomp = interFace.lineDecomp_; add_decomp_field(output_region, interFace.decomposition_variable(), line_decomp); output_decomp_field(output_region, elem_to_proc, element_chains, interFace.decomposition_variable(), line_decomp); } return; } bool close_files = interFace.processor_count() + 1 > interFace.max_files(); for (size_t i = 0; i < interFace.processor_count(); i++) { std::string outfile = Ioss::Utils::decode_filename(nemfile, i, interFace.processor_count()); Ioss::DatabaseIO *dbo = Ioss::IOFactory::create( "exodus", outfile, Ioss::WRITE_RESTART, Ioss::ParallelUtils::comm_world(), properties); if (ints64) { dbo->set_int_byte_size_api(Ioss::USE_INT64_API); } proc_region[i] = new Ioss::Region(dbo); proc_region[i]->begin_mode(Ioss::STATE_DEFINE_MODEL); if (close_files) { proc_region[i]->get_database()->closeDatabase(); } } start = seacas_timer(); // Build the proc_elem_block_cnt[i][j] vector. // Gives number of elements in block i on processor j size_t block_count = region.get_property("element_block_count").get_int(); std::vector> proc_elem_block_cnt(block_count + 1); for (auto &pebc : proc_elem_block_cnt) { pebc.resize(interFace.processor_count()); } get_proc_elem_block_count(region, elem_to_proc, proc_elem_block_cnt); end = seacas_timer(); fmt::print(stderr, "Calculate elements per element block on each processor = {:.5}\n", end - start); // Create element blocks for each processor... for (size_t p = 0; p < interFace.processor_count(); p++) { const auto &ebs = region.get_element_blocks(); size_t bc = ebs.size(); for (size_t b = 0; b < bc; b++) { std::string type = ebs[b]->topology()->name(); auto *eb = new Ioss::ElementBlock(proc_region[p]->get_database(), ebs[b]->name(), type, proc_elem_block_cnt[b][p]); proc_region[p]->add(eb); } } // Now that we have the elements on each processor and the element // blocks those elements are in, can generate the node to proc list... start = seacas_timer(); std::vector node_to_proc; std::vector node_to_proc_pointer; get_node_to_proc(region, proc_region, elem_to_proc, node_to_proc, node_to_proc_pointer); end = seacas_timer(); fmt::print(stderr, "Node Categorization Time = {:.5}\n", end - start); // Communication map data -- interior/border nodes start = seacas_timer(); define_communication_data(region, proc_region, node_to_proc, node_to_proc_pointer); end = seacas_timer(); fmt::print(stderr, "Communication Data Definitions = {:.5}\n", end - start); // Determine nodeset distribution to processor regions. start = seacas_timer(); get_nodesets(region, proc_region, node_to_proc, node_to_proc_pointer); end = seacas_timer(); fmt::print(stderr, "Get nodeset data = {:.5}\n", end - start); start = seacas_timer(); get_sidesets(region, proc_region, elem_to_proc, (INT)0); end = seacas_timer(); fmt::print(stderr, "Get sideset data = {:.5}\n", end - start); start = seacas_timer(); double start_comb = start; fmt::print(stderr, "Begin writing output files\n"); size_t proc_count = interFace.processor_count(); // Output in processor chunks of size <= max_files so can keep all files open.... size_t max_files = interFace.max_files(); size_t chunks = (proc_count + max_files - 1) / max_files; size_t size_per_chunk = (proc_count + chunks - 1) / chunks; if (chunks > 1) { fmt::print(stderr, "\nMax open files = {}; processing files in {} chunks of size {} to maximize " "performance.\n", max_files, chunks, size_per_chunk); } for (size_t chunk = 0; chunk < chunks; chunk++) { size_t proc_begin = chunk * size_per_chunk; size_t proc_size = size_per_chunk; if (proc_begin + proc_size > proc_count) { proc_size = proc_count - proc_begin; } fmt::print(stderr, "\nProcessor range {} to {}\n", fmt::group_digits(proc_begin), fmt::group_digits(proc_begin + proc_size - 1)); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { Ioss::transfer_coordinate_frames(region, *proc_region[p]); Ioss::transfer_assemblies(region, *proc_region[p], Ioss::MeshCopyOptions{}, 0); proc_region[p]->synchronize_id_and_name(®ion); proc_region[p]->end_mode(Ioss::STATE_DEFINE_MODEL); if (interFace.lineDecomp_) { add_chain_maps(*proc_region[p]); } proc_region[p]->begin_mode(Ioss::STATE_MODEL); proc_progress(p, proc_count); } end = seacas_timer(); fmt::print(stderr, "\tDefine output databases = {:.5}\n", end - start); // Generate and output node map... #if 1 start = seacas_timer(); output_node_map(region, proc_region, node_to_proc, node_to_proc_pointer, proc_begin, proc_size); end = seacas_timer(); fmt::print(stderr, "\tNode Map Output = {:.5}\n", end - start); #else start = seacas_timer(); output_global_node_map(region, proc_region, node_to_proc, node_to_proc_pointer, proc_begin, proc_size); end = seacas_timer(); fmt::print(stderr, "\tGlobal Node Map Output = {:.5}\n", end - start); #endif start = seacas_timer(); output_element_map(region, proc_region, elem_to_proc, proc_begin, proc_size, (INT)1); end = seacas_timer(); fmt::print(stderr, "\tElement Map Output = {:.5}\n", end - start); start = seacas_timer(); output_communication_map(region, proc_region, node_to_proc, node_to_proc_pointer, proc_begin, proc_size); end = seacas_timer(); fmt::print(stderr, "\tCommunication map Output = {:.5}\n", end - start); output_connectivity(region, proc_region, elem_to_proc, proc_begin, proc_size, (INT)1); end = seacas_timer(); fmt::print(stderr, "Connectivity Output = {:.5}\n", end - start); start = seacas_timer(); #if 0 output_coordinates(region, proc_region, node_to_proc, node_to_proc_pointer, proc_begin, proc_size); #else output_coordinates_c(region, proc_region, node_to_proc, node_to_proc_pointer, proc_begin, proc_size); #endif end = seacas_timer(); fmt::print(stderr, "\tCoordinates Output = {:.5}\n", end - start); start = seacas_timer(); output_nodesets(region, proc_region, node_to_proc, node_to_proc_pointer, proc_begin, proc_size); end = seacas_timer(); fmt::print(stderr, "\tNodeset Output = {:.5}\n", end - start); start = seacas_timer(); output_sidesets(region, proc_region, elem_to_proc, proc_begin, proc_size, (INT)0); end = seacas_timer(); fmt::print(stderr, "\tSideset Output = {:.5}\n", end - start); if (interFace.lineDecomp_) { output_chain_maps(proc_region, element_chains, elem_to_proc, proc_begin, proc_size, (INT)0); } // Close all files... start = seacas_timer(); for (size_t p = proc_begin; p < proc_begin + proc_size; p++) { proc_region[p]->end_mode(Ioss::STATE_MODEL); delete proc_region[p]; } end = seacas_timer(); fmt::print(stderr, "\tClose and finalize processor {} to {} output databases = {:.5}\n", proc_begin, proc_begin + proc_size - 1, end - start); } end = seacas_timer(); fmt::print(stderr, "\nTotal time to write output files = {:.5} ({:.5} per file)\n", end - start_comb, (end - start_comb) / interFace.processor_count()); } void filename_substitution(std::string &filename, const SystemInterface &interFace) { // See if filename contains "%P" which is replaced by the number of processors... // Assumes that %P only occurs once... // filename is changed. size_t pos = filename.find("%P"); if (pos != std::string::npos) { // Found the characters... Replace with the processor count... size_t num_proc = interFace.processor_count(); std::string tmp(filename, 0, pos); tmp += std::to_string(num_proc); tmp += filename.substr(pos + 2); filename = tmp; } // If contains %M, replace with the decomposition method. pos = filename.find("%M"); if (pos != std::string::npos) { // Found the characters... Replace with the input file basename... const std::string &method_name = interFace.decomposition_method(); std::string tmp(filename, 0, pos); tmp += method_name; tmp += filename.substr(pos + 2); filename = tmp; } } } // namespace