|
|
#include <cstdlib>
|
|
|
#include <cmath>
|
|
|
#include <iostream>
|
|
|
|
|
|
#include "femgrp.h"
|
|
|
#include "matconv.h"
|
|
|
#include "Constants.h"
|
|
|
#include "vtkwriter.h"
|
|
|
#ifdef _OPENMP
|
|
|
#include <omp.h>
|
|
|
#endif
|
|
|
#include <map>
|
|
|
#include "MeshPartition_METIS5.h"
|
|
|
#include <vector>
|
|
|
#include "debug.hpp"
|
|
|
#include "vtk-5.0/vtkTetra.h"
|
|
|
#include "rapidcsv.h"
|
|
|
#include <string>
|
|
|
#include <sys/stat.h>
|
|
|
#include <sys/types.h>
|
|
|
#include <errno.h>
|
|
|
#include <cstring> // For strerror
|
|
|
#include <cstdio> // For perror or printf
|
|
|
|
|
|
#include <algorithm> // for std::max
|
|
|
#include <fstream>
|
|
|
#include <string>
|
|
|
#include <iomanip>
|
|
|
#include <filesystem> // at top of file
|
|
|
|
|
|
|
|
|
// Diagnostic helper: sanity-checks a pointer before it is handed to a kernel.
// Messages are tagged "[addExcitationE_port]".
//
//   p    - pointer to inspect
//   name - label used in the diagnostics
//
// Returns false only when `p` is NULL. Every other finding (attribute query
// failure, non-device memory type) is reported on stderr but still returns
// true, so the caller may proceed with the launch.
auto check_dev_ptr = [](const void* p, const char* name) -> bool {
    if (!p) {
        fprintf(stderr, "[addExcitationE_port] ❌ NULL pointer: %s\n", name);
        return false;
    }

#if CUDART_VERSION >= 10000
    cudaPointerAttributes attr;
    const cudaError_t perr = cudaPointerGetAttributes(&attr, p);
    if (perr != cudaSuccess) {
        fprintf(stderr, "[addExcitationE_port] ⚠️ cudaPointerGetAttributes failed for %s: %s\n",
                name, cudaGetErrorString(perr));
        // The failed query is recorded as the runtime's "last error". Reset it
        // so that a cudaGetLastError() issued after the upcoming kernel launch
        // does not report this unrelated failure.
        (void)cudaGetLastError();
        // Still allow launch; you can change to 'return false;' if you prefer.
    } else {
        // Runtimes >= 11.0 expose the memory kind as attr.type; older ones as
        // attr.memoryType.
#if CUDART_VERSION >= 11000
        const bool is_dev   = (attr.type == cudaMemoryTypeDevice);
        const int  mem_type = (int)attr.type;
#else
        const bool is_dev   = (attr.memoryType == cudaMemoryTypeDevice);
        const int  mem_type = (int)attr.memoryType;
#endif
        if (!is_dev) {
            fprintf(stderr, "[addExcitationE_port] ⚠️ %s is NOT a device pointer (type=%d)\n",
                    name, mem_type);
        }
    }
#endif

    return true;
};
|
|
|
|
|
|
|
|
|
// ======================================
|
|
|
// Interpolation Quadrature Points (host tables)
|
|
|
// Abscissae and weights of the 6-point triangle rule.
// Derived values are fully parenthesised so the macros expand correctly when
// used inside a larger expression (e.g. `2.0 * g6_b_h`).
#define g6_a_h 0.816847572980459
#define g6_b_h ((1.0 - g6_a_h) / 2.0)
#define g6_c_h 0.108103018168070
#define g6_d_h ((1.0 - g6_c_h) / 2.0)
#define g6_W1_h 0.109951743655322
#define g6_W2_h 0.223381589678011

// Abscissae and weights of the 9-point triangle rule.
#define g9_a_h 0.437525248383384
#define g9_b_h (1.0 - 2.0 * g9_a_h)
#define g9_c_h 0.797112651860071
#define g9_d_h 0.165409927389841
#define g9_e_h (1.0 - g9_c_h - g9_d_h)
#define g9_W1_h 0.205950504760887
#define g9_W2_h 0.063691414286223

// 6-point table. Each row is {z0, z1, z2, w}: three barycentric coordinates
// followed by the weight (this is how tri_gauss_host reads it).
fp_t_ts g2d_6_h[6][4] = {
    {g6_a_h, g6_b_h, g6_b_h, g6_W1_h},
    {g6_b_h, g6_a_h, g6_b_h, g6_W1_h},
    {g6_b_h, g6_b_h, g6_a_h, g6_W1_h},
    {g6_c_h, g6_d_h, g6_d_h, g6_W2_h},
    {g6_d_h, g6_c_h, g6_d_h, g6_W2_h},
    {g6_d_h, g6_d_h, g6_c_h, g6_W2_h}
};

// 9-point table, same row layout as g2d_6_h.
fp_t_ts g2d_9_h[9][4] = {
    {g9_b_h, g9_a_h, g9_a_h, g9_W1_h},
    {g9_a_h, g9_b_h, g9_a_h, g9_W1_h},
    {g9_a_h, g9_a_h, g9_b_h, g9_W1_h},

    {g9_c_h, g9_d_h, g9_e_h, g9_W2_h},
    {g9_c_h, g9_e_h, g9_d_h, g9_W2_h},
    {g9_d_h, g9_c_h, g9_e_h, g9_W2_h},
    {g9_d_h, g9_e_h, g9_c_h, g9_W2_h},
    {g9_e_h, g9_c_h, g9_d_h, g9_W2_h},
    {g9_e_h, g9_d_h, g9_c_h, g9_W2_h}
};

// Number of quadrature points per PolyFlag value (the array is indexed by
// PolyFlag in the port interpolation / CSV routines of this file).
const int GAUSS_POINT_NUM_h[4] = {6, 9, 9, 9};
|
|
|
|
|
|
|
|
|
|
|
|
// Quadratic (P2) triangle shape functions evaluated at barycentric point l.
//
//   l - barycentric coordinates (l0, l1, l2)
//   N - output, 6 values: N[0..2] belong to vertices 0, 1, 2;
//       N[3] to edge (1,2), N[4] to edge (0,2), N[5] to edge (0,1)
static inline void triP2_shapes(const double l[3], double N[6]) {
    // Snapshot the inputs before any output is written.
    const double bary[3] = { l[0], l[1], l[2] };

    // Vertex functions: l_i * (2 l_i - 1)
    for (int v = 0; v < 3; ++v) {
        const double lv = bary[v];
        N[v] = lv * (2.0 * lv - 1.0);
    }

    // Edge functions: 4 * l_i * l_j for the two vertices of the edge
    N[3] = 4.0 * bary[1] * bary[2];
    N[4] = 4.0 * bary[0] * bary[2];
    N[5] = 4.0 * bary[0] * bary[1];
}
|
|
|
|
|
|
// Unit normal and area of the triangle given by three points.
//
//   xyz9 - 9 coordinates: x0,y0,z0, x1,y1,z1, x2,y2,z2
//   n    - output: (p1-p0) x (p2-p0), normalised when its length is > 0
//          (left as the zero cross product for a degenerate triangle)
//   area - output: half the length of the cross product
static inline void face_geometry9_host(const fp_t_ts* xyz9, double n[3], double& area) {
    // Promote all coordinates to double first: pts[vertex][component].
    double pts[3][3];
    for (int v = 0; v < 3; ++v) {
        for (int c = 0; c < 3; ++c) {
            pts[v][c] = xyz9[3 * v + c];
        }
    }

    // Edge vectors leaving vertex 0.
    double e1[3], e2[3];
    for (int c = 0; c < 3; ++c) {
        e1[c] = pts[1][c] - pts[0][c];
        e2[c] = pts[2][c] - pts[0][c];
    }

    // Cross product e1 x e2.
    n[0] = e1[1] * e2[2] - e1[2] * e2[1];
    n[1] = e1[2] * e2[0] - e1[0] * e2[2];
    n[2] = e1[0] * e2[1] - e1[1] * e2[0];

    const double len = sqrt(n[0] * n[0] + n[1] * n[1] + n[2] * n[2]);
    area = 0.5 * len;

    if (len > 0) {
        for (int c = 0; c < 3; ++c) {
            n[c] /= len;
        }
    }
}
|
|
|
|
|
|
// Removes from v its component along n, in place: v <- v - (v . n) n.
// The result is tangential to the plane with normal n when n has unit length;
// n is not normalised here.
static inline void proj_tangent(double v[3], const double n[3])
{
    const double along = v[0] * n[0] + v[1] * n[1] + v[2] * n[2];
    for (int c = 0; c < 3; ++c) {
        v[c] -= along * n[c];
    }
}
|
|
|
|
|
|
// Host quadrature accessor for the *_h tables.
//
//   Q          - number of points of the rule (6 and 9 are supported)
//   q          - index of the requested point, expected in [0, Q)
//   z0, z1, z2 - output: barycentric coordinates of point q
//   w          - output: weight of point q
//
// All four outputs are set to 0 when Q is not a supported rule or when q is
// outside [0, Q), so an invalid request never indexes past a table.
static inline void tri_gauss_host(int Q, int q, fp_t& z0, fp_t& z1, fp_t& z2, fp_t& w) {
    const fp_t_ts* row = nullptr;

    if (q >= 0 && q < Q) {
        if (Q == 6) {
            row = g2d_6_h[q];
        } else if (Q == 9) {
            row = g2d_9_h[q];
        }
        // add more orders if you enable them
    }

    if (row == nullptr) {
        z0 = 0;
        z1 = 0;
        z2 = 0;
        w  = 0;
        return;
    }

    z0 = row[0];
    z1 = row[1];
    z2 = row[2];
    w  = row[3];
}
|
|
|
|
|
|
// Interpolates the P2 nodal E/H vectors of one port face to its quadrature
// points, projects them onto the face's tangential plane and scales them.
//
//   xyz9     - face vertices: x0 y0 z0 x1 y1 z1 x2 y2 z2
//   evtr     - 6 P2 nodal vectors for E (face order 0..5)
//   hvtr     - 6 P2 nodal vectors for H
//   PolyFlag - index into GAUSS_POINT_NUM_h selecting the number of points Q
//   Etan_out - output, Q*3 values (x, y, z per quadrature point)
//   Htan_out - output, Q*3 values
//   port_excitation_magnitude - factor applied to every output component
//
// An out-of-range PolyFlag is reported on stderr and leaves both outputs
// untouched instead of indexing past GAUSS_POINT_NUM_h.
static inline void interp_port_fields_to_quads(
    const fp_t_ts* xyz9,
    const vtr evtr[6],
    const vtr hvtr[6],
    int PolyFlag,
    fp_t_ts* Etan_out,
    fp_t_ts* Htan_out,
    fp_t_ts port_excitation_magnitude)
{
    const int numOrders = (int)(sizeof(GAUSS_POINT_NUM_h) / sizeof(GAUSS_POINT_NUM_h[0]));
    if (PolyFlag < 0 || PolyFlag >= numOrders) {
        fprintf(stderr, "interp_port_fields_to_quads: PolyFlag %d out of range [0, %d)\n",
                PolyFlag, numOrders);
        return;
    }
    const int Q = GAUSS_POINT_NUM_h[PolyFlag];

    // Only the normal is needed; the area is computed by the helper but unused.
    double n[3], area;
    face_geometry9_host(xyz9, n, area);
    (void)area;

    for (int q = 0; q < Q; ++q)
    {
        fp_t z0, z1, z2, w;
        tri_gauss_host(Q, q, z0, z1, z2, w);

        const double l[3] = { (double)z0, (double)z1, (double)z2 };
        double N[6];
        triP2_shapes(l, N);

        // Shape-function weighted sum of the nodal vectors.
        double E[3] = {0, 0, 0}, H[3] = {0, 0, 0};
        for (int m = 0; m < 6; ++m)
        {
            const double a = N[m];
            E[0] += a * evtr[m].getx();
            E[1] += a * evtr[m].gety();
            E[2] += a * evtr[m].getz();
            H[0] += a * hvtr[m].getx();
            H[1] += a * hvtr[m].gety();
            H[2] += a * hvtr[m].getz();
        }

        proj_tangent(E, n);
        proj_tangent(H, n);

        // Narrow to the storage type first, then scale (same order as before).
        for (int c = 0; c < 3; ++c) {
            Etan_out[q * 3 + c] = (fp_t_ts)E[c] * port_excitation_magnitude;
        }
        for (int c = 0; c < 3; ++c) {
            Htan_out[q * 3 + c] = (fp_t_ts)H[c] * port_excitation_magnitude;
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// Writes the port quadrature fields to a CSV file.
// Columns: face_idx,global_face_id,tet_id,port_idx,q,z0,z1,z2,w,x,y,z,Et_x,Et_y,Et_z,Ht_x,Ht_y,Ht_z
//
//   out_path            - destination file (created / truncated)
//   PolyFlag            - index into GAUSS_POINT_NUM_h selecting Q points per face
//   excitationFaces     - number of faces described by the arrays below
//   PortFacePidx_h      - length excitationFaces; negative entries mark non-port
//                         faces, which are skipped
//   FaceID_excitation_h - length excitationFaces, optional (nullptr -> -1 is written)
//   TetID_excitation_h  - length excitationFaces, optional (nullptr -> -1 is written)
//   nd_coords_face_h    - length excitationFaces * 9 (three xyz vertices per face)
//   Etan_qp_h           - length excitationFaces * Q * 3
//   Htan_qp_h           - length excitationFaces * Q * 3
//
// Returns true when the whole file was written. Returns false, with a message
// on stderr, on a null required argument, an out-of-range PolyFlag, a failure
// to open the file, or a write error.
bool write_port_quadrature_csv(
    const char* out_path,
    int PolyFlag,
    int excitationFaces,
    const int* PortFacePidx_h,
    const int* FaceID_excitation_h,
    const int* TetID_excitation_h,
    const fp_t_ts* nd_coords_face_h,
    const fp_t_ts* Etan_qp_h,
    const fp_t_ts* Htan_qp_h
) {
    if (!out_path || !nd_coords_face_h || !Etan_qp_h || !Htan_qp_h || !PortFacePidx_h) {
        fprintf(stderr, "write_port_quadrature_csv: null pointer argument.\n");
        return false;
    }

    // Validate before touching the table or creating the file.
    const int numOrders = (int)(sizeof(GAUSS_POINT_NUM_h) / sizeof(GAUSS_POINT_NUM_h[0]));
    if (PolyFlag < 0 || PolyFlag >= numOrders) {
        fprintf(stderr, "write_port_quadrature_csv: PolyFlag %d out of range [0, %d).\n",
                PolyFlag, numOrders);
        return false;
    }
    const int Q = GAUSS_POINT_NUM_h[PolyFlag];

    std::ofstream ofs(out_path);
    if (!ofs) {
        fprintf(stderr, "write_port_quadrature_csv: failed to open %s\n", out_path);
        return false;
    }

    ofs.setf(std::ios::scientific);
    ofs << std::setprecision(9);

    // Header
    ofs << "face_idx,global_face_id,tet_id,port_idx,q,"
           "z0,z1,z2,w,x,y,z,Et_x,Et_y,Et_z,Ht_x,Ht_y,Ht_z\n";

    for (int f = 0; f < excitationFaces; ++f) {
        const int pidx = PortFacePidx_h[f];
        if (pidx < 0) continue; // skip non-port faces

        const int global_face_id = FaceID_excitation_h ? FaceID_excitation_h[f] : -1;
        const int tet_id         = TetID_excitation_h  ? TetID_excitation_h[f]  : -1;

        // Triangle vertices (offset computed in size_t to avoid int overflow).
        const fp_t_ts* xyz9 = &nd_coords_face_h[(size_t)f * 9];
        const double Ax = (double)xyz9[0], Ay = (double)xyz9[1], Az = (double)xyz9[2];
        const double Bx = (double)xyz9[3], By = (double)xyz9[4], Bz = (double)xyz9[5];
        const double Cx = (double)xyz9[6], Cy = (double)xyz9[7], Cz = (double)xyz9[8];

        // Fields of this face
        const fp_t_ts* Eface = &Etan_qp_h[(size_t)f * Q * 3];
        const fp_t_ts* Hface = &Htan_qp_h[(size_t)f * Q * 3];

        for (int q = 0; q < Q; ++q) {
            fp_t z0, z1, z2, w;
            tri_gauss_host(Q, q, z0, z1, z2, w);

            // Quadrature point physical coords (barycentric combination)
            const double x = z0 * Ax + z1 * Bx + z2 * Cx;
            const double y = z0 * Ay + z1 * By + z2 * Cy;
            const double z = z0 * Az + z1 * Bz + z2 * Cz;

            ofs << f << ','
                << global_face_id << ','
                << tet_id << ','
                << pidx << ','
                << q << ','
                << (double)z0 << ','
                << (double)z1 << ','
                << (double)z2 << ','
                << (double)w << ','
                << x << ',' << y << ',' << z << ','
                << (double)Eface[q * 3 + 0] << ','
                << (double)Eface[q * 3 + 1] << ','
                << (double)Eface[q * 3 + 2] << ','
                << (double)Hface[q * 3 + 0] << ','
                << (double)Hface[q * 3 + 1] << ','
                << (double)Hface[q * 3 + 2] << '\n';
        }
    }

    // close() flushes; any earlier write error or a failing flush leaves the
    // stream in a failed state, which must not be reported as success.
    ofs.close();
    if (!ofs) {
        fprintf(stderr, "write_port_quadrature_csv: write error on %s\n", out_path);
        return false;
    }
    return true;
}
|
|
|
|
|
|
|
|
|
// Evaluates the P2-interpolated E and H fields of one face at its centroid
// and projects both onto the face's tangential plane.
//
//   xyz9     - face vertices: x0 y0 z0 x1 y1 z1 x2 y2 z2
//   evtr     - 6 P2 nodal vectors for E
//   hvtr     - 6 P2 nodal vectors for H
//   Etan_out - output: tangential E at the centroid (3 components)
//   Htan_out - output: tangential H at the centroid (3 components)
static inline void interp_port_fields_to_centroid(
    const fp_t_ts* xyz9,
    const vtr evtr[6],
    const vtr hvtr[6],
    fp_t_ts Etan_out[3],
    fp_t_ts Htan_out[3])
{
    // Face normal for the projection; the area is a by-product and not used.
    double normal[3], faceArea;
    face_geometry9_host(xyz9, normal, faceArea);

    // Shape functions at the centroid (all barycentric coordinates are 1/3).
    const double centroid[3] = { 1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0 };
    double shape[6];
    triP2_shapes(centroid, shape);

    // Shape-function weighted sum of the nodal vectors.
    double eSum[3] = {0.0, 0.0, 0.0};
    double hSum[3] = {0.0, 0.0, 0.0};
    for (int node = 0; node < 6; ++node) {
        const double wgt = shape[node];
        eSum[0] += wgt * evtr[node].getx();
        eSum[1] += wgt * evtr[node].gety();
        eSum[2] += wgt * evtr[node].getz();
        hSum[0] += wgt * hvtr[node].getx();
        hSum[1] += wgt * hvtr[node].gety();
        hSum[2] += wgt * hvtr[node].getz();
    }

    proj_tangent(eSum, normal);
    proj_tangent(hSum, normal);

    // Narrow to the storage type: E first, then H.
    for (int c = 0; c < 3; ++c) {
        Etan_out[c] = (fp_t_ts)eSum[c];
    }
    for (int c = 0; c < 3; ++c) {
        Htan_out[c] = (fp_t_ts)hSum[c];
    }
}
|
|
|
|
|
|
// ======================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
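// ---- NOTE(review): this banner announced a centroid helper (assuming tet->nd[0..3] exist and have getCoord().getx/y/z()), but no such helper follows; the functions below are filesystem / debug-export utilities ----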
|
|
|
|
|
|
// Ensures that `path` exists as a directory, creating it (mode 0755) if needed.
// Only the last path component is created; parent directories must exist.
//
// Creation is attempted first and the existing entry is inspected only when
// that fails. This avoids the check-then-create race in which another process
// or thread creates the directory between a stat() and a mkdir(), which would
// otherwise be reported as "mkdir failed: File exists".
//
// Problems are reported on stderr; nothing is returned or thrown.
void make_dir_if_not_exist(const char* path) {
    if (path == nullptr) {
        fprintf(stderr, "make_dir_if_not_exist: null path\n");
        return;
    }

    if (mkdir(path, 0755) == 0) {
        return; // created
    }
    const int mkdirErrno = errno; // stat() below may overwrite errno

    struct stat st;
    if (stat(path, &st) == 0) {
        // Something already exists at this path: fine if it is a directory.
        if (!S_ISDIR(st.st_mode)) {
            fprintf(stderr, "%s exists but is not a directory\n", path);
        }
        return;
    }

    // Nothing usable there and creation failed: report the mkdir error.
    errno = mkdirErrno;
    perror("mkdir failed");
}
|
|
|
|
|
|
|
|
|
// Dumps three integer arrays as text files in the current working directory,
// one value per line:
//
//   NeighMap.txt         <- NeighMap_h[0 .. neighMapSize)
//   NeighClass.txt       <- NeighClass_h[0 .. N_class)
//   NeighClassOffset.txt <- NeighClassOffset_h[0 .. N_class)
//
// A file that cannot be opened, or a null array with a positive count, is
// reported on stderr and skipped; the remaining files are still written.
//
// NOTE(review): only N_class offsets are written. If NeighClassOffset_h holds
// N_class + 1 entries, the last one is not exported - confirm against the
// producer of this array.
void exportNeighData(
    int* NeighMap_h, int neighMapSize,
    int* NeighClass_h, int N_class,
    int* NeighClassOffset_h)
{
    // Writes `count` integers from `values` to `path`, one per line.
    auto dumpArray = [](const char* path, const int* values, int count) {
        std::ofstream ofs(path);
        if (!ofs) {
            fprintf(stderr, "exportNeighData: failed to open %s\n", path);
            return;
        }
        if (values == nullptr) {
            if (count > 0) {
                fprintf(stderr, "exportNeighData: null array for %s, file left empty\n", path);
            }
            return;
        }
        for (int i = 0; i < count; i++) {
            ofs << values[i] << "\n";
        }
    };

    dumpArray("NeighMap.txt", NeighMap_h, neighMapSize);
    dumpArray("NeighClass.txt", NeighClass_h, N_class);
    dumpArray("NeighClassOffset.txt", NeighClassOffset_h, N_class);
}
|
|
|
|
|
|
// ---- Safe CUDA helpers -------------------------------------------------------
|
|
|
// cudaMalloc wrapper that tolerates zero-byte requests.
// For nbytes == 0 no allocation is made: *p is set to nullptr and cudaSuccess
// is returned. Otherwise the result of cudaMalloc is returned unchanged.
inline cudaError_t SafeCudaMalloc(void** p, size_t nbytes)
{
    if (nbytes != 0) {
        return cudaMalloc(p, nbytes);
    }
    *p = nullptr;
    return cudaSuccess;
}
|
|
|
|
|
|
// Host-to-device cudaMemcpy wrapper.
// A zero-byte copy, a null destination or a null source is treated as
// "nothing to do" and reported as cudaSuccess without calling the runtime.
// Otherwise the result of cudaMemcpy(..., cudaMemcpyHostToDevice) is returned.
inline cudaError_t SafeCudaMemcpyH2D(void* dst, const void* src, size_t nbytes)
{
    const bool nothingToCopy = (nbytes == 0) || (dst == nullptr) || (src == nullptr);
    if (nothingToCopy) {
        return cudaSuccess;
    }
    return cudaMemcpy(dst, src, nbytes, cudaMemcpyHostToDevice);
}
|
|
|
|
|
|
// Zero-fills a buffer with cudaMemset.
// A zero-byte request or a null destination is a no-op reported as
// cudaSuccess. Otherwise the result of cudaMemset(dst, 0, nbytes) is returned.
inline cudaError_t SafeCudaMemset0(void* dst, size_t nbytes)
{
    const bool nothingToClear = (dst == nullptr) || (nbytes == 0);
    if (nothingToClear) {
        return cudaSuccess;
    }
    // cudaMemset fills byte-wise, which is exact for an all-zero pattern.
    return cudaMemset(dst, 0, nbytes);
}
|
|
|
|
|
|
// Size in bytes of `count` elements of type T, computed in size_t.
#define BYTES(T, count) (sizeof(T) * static_cast<size_t>(count))

// Checked wrappers: each forwards to the matching Safe* helper and passes the
// returned cudaError_t to CUDA_SAFE_CALL (defined outside this section).
#define CUDA_SAFE_MALLOC(ptr, bytes) CUDA_SAFE_CALL(SafeCudaMalloc((void**)&(ptr), (bytes)))
#define CUDA_SAFE_COPY(dst, src, bytes) CUDA_SAFE_CALL(SafeCudaMemcpyH2D((dst), (src), (bytes)))
#define CUDA_SAFE_ZERO(dst, bytes) CUDA_SAFE_CALL(SafeCudaMemset0((dst), (bytes)))
|
|
|
// ---- End of safe CUDA helpers ------------------------------------------------
|
|
|
|
|
|
|
|
|
#if defined(DGTD_USE_CUDA) || defined(DGTD_USE_CUDA_OPENCL)
|
|
|
#include "kernels.cuh"
|
|
|
// File-scope CUDA streams; they are created in the FemGrp constructor.
cudaStream_t stream_E;
cudaStream_t stream_H;
cudaStream_t stream_Pade;

// Shared excitation descriptor; FemGrp::readBC stores the excitation flag in it.
ExcitationProp excitationProp;

// Per-port excitation descriptors.
std::vector<ExcitationProp> portExcitations;

// NOTE(review): presumably the device-side copy of the excitation descriptors
// (the "_d" suffix) - confirm where it is allocated and released.
ExcitationProp* ExcitationProps_d;
|
|
|
#endif
|
|
|
|
|
|
using namespace ClipperLib;
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// NOTE(review): presumably the number of basis functions per triangular face - confirm.
int TriNumBas = 6;

// Global switch, enabled by default; its consumers are outside this section.
bool ModuleFlag = true;
|
|
|
|
|
|
// Four-component coordinates of the SecondOrderNodes nodes, one row per node.
// Rows 0-3 have a single 1.0 entry and rows 4-9 have two 0.5 entries
// (NOTE(review): i.e. the vertices and edge midpoints of a tetrahedron in
// barycentric coordinates - confirm the node ordering against its users).
static fp_t BaryCoord[SecondOrderNodes][4] = {
    // single unit entry
    {1.0, 0.0, 0.0, 0.0},
    {0.0, 1.0, 0.0, 0.0},
    {0.0, 0.0, 1.0, 0.0},
    {0.0, 0.0, 0.0, 1.0},
    // two half entries
    {0.5, 0.5, 0.0, 0.0},
    {0.5, 0.0, 0.5, 0.0},
    {0.5, 0.0, 0.0, 0.5},
    {0.0, 0.5, 0.5, 0.0},
    {0.0, 0.5, 0.0, 0.5},
    {0.0, 0.0, 0.5, 0.5}
};
|
|
|
|
|
|
// One row per tetrahedron face (4 rows), 18 integer indices per row.
// NOTE(review): presumably maps the 18 face-local basis functions to the
// tetrahedron-local numbering - confirm against the code that indexes it.
static int fac2tet[4][18] = {
    { 5,  4,  3, 11, 10,  9, 12, 13, 25, 24, 23, 26, 30, 31, 32, 42, 43, 44},
    { 5,  2,  1, 11,  8,  7, 14, 15, 25, 22, 21, 27, 33, 34, 35, 42, 43, 44},
    { 4,  2,  0, 10,  8,  6, 16, 17, 24, 22, 20, 28, 36, 37, 38, 42, 43, 44},
    { 3,  1,  0,  9,  7,  6, 18, 19, 23, 21, 20, 29, 39, 40, 41, 42, 43, 44}
};
|
|
|
|
|
|
// 15-entry ordering table of integer codes.
// NOTE(review): the value 3 appears twice (positions 4 and 10) and 15 never
// appears. If the entries are the non-empty 4-bit masks grouped by bit count,
// position 10 starts the three-bit group and 15 may have been intended
// somewhere in the tail - confirm before relying on this table. The data is
// left exactly as it was.
int faceExcitationOrder[15] = {
    1, 2, 4, 8,
    3, 5, 6, 9, 10, 12,
    3, 7, 11, 13, 14
};
|
|
|
|
|
|
// Row i lists the two indices from {0, 1, 2} other than i.
// NOTE(review): presumably the vertex pair of the edge associated with
// first-order index i - confirm against the code that uses it.
int First2Second[3][2] = { {1, 2}, {0, 2}, {0, 1} };
|
|
|
|
|
|
|
|
|
|
|
|
// Writes the leading dim x dim part of a dense matrix to a CSV file through
// rapidcsv.
//
//   filename - destination path handed to rapidcsv::Document::Save
//   mat      - matrix read through getEntry(row, col); a null pointer is
//              reported on stderr and nothing is written
//   dim      - number of rows and columns to export
//
// rapidcsv is filled column by column, so the entries are gathered straight
// into per-column vectors (no intermediate row-major copy).
template<typename T>
void writeDenseMatrixToCSV_rapidcsv(const std::string& filename, denseMat<T>* mat, int dim)
{
    if (mat == nullptr) {
        std::cerr << "writeDenseMatrixToCSV_rapidcsv: null matrix, nothing written to "
                  << filename << std::endl;
        return;
    }

    // columns[j][i] holds entry (i, j).
    std::vector<std::vector<T>> columns(dim, std::vector<T>(dim));
    for (int i = 0; i < dim; ++i) {
        for (int j = 0; j < dim; ++j) {
            columns[j][i] = mat->getEntry(i, j);
        }
    }

    rapidcsv::Document doc;
    for (int j = 0; j < dim; ++j) {
        doc.SetColumn<T>(j, columns[j]);
    }

    doc.Save(filename);
}
|
|
|
|
|
|
// Builds a new dim x dim denseMat<T_out> from a flat array of dim*dim values,
// where entry (i, j) is read from data[i * dim + j] and converted with
// static_cast<T_out>.
//
// The matrix is allocated with `new`; the caller owns the returned pointer.
template<typename T_in, typename T_out>
denseMat<T_out>* wrapFlatMatrixConvert(const T_in* data, int dim) {
    denseMat<T_out>* result = new denseMat<T_out>(dim, dim);

    // `cursor` walks the flat input in the same order as the (row, col) loops.
    const T_in* cursor = data;
    for (int row = 0; row < dim; ++row) {
        for (int col = 0; col < dim; ++col, ++cursor) {
            result->setEntry(row, col, static_cast<T_out>(*cursor));
        }
    }

    return result;
}
|
|
|
|
|
|
|
|
|
// Default constructor: puts every member touched here into a defined "empty"
// state (zero counters, null array pointers, identity coordinate frame) and,
// in CUDA builds, creates the file-scope streams.
//
// A stream that cannot be created is reported on stderr; construction still
// continues, as before.
// NOTE(review): the streams are file-scope globals, so constructing a second
// FemGrp overwrites the handles created by the first - confirm that only one
// instance is ever built.
FemGrp::FemGrp(){
    // Mesh entity counters
    nodeCNT = 0;
    edgeCNT = 0;
    faceCNT = 0;
    tetraCNT = 0;
    bcCNT = 0;

    regularCNT = 1; //at least there is a non regular group
    regularTetraCNT = 0;

    // Mesh / material storage (allocated by the read* methods)
    ndARRAY = nullptr;
    tetARRAY = nullptr;
    edgeARRAY = nullptr;
    faceARRAY = nullptr;
    regularReferenceARRAY = nullptr;
    objProp = nullptr;
    totalObjNum = 0;

    // Pade / non-conformal bookkeeping
    usePade = false;
    padeTime = -1;
    padeCNT = 0;
    tsSource = 0;
    nonConformalCase = false;
    nonConformalCNT = 0;
    neighCNT = 0;

    writeWhilePade = false;
    writePadeTD = false;

    // Coordinate frame: origin at 0 with the canonical x/y/z axes
    Coord.setO(0.0, 0.0, 0.0);
    Coord.setx_axis(1.0, 0.0, 0.0);
    Coord.sety_axis(0.0, 1.0, 0.0);
    Coord.setz_axis(0.0, 0.0, 1.0);
    freq = 0.0;

    // Added for DGTD
    TimeStep_dt = 0.0;
    ClassMul = 0;
    dt_min = 0.0;
    dt_max = 0.0;
    dimE = 0;
    dimH = 0;
    N_class = 0;
    NtimeSteps = 0;
    LocTimeSteps = nullptr;
    LocalExciIndexE = nullptr;
    LocalExciIndexH = nullptr;
    ClassTetraCnt = nullptr;
    ClassTetraIndex = nullptr;
    ClassTetraOffset = nullptr;
    planeWaveMesh = nullptr;
    InterSurfMesh = nullptr;
    SurfMesh = nullptr;
    To = 0.0;
    Tau = 0.0;
    SamplingRate = 1.0;
    FinalTime = 0.0;
    TimeDistFlag = 0; // Port
    ExcitFlag = 0; // Scattering

    regularRegionFlag = false;
    PlaneWaveBCFlag = false;
    PortBCFlag = false;

    // Energy monitoring
    fieldEnergy = 0.0;
    maxFieldEnergy = 0.0;
    energyDecayFactor = 0.0;
    numberOfEnergyPoints = 0;

    UseQuadratureMatrices = true;

#if defined(DGTD_USE_CUDA)
    // Create the three streams in the original order, checking each result.
    cudaStream_t* const streams[3] = { &stream_E, &stream_H, &stream_Pade };
    const char* const streamNames[3] = { "stream_E", "stream_H", "stream_Pade" };
    for (int s = 0; s < 3; ++s) {
        const cudaError_t err = cudaStreamCreate(streams[s]);
        if (err != cudaSuccess) {
            fprintf(stderr, "FemGrp: cudaStreamCreate(%s) failed: %s\n",
                    streamNames[s], cudaGetErrorString(err));
        }
    }

    // Device field buffers are allocated later
    En_d = nullptr;
    Hn12_d = nullptr;
    En1_d = nullptr;
    Hn32_d = nullptr;
#endif
}
|
|
|
|
|
|
// Destructor: intentionally performs no work.
// NOTE(review): the arrays allocated with new[] by the read* methods and the
// CUDA streams created in the constructor are not released here - confirm
// whether they are freed elsewhere.
FemGrp::~FemGrp()
{
}
|
|
|
|
|
|
// Reads "<fname>.node" into ndARRAY.
//
// File layout as parsed here: the length unit, the node count, then one record
// per node: pType Priority singORDER x y z. Coordinates are multiplied by the
// unit before being stored. When usePade is set, the bounding points are
// initialised and updated with every node.
//
// The program exits with status 1 when the file is missing, when the header
// cannot be parsed, or when a node record is truncated or malformed.
void FemGrp::readNODE(){
    // Read only the nodes belonging to this subdomain and neighbors
    char nname[StrLenShort];
    int pType;
    fp_t singORDER, Priority, x, y, z;

    // Bounded formatting: an over-long base name can no longer overrun nname.
    snprintf(nname, sizeof(nname), "%s.node", fname);
    ifstream nodefile(nname, ios::in);

    if(!nodefile){
        cout << "File " << nname << " does NOT exist " << endl;
        exit(1);
    }

    if(usePade){
        initializeMaxMinPoints();
    }

    int nodeTotal = 0;
    nodefile >> unit;
    nodefile >> nodeTotal;
    if(!nodefile){
        cout << "File " << nname << " has an invalid header (unit / node count)" << endl;
        exit(1);
    }

    nodeCNT = nodeTotal; // only one domain, global = local
    if(nodeCNT >= 1){
        ndARRAY = new node[nodeCNT];
        for(int k = 0; k < nodeTotal; k ++){
            // Priority is parsed only to advance past the field.
            if(!(nodefile >> pType >> Priority >> singORDER >> x >> y >> z)){
                cout << "File " << nname << " is truncated or malformed at node " << k << endl;
                exit(1);
            }

            ndARRAY[k].set_globalId(k);
            ndARRAY[k].set_n(k);
            ndARRAY[k].set_pType(pType);
            ndARRAY[k].setPType(static_cast<PointType>(pType));
            ndARRAY[k].set_singORDER(singORDER);
            ndARRAY[k].set_coord(x * unit, y * unit, z * unit);
            // ndARRAY[k].print();

            if(usePade){
                setMaxMinPoints(x * unit, y * unit, z * unit);
            }
        }
        cout << "MaxPoint = (" << maxPoint.getx() << ", " << maxPoint.gety() << ", " << maxPoint.getz() << ") " << endl;
        cout << "MinPoint = (" << minPoint.getx() << ", " << minPoint.gety() << ", " << minPoint.getz() << ") " << endl;
    }
}
|
|
|
|
|
|
// Reads "<fname>.tetra" into tetARRAY.
//
// File layout as parsed here: the tetrahedron count, then per tetrahedron the
// object number, four node ids and four surface numbers. Node ids index
// ndARRAY directly; surface numbers are translated to boundary-condition ids
// through bcMap, which is (re)loaded by readBcMap() first. totalObjNum is
// raised to the largest object number seen.
//
// The program exits with status 1 when the file is missing, when a record is
// truncated or malformed, or when a node id / surface number cannot be a
// valid index.
void FemGrp::readTETRA(){
    // Read only the tetras in this subdomain and neighbors
    int objNum, ndid[NumOfNodes], bcd[NumOfFaces], sNum[NumOfFaces];
    node *nd[NumOfNodes];
    char tname[StrLenShort];

    readBcMap(); // read in surface-btype map

    // Bounded formatting: an over-long base name can no longer overrun tname.
    snprintf(tname, sizeof(tname), "%s.tetra", fname);
    ifstream tetrafile(tname, ios::in);

    if(!tetrafile){
        cout << "File " << tname << " does NOT exist " << endl;
        exit(1);
    }

    int tetraTotal = 0;
    tetrafile >> tetraTotal;

    // Only one domain exists
    tetraCNT = tetraTotal;

    if(tetraCNT >= 1){
        tetARRAY = new tetra[tetraCNT];

        for(int i = 0; i < tetraTotal; i ++){
            tetrafile >> objNum;
            tetrafile >> ndid[0] >> ndid[1] >> ndid[2] >> ndid[3]; //get the ids of the nodes
            tetrafile >> sNum[0] >> sNum[1] >> sNum[2] >> sNum[3]; //get the bc number of the faces
            if(!tetrafile){
                cout << "File " << tname << " is truncated or malformed at tetra " << i << endl;
                exit(1);
            }

            if(objNum > totalObjNum)
                totalObjNum = objNum;

            for(int j = 0; j < 4; j++){
                // Node ids are used as indices into ndARRAY[0 .. nodeCNT).
                if(ndid[j] < 0 || ndid[j] >= nodeCNT){
                    cout << "File " << tname << ": tetra " << i << " references invalid node id " << ndid[j] << endl;
                    exit(1);
                }
                // The upper bound of bcMap is not known here (readBcMap does
                // not store it), so only a negative index can be rejected.
                if(sNum[j] < 0){
                    cout << "File " << tname << ": tetra " << i << " has invalid surface number " << sNum[j] << endl;
                    exit(1);
                }
                nd[j] = &(ndARRAY[ndid[j]]);
                bcd[j] = bcMap[sNum[j]];
            }

            tetARRAY[i].set_objNum(objNum);
            tetARRAY[i].set_node(nd[0], nd[1], nd[2], nd[3]);
            tetARRAY[i].set_nbc(bcd[0], bcd[1], bcd[2], bcd[3]);
            tetARRAY[i].reArrange(); //set the nodes and bc from smallest to greatest id
            tetARRAY[i].setcnt(i);
        }
    }
}
|
|
|
|
|
|
// Reads "<fname>.bcmap" and builds bcMap, the surface-number -> boundary-id
// lookup table.
//
// File layout as parsed here: the surface count, then that many
// "<surface number> <boundary id>" pairs. bcMap gets surfCNT + 1 entries so
// that surface numbers 0 .. surfCNT can be used as indices; every entry starts
// at 0 and only the pairs listed in the file overwrite it.
//
// A missing file terminates the program (exit status 1). A truncated file
// stops the parse with a warning, and a pair whose surface number is outside
// [0, surfCNT] is skipped with a warning instead of writing out of bounds.
//
// NOTE(review): this method is called from both readTETRA and readBC, and each
// call allocates a fresh table without releasing the previous one. It cannot
// be freed safely here because bcMap's initial value is not set in this file.
void FemGrp::readBcMap(){
    char name[StrLenShort];
    int surfCNT = 0;

    // Bounded formatting: an over-long base name can no longer overrun name.
    snprintf(name, sizeof(name), "%s.bcmap", fname);
    ifstream foo(name, ios::in);

    if(!foo){
        cout << "File " << name << " does NOT exist " << endl;
        exit(1);
    }

    foo >> surfCNT;
    if(surfCNT > 0){
        bcMap = new int[surfCNT + 1](); // value-initialised: all entries 0
        bcMap[0] = 0;
        for(int i = 0; i < surfCNT; i ++){
            int sNum = 0, bNum = 0;
            if(!(foo >> sNum >> bNum)){
                cout << "Warning: file " << name << " lists " << i << " of " << surfCNT << " surface entries" << endl;
                break;
            }
            if(sNum < 0 || sNum > surfCNT){
                cout << "Warning: file " << name << ": surface number " << sNum << " is outside [0, " << surfCNT << "], entry ignored" << endl;
                continue;
            }
            bcMap[sNum] = bNum;
        }
    }
}
|
|
|
|
|
|
// Reads "<fname>.prop" and the material files it references into objProp.
//
// totalObjNum is incremented and that many Material entries are allocated.
// The .prop file supplies the material directory followed by one material
// name per object; each material is loaded from "<dir>/<name>.m", parsed as:
//   - one leading token (skipped),
//   - 3x3 relative permittivity as (real, imaginary) pairs - only the real
//     part is stored,
//   - 3x3 relative permeability, same format (its inverse is stored in rum),
//   - 3x3 conductivity as single reals,
//   - for materials whose name starts with "pml": conductivity order,
//     thickness, the three ellipse radii and the six planewave_* bounds.
// Names starting with "scattering" mark the object as a scattering region.
//
// The program exits with status 1 when the .prop file or a material file
// cannot be opened. A number that cannot be parsed keeps the previously read
// value (as before) and a warning is printed once for that material.
void FemGrp::readMaterial(){
    char name[StrLenShort], matName[StrLenShort], dirName[StrLenShort], materialName[StrLenShort];
    fp_t re = 0, im = 0, temp = 0;

    // Scans one real number followed by optional whitespace; the conversion
    // matches the width of fp_t selected at build time.
    auto readReal = [](FILE* fp, fp_t& value) -> bool {
#ifdef DGTD_USE_DOUBLE
        return fscanf(fp, "%le ", &value) == 1;
#else
        return fscanf(fp, "%e ", &value) == 1;
#endif
    };

    totalObjNum ++;
    objProp = new Material[totalObjNum];

    snprintf(name, sizeof(name), "%s.prop", fname);
    ifstream foo(name, ios::in);
    if(!foo){
        cout << "File " << name << " does NOT exist " << endl;
        exit(1);
    }

    // setw bounds the extraction to the size of the destination buffer.
    dirName[0] = '\0';
    foo >> std::setw((int)sizeof(dirName)) >> dirName; //directory where the materials are stored
    DEBUG_INFO("totalObjNum: " + to_string(totalObjNum));

    //TODO: it only takes the real part
    for(int i = 0; i < totalObjNum; i++)
    {
        materialName[0] = '\0';
        foo >> std::setw((int)sizeof(materialName)) >> materialName;
        snprintf(matName, sizeof(matName), "%s/%s.m", dirName, materialName);

        FILE *matFILE = fopen(matName, "r");
        if(matFILE == NULL){
            // Previously a missing material file was passed straight to fscanf.
            cout << "File " << matName << " does NOT exist " << endl;
            exit(1);
        }
        cout << "Reading material properties from file: " << materialName << endl;

        // Skip the leading token; assignment suppression needs no buffer.
        bool complete = (fscanf(matFILE, "%*s") != EOF);

        // Reads the next real into `temp` (unchanged on failure) and returns it.
        auto nextReal = [&]() -> fp_t {
            if(!readReal(matFILE, temp)) complete = false;
            return temp;
        };

        // relative dielectric constant
        for(int j = 0; j < NumOfUnitaryVectors; j ++){
            for(int k = 0; k < NumOfUnitaryVectors; k ++){
                if(!(readReal(matFILE, re) && readReal(matFILE, im))) complete = false;
                objProp[i].epsr.setEntry(j, k, re);
            }
        }

        // relative permeability
        for(int j = 0; j < NumOfUnitaryVectors; j ++){
            for(int k = 0; k < NumOfUnitaryVectors; k ++){
                if(!(readReal(matFILE, re) && readReal(matFILE, im))) complete = false;
                objProp[i].mur.setEntry(j, k, re);
            }
        }

        // conductivity
        for(int j = 0; j < NumOfUnitaryVectors; j ++){
            for(int k = 0; k < NumOfUnitaryVectors; k ++){
                if(!readReal(matFILE, re)) complete = false;
                objProp[i].sigma.setEntry(j, k, re);
            }
        }

        objProp[i].rum = objProp[i].mur.inverse();

        // Tag Scattering Region
        objProp[i].scattering_region = (strncmp(materialName, "scattering", 10) == 0);

        // PML
        if (strncmp(materialName, "pml", 3) == 0)
        {
            PML_flag = true;

            // Set Tetrahedron PML type true
            objProp[i].set_PML_Flag(1);
            cout << "PML Material Properties: " << endl;

            // PML Max Conductivity
            fp_t conductivity_PML = objProp[i].sigma.getEntry(0, 0);
            cout << "conductivity_PML = " << conductivity_PML << endl;

            // PML Order
            PML_conductivity_order = nextReal();
            objProp[i].set_PML_m_ord(PML_conductivity_order);
            cout << "PML_m_ord: " << objProp[i].get_PML_m_ord() << endl;

            // PML Thickness
            PML_thickness = nextReal();
            objProp[i].set_PML_thick(PML_thickness);
            cout << "PML_thickness: " << objProp[i].get_PML_thick() << endl;

            // PML Geometry
            Ellipse_Rx = nextReal();
            Ellipse_Ry = nextReal();
            Ellipse_Rz = nextReal();

            cout << "Ellipse_Rx: " << Ellipse_Rx << endl;
            cout << "Ellipse_Ry: " << Ellipse_Ry << endl;
            cout << "Ellipse_Rz: " << Ellipse_Rz << endl;

            planewave_xmin = nextReal();
            planewave_xmax = nextReal();
            planewave_ymin = nextReal();
            planewave_ymax = nextReal();
            planewave_zmin = nextReal();
            planewave_zmax = nextReal();

            cout << "PML Region:\n";
            cout << " x: [" << planewave_xmin << ", " << planewave_xmax << "]\n";
            cout << " y: [" << planewave_ymin << ", " << planewave_ymax << "]\n";
            cout << " z: [" << planewave_zmin << ", " << planewave_zmax << "]\n";
        }
        else
        {
            // Set Tetrahedron PML type false
            objProp[i].set_PML_Flag(0);
        }

        fclose(matFILE);

        if(!complete){
            cout << "Warning: material file " << matName << " is incomplete or malformed" << endl;
        }
    }
}
|
|
|
|
|
|
void FemGrp::readBC()
|
|
|
{
|
|
|
char name[StrLenShort], bcName[StrLenShort], portName[StrLenShort];
|
|
|
int i, pNum, bNum, bType;
|
|
|
fp_t impR, impI, magE;
|
|
|
fp_t x, y, z;
|
|
|
fp_t theta, phi;
|
|
|
fp_t rox, roy, roz;
|
|
|
fp_t r1x, r1y, r1z;
|
|
|
int PortFlag;
|
|
|
fp_t CHIRP_BW_MHZ;
|
|
|
fp_t phaseE;
|
|
|
fp_t port_dx, port_dy, port_dz;
|
|
|
fp_t vpath_x, vpath_y, vpath_z;
|
|
|
fp_t epr, mur;
|
|
|
|
|
|
PEC_PMC_port_flag = 0;
|
|
|
|
|
|
|
|
|
// For ports
|
|
|
bcNumToPnum.clear();
|
|
|
pnumToBcNum.clear();
|
|
|
|
|
|
// For PML
|
|
|
int pmlMode; // 0->radiation(port) , 1->scattering
|
|
|
fp_t pol_x, pol_y, pol_z;
|
|
|
PML_flag = false;
|
|
|
|
|
|
readBcMap();
|
|
|
|
|
|
sprintf(name, "%s.bc", fname);
|
|
|
ifstream foo(name, ios::in);
|
|
|
|
|
|
if(!foo){
|
|
|
cout << "File " << name << " does NOT exist " << endl;
|
|
|
exit(1);
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
foo >> bcCNT;
|
|
|
|
|
|
bcARRAY = new bc[bcCNT];
|
|
|
portCNT = 0;
|
|
|
nonConformalCNT = 0;
|
|
|
for(i = 0; i < bcCNT; i ++)
|
|
|
{
|
|
|
foo >> bNum >> bcName;
|
|
|
bcARRAY[i].set_bNum(bNum); // id in file
|
|
|
bcARRAY[i].set_name(bcName); // name in file
|
|
|
bType = bcTypeConvert(bcName);
|
|
|
bcARRAY[i].set_bType(bType);
|
|
|
|
|
|
switch (bType)
|
|
|
{
|
|
|
case 0: // none
|
|
|
{
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
case pmcType: // pmc
|
|
|
{
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
case fieldPlaneType:
|
|
|
{
|
|
|
break; // fieldPlane
|
|
|
}
|
|
|
|
|
|
case outputSurfType:
|
|
|
{
|
|
|
cout << "outputSurfType" << endl;
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
case abcType:
|
|
|
{
|
|
|
foo >> impR; //abc
|
|
|
bcARRAY[i].set_rval(impR * No);
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
case constE:
|
|
|
{
|
|
|
foo >> x >> y >> z; // constE
|
|
|
bcARRAY[i].SETFIELD(x, y, z);
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
case pecType:
|
|
|
{
|
|
|
break; // pec
|
|
|
}
|
|
|
|
|
|
case impType:
|
|
|
{
|
|
|
foo >> impR >> impI; //original
|
|
|
bcARRAY[i].set_cval(impR, impI);
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
case portType:
|
|
|
{
|
|
|
|
|
|
// (1) TEM rectangular port
|
|
|
// port <name> <pNum> 1 <impR> <impI> <magE> <dx> <dy> <dz> <BW> <epr> <mur> <vpath_x> <vpath_y> <vpath_z>
|
|
|
// (2) TEM coaxial port
|
|
|
// port <name> <pNum> 2 <impR> <impI> <magE> <dx> <dy> <dz> <BW> <epr> <mur> <r0x> <r0y> <r0z> <r1x> <r1y> <r1z> <r2x> <r2y> <r2z>
|
|
|
// (3) TE rectangular port (a is along height and b is along width)
|
|
|
// port <name> <pNum> 3 <impR> <impI> <magE> <dx> <dy> <dz> <BW> <epr> <mur> <a> <b> <m> <n> <uv0x> <uv0y> <uv0z> <vpx> <vpy> <vpz>
|
|
|
|
|
|
pNum = -1;
|
|
|
PortFlag = 0;
|
|
|
|
|
|
if (!(foo >> portName >> pNum >> PortFlag))
|
|
|
{
|
|
|
std::cerr << "[PORT] Failed to read <name pNum PortFlag>\n";
|
|
|
break;
|
|
|
}
|
|
|
cout << "pNum = " << pNum << endl;
|
|
|
|
|
|
// Initialization of the variables
|
|
|
impR=0.0, impI=0.0, magE=1.0;
|
|
|
port_dx=0.0, port_dy=0.0, port_dz=1.0;
|
|
|
CHIRP_BW_MHZ=0.0, epr=1.0, mur=1.0;
|
|
|
|
|
|
if (!(foo >> impR >> impI >> magE >> port_dx >> port_dy >> port_dz >> CHIRP_BW_MHZ >> epr >> mur))
|
|
|
{
|
|
|
std::cerr << "[PORT] Failed to read common fields for port " << portName << "\n";
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
// Book-keeping
|
|
|
bcARRAY[i].set_name(portName);
|
|
|
bcARRAY[i].set_cval(impR, impI);
|
|
|
bcARRAY[i].set_rval(impR);
|
|
|
bcARRAY[i].set_pNum(pNum);
|
|
|
bcARRAY[i].set_PortFlag(PortFlag);
|
|
|
|
|
|
portCNT++;
|
|
|
PWorPort = 1;
|
|
|
PortBCFlag = true;
|
|
|
|
|
|
// If user gives impR==0, let device compute eta
|
|
|
const double MU0 = 1.2566370614359173e-6; // 4π·1e-7
|
|
|
const double EPS0 = 8.854187817e-12;
|
|
|
const double PI = 3.14159265358979323846;
|
|
|
if (epr <= 0.0) epr = 1.0;
|
|
|
if (mur <= 0.0) mur = 1.0;
|
|
|
const double mu = mur * MU0;
|
|
|
const double eps = epr * EPS0;
|
|
|
|
|
|
#if defined(DGTD_USE_CUDA) || defined(DGTD_USE_CUDA_OPENCL)
|
|
|
|
|
|
|
|
|
excitationProp.ExcitationFlag = ExcitFlag;
|
|
|
|
|
|
ExcitationProp portEx{};
|
|
|
portEx.portNum = pNum;
|
|
|
portEx.BCNum = i;
|
|
|
|
|
|
// Timing / envelope
|
|
|
portEx.TimeDistributionFlag = getTimeDist();
|
|
|
portEx.to = To;
|
|
|
portEx.tau = Tau;
|
|
|
portEx.freq_m = (fp_t_ts)freq; // MHz
|
|
|
portEx.CHIRP_BW_MHZ = (fp_t_ts)CHIRP_BW_MHZ;
|
|
|
|
|
|
// Medium / amplitude
|
|
|
portEx.epr = (fp_t_ts)((epr>0.0)? epr : 1.0);
|
|
|
portEx.mur = (fp_t_ts)((mur>0.0)? mur : 1.0);
|
|
|
portEx.Emagnitude = (fp_t_ts)magE;
|
|
|
|
|
|
// Direction vector (diagnostic; geometry gives unit normal)
|
|
|
portEx.PortDirection[0] = (fp_t_ts)port_dx;
|
|
|
portEx.PortDirection[1] = (fp_t_ts)port_dy;
|
|
|
portEx.PortDirection[2] = (fp_t_ts)port_dz;
|
|
|
|
|
|
// E/H field impedance (if 0, device computes implicitly)
|
|
|
portEx.PortImpedance = (fp_t_ts)impR;
|
|
|
portEx.PortFlag = PortFlag;
|
|
|
|
|
|
// Map BC <-> Port
|
|
|
bcNumToPnum[portEx.BCNum] = portEx.portNum;
|
|
|
pnumToBcNum[portEx.portNum] = portEx.BCNum;
|
|
|
|
|
|
|
|
|
// ---- Branch by PortFlag for extra fields ----
|
|
|
switch (PortFlag)
|
|
|
{
|
|
|
case 1: // TEM rectangular: needs vpath
|
|
|
{
|
|
|
double vpx=0, vpy=0, vpz=0;
|
|
|
if (!(foo >> vpx >> vpy >> vpz))
|
|
|
{
|
|
|
std::cerr << "[PORT] TEM-rect missing <vpath_x vpath_y vpath_z> for " << portName << "\n";
|
|
|
// Default vpath to PortDirection if absent
|
|
|
vpx = port_dx; vpy = port_dy; vpz = port_dz;
|
|
|
}
|
|
|
portEx.vpath[0] = (fp_t_ts)vpx;
|
|
|
portEx.vpath[1] = (fp_t_ts)vpy;
|
|
|
portEx.vpath[2] = (fp_t_ts)vpz;
|
|
|
|
|
|
|
|
|
|
|
|
if (impR == 0.0 && impI == 0.0)
|
|
|
{
|
|
|
double eta = std::sqrt(mu/eps); // η = sqrt(μ/ε)
|
|
|
portEx.PortImpedance = (fp_t_ts)eta;
|
|
|
bcARRAY[i].set_rval(eta);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
portEx.PortImpedance = (fp_t_ts)impR;
|
|
|
bcARRAY[i].set_rval(impR);
|
|
|
}
|
|
|
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
case 2: // TEM coax: needs r0 (center), r1 (inner), r2 (outer)
|
|
|
{
|
|
|
double r0x, r0y, r0z, r1x, r1y, r1z, r2x, r2y, r2z;
|
|
|
if (!(foo >> r0x >> r0y >> r0z
|
|
|
>> r1x >> r1y >> r1z
|
|
|
>> r2x >> r2y >> r2z)) {
|
|
|
std::cerr << "[PORT] TEM-coax missing r0/r1/r2 for " << portName << "\n";
|
|
|
// Provide safe defaults (degenerate; will inject 0)
|
|
|
r0x=r0y=r0z=0; r1x=1e-3; r1y=r1z=0; r2x=4e-3; r2y=r2z=0;
|
|
|
}
|
|
|
portEx.r0_port[0]=(fp_t_ts)r0x; portEx.r0_port[1]=(fp_t_ts)r0y; portEx.r0_port[2]=(fp_t_ts)r0z;
|
|
|
portEx.r1_port[0]=(fp_t_ts)r1x; portEx.r1_port[1]=(fp_t_ts)r1y; portEx.r1_port[2]=(fp_t_ts)r1z;
|
|
|
portEx.r2_port[0]=(fp_t_ts)r2x; portEx.r2_port[1]=(fp_t_ts)r2y; portEx.r2_port[2]=(fp_t_ts)r2z;
|
|
|
|
|
|
std::array<double,3> v10 = { r1x - r0x, r1y - r0y, r1z - r0z };
|
|
|
std::array<double,3> v20 = { r2x - r0x, r2y - r0y, r2z - r0z };
|
|
|
const double a = std::sqrt(v10[0]*v10[0] + v10[1]*v10[1] + v10[2]*v10[2]);
|
|
|
const double b = std::sqrt(v20[0]*v20[0] + v20[1]*v20[1] + v20[2]*v20[2]);
|
|
|
|
|
|
if (impR == 0.0 && impI == 0.0)
|
|
|
{
|
|
|
double eta = std::sqrt(mu/eps); // η = sqrt(μ/ε)
|
|
|
|
|
|
// Characteristic (V/I) line impedance of the coax
|
|
|
double Z0_line = std::numeric_limits<double>::quiet_NaN();
|
|
|
bool geom_ok = (a > 0.0) && (b > a);
|
|
|
if (geom_ok)
|
|
|
{
|
|
|
Z0_line = (eta / (2.0*PI)) * std::log(b/a);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
std::cerr << "[PORT] TEM-coax invalid radii (a=" << a << ", b=" << b
|
|
|
<< "). Using only field impedance eta for BC.\n";
|
|
|
}
|
|
|
|
|
|
|
|
|
portEx.PortImpedance = (fp_t_ts)Z0_line;
|
|
|
bcARRAY[i].set_rval(Z0_line);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
portEx.PortImpedance = (fp_t_ts)impR;
|
|
|
bcARRAY[i].set_rval(impR);
|
|
|
}
|
|
|
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
case 3: // TE_mn rectangular: needs rect_a rect_b m n uv0x uv0y uv0z vpx vpy vpz
|
|
|
{
|
|
|
double rect_a, rect_b;
|
|
|
int m, n;
|
|
|
double uv0x, uv0y, uv0z;
|
|
|
double vpx, vpy, vpz;
|
|
|
|
|
|
if (!(foo >> rect_a >> rect_b >> m >> n >> uv0x >> uv0y >> uv0z >> vpx >> vpy >> vpz))
|
|
|
{
|
|
|
std::cerr << "[PORT] TE_mn missing <a b m n uv0x uv0y uv0z vpx vpy vpz> for " << portName << "\n";
|
|
|
// Safe defaults (device clamps tiny a/b)
|
|
|
rect_a = 1.0; rect_b = 1.0; m = 1; n = 0;
|
|
|
uv0x = uv0y = uv0z = 0.0;
|
|
|
// use PortDirection as fallback vpath
|
|
|
vpx = port_dx; vpy = port_dy; vpz = port_dz;
|
|
|
}
|
|
|
|
|
|
portEx.rect_a = (fp_t_ts)rect_a;
|
|
|
portEx.rect_b = (fp_t_ts)rect_b;
|
|
|
portEx.m = m;
|
|
|
portEx.n = n;
|
|
|
portEx.uv0[0]=(fp_t_ts)uv0x;
|
|
|
portEx.uv0[1]=(fp_t_ts)uv0y;
|
|
|
portEx.uv0[2]=(fp_t_ts)uv0z;
|
|
|
|
|
|
// store the raw vpath too (optional, but handy for logging/diagnostics)
|
|
|
portEx.vpath[0] = (fp_t_ts)vpx;
|
|
|
portEx.vpath[1] = (fp_t_ts)vpy;
|
|
|
portEx.vpath[2] = (fp_t_ts)vpz;
|
|
|
|
|
|
// ---- Build t1, t2 from vpath and PortDirection (n) ----
|
|
|
// n = normalized PortDirection
|
|
|
double nx = port_dx, ny = port_dy, nz = port_dz;
|
|
|
double nrm = std::sqrt(nx*nx + ny*ny + nz*nz);
|
|
|
if (nrm < 1e-14) { nx = 0.0; ny = 0.0; nz = 1.0; nrm = 1.0; }
|
|
|
nx /= nrm; ny /= nrm; nz /= nrm;
|
|
|
|
|
|
double t1x = vpx;
|
|
|
double t1y = vpy;
|
|
|
double t1z = vpz;
|
|
|
|
|
|
// t2 = n × t1
|
|
|
double t2x = ny*t1z - nz*t1y;
|
|
|
double t2y = nz*t1x - nx*t1z;
|
|
|
double t2z = nx*t1y - ny*t1x;
|
|
|
double t2n = std::sqrt(t2x*t2x + t2y*t2y + t2z*t2z);
|
|
|
t2x /= t2n; t2y /= t2n; t2z /= t2n;
|
|
|
|
|
|
// store in the excitation
|
|
|
portEx.t1[0] = (fp_t_ts)t1x; portEx.t1[1] = (fp_t_ts)t1y; portEx.t1[2] = (fp_t_ts)t1z;
|
|
|
portEx.t2[0] = (fp_t_ts)t2x; portEx.t2[1] = (fp_t_ts)t2y; portEx.t2[2] = (fp_t_ts)t2z;
|
|
|
|
|
|
|
|
|
if (impR == 0.0 && impI == 0.0)
|
|
|
{
|
|
|
// Geometry (meters) & mode indices already read into rect_a, rect_b, m, n
|
|
|
const double a = (rect_a > 0.0) ? rect_a : 1e-12;
|
|
|
const double b = (rect_b > 0.0) ? rect_b : 1e-12;
|
|
|
|
|
|
// Frequency (MHz in your code)
|
|
|
const double omega = 2.0 * PI * freq * 1.0e6;
|
|
|
|
|
|
const double kc2 = std::pow(m*PI/a, 2.0) + std::pow(n*PI/b, 2.0); // k_cutoff^2
|
|
|
const double k2 = omega*omega * mu * eps; // k^2
|
|
|
|
|
|
double Z_TE_real = std::numeric_limits<double>::quiet_NaN();
|
|
|
double Z_TE_imag = 0.0;
|
|
|
|
|
|
if (k2 <= kc2)
|
|
|
{
|
|
|
// Below cutoff: Z_TE = -j*(ωμ/α), purely reactive
|
|
|
const double alpha = std::sqrt(kc2 - k2);
|
|
|
Z_TE_imag = -(omega * mu) / alpha;
|
|
|
Z_TE_real = 1e12; // large real placeholder for BC scalar
|
|
|
std::cerr << "[PORT] TE_mn below cutoff (a=" << a << ", b=" << b
|
|
|
<< ", m=" << m << ", n=" << n << "). Using large real Z for BC, "
|
|
|
<< "Im{Z_TE}=" << Z_TE_imag << " Ohm.\n";
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
// Above cutoff: Z_TE is real and positive
|
|
|
const double beta = std::sqrt(k2 - kc2);
|
|
|
Z_TE_real = (omega * mu) / beta;
|
|
|
}
|
|
|
|
|
|
// User asked us to determine impedance → store TE wave impedance
|
|
|
portEx.PortImpedance = (fp_t_ts)Z_TE_real;
|
|
|
bcARRAY[i].set_rval(Z_TE_real);
|
|
|
bcARRAY[i].set_cval(Z_TE_real, Z_TE_imag);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
// User-specified
|
|
|
portEx.PortImpedance = (fp_t_ts)impR;
|
|
|
bcARRAY[i].set_rval(impR);
|
|
|
bcARRAY[i].set_cval(impR, impI);
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
default:
|
|
|
{
|
|
|
std::cerr << "[PORT] Unknown PortFlag=" << PortFlag << " for " << portName
|
|
|
<< ". Defaulting to TEM-rect with vpath=PortDirection.\n";
|
|
|
portEx.PortFlag = 1;
|
|
|
portEx.vpath[0] = (fp_t_ts)port_dx;
|
|
|
portEx.vpath[1] = (fp_t_ts)port_dy;
|
|
|
portEx.vpath[2] = (fp_t_ts)port_dz;
|
|
|
if (impR == 0.0 && impI == 0.0) portEx.PortImpedance = (fp_t_ts)0.0;
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
portExcitations.push_back(portEx);
|
|
|
|
|
|
// Log summary
|
|
|
std::cout << "\n=========================\n"
|
|
|
<< " PORT BOUNDARY CONDITION \n"
|
|
|
<< "=========================\n"
|
|
|
<< "PortName : " << portName << "\n"
|
|
|
<< "PortNum : " << (portEx.portNum - 1) << "\n"
|
|
|
<< "PortFlag : " << portEx.PortFlag << " (1=TEM-rect, 2=TEM-coax, 3=TE_mn)\n"
|
|
|
<< "E/H Zport : " << portEx.PortImpedance << " + j" << impI << " (0 => implicit)\n"
|
|
|
<< "magE : " << portEx.Emagnitude << "\n"
|
|
|
<< "PortDir : (" << port_dx << ", " << port_dy << ", " << port_dz << ")\n"
|
|
|
<< "epr, mur : " << epr << ", " << mur << "\n";
|
|
|
|
|
|
if (portEx.PortFlag == 1)
|
|
|
{
|
|
|
std::cout << "vpath : (" << portEx.vpath[0] << ", " << portEx.vpath[1] << ", " << portEx.vpath[2] << ")\n";
|
|
|
}
|
|
|
else if (portEx.PortFlag == 2)
|
|
|
{
|
|
|
std::cout << "r0 : (" << portEx.r0_port[0] << ", " << portEx.r0_port[1] << ", " << portEx.r0_port[2] << ")\n"
|
|
|
<< "r1(inner) : (" << portEx.r1_port[0] << ", " << portEx.r1_port[1] << ", " << portEx.r1_port[2] << ")\n"
|
|
|
<< "r2(outer) : (" << portEx.r2_port[0] << ", " << portEx.r2_port[1] << ", " << portEx.r2_port[2] << ")\n";
|
|
|
}
|
|
|
else if (portEx.PortFlag == 3)
|
|
|
{
|
|
|
std::cout << "rect(a,b) : " << portEx.rect_a << ", " << portEx.rect_b << "\n"
|
|
|
<< "m,n : " << portEx.m << ", " << portEx.n << "\n"
|
|
|
<< "uv0 : (" << portEx.uv0[0] << ", " << portEx.uv0[1] << ", " << portEx.uv0[2] << ")\n";
|
|
|
}
|
|
|
std::cout << "=========================\n\n";
|
|
|
#endif
|
|
|
|
|
|
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
|
|
|
case planeWaveType: // planeWave (theta, phi, ex, ey, ez)
|
|
|
{
|
|
|
char typeName[StrLenShort];
|
|
|
foo >> typeName >> magE >> theta >> phi >> x >> y >> z >> rox >> roy >> roz;
|
|
|
|
|
|
cout << " " << endl;
|
|
|
cout << "====================================================================================================" << endl;
|
|
|
cout << " PLANEWAVE BOUNDARY CONDITION " << endl;
|
|
|
cout << "====================================================================================================" << endl;
|
|
|
printf(" PlaneWaveType : %f %f %f %f %f %f %f %f %f\n", magE, theta, phi, x, y, z, rox, roy, roz);
|
|
|
printf(" Unit : %f\n", unit);
|
|
|
bcARRAY[i].set_name(typeName);
|
|
|
bcARRAY[i].set_magE(magE);
|
|
|
bcARRAY[i].setTheta(theta);
|
|
|
bcARRAY[i].setPhi(phi);
|
|
|
bcARRAY[i].set_cval(No, 0.0);
|
|
|
bcARRAY[i].SETFIELD(x, y, z);
|
|
|
bcARRAY[i].setPW_ro(rox * unit, roy * unit, roz * unit);
|
|
|
|
|
|
cout << " Name : " << typeName << endl;
|
|
|
cout << " magE : " << magE << endl;
|
|
|
cout << " Theta : " << theta << endl;
|
|
|
cout << " Phi : " << phi << endl;
|
|
|
cout << " POL : " << "(" << x << ", " << y << ", " << z << ")" << endl;
|
|
|
cout << " r0 : " << "(" << rox << ", " << roy << ", " << roz << ")" << endl;
|
|
|
cout << "====================================================================================================" << endl;
|
|
|
cout << " " << endl;
|
|
|
|
|
|
PWorPort = 0;
|
|
|
|
|
|
#if defined(DGTD_USE_CUDA) || defined(DGTD_USE_CUDA_OPENCL)
|
|
|
// for cuda kernel
|
|
|
excitationProp.ro[0] = rox * unit;
|
|
|
excitationProp.ro[1] = roy * unit;
|
|
|
excitationProp.ro[2] = roz * unit;
|
|
|
|
|
|
excitationProp.Emagnitude = magE;
|
|
|
|
|
|
excitationProp.Epol[0] = x;
|
|
|
excitationProp.Epol[1] = y;
|
|
|
excitationProp.Epol[2] = z;
|
|
|
|
|
|
excitationProp.ExcitationFlag = ExcitFlag;
|
|
|
excitationProp.freq_m = freq;
|
|
|
|
|
|
excitationProp.to = To;
|
|
|
excitationProp.tau = Tau;
|
|
|
|
|
|
excitationProp.phi = phi;
|
|
|
excitationProp.theta = theta;
|
|
|
#endif
|
|
|
|
|
|
interior_excitation_flag = false;
|
|
|
planeWaveMesh = new PlaneWaveMesh;
|
|
|
planeWaveMesh->setName(typeName);
|
|
|
PlaneWaveBCFlag = true;
|
|
|
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
case nonConformal:
|
|
|
{
|
|
|
nonConformalCase = true;
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
|
|
|
// Excitation Mode (PlaneWave into PML region)
|
|
|
case pmlType:
|
|
|
{
|
|
|
foo >> pmlMode >> portName >> magE >> theta >> phi >> pol_x >> pol_y >> pol_z >> rox >> roy >> roz;
|
|
|
|
|
|
PWorPort = 0;
|
|
|
|
|
|
std::cout << "\n";
|
|
|
std::cout << "====================================================================================================" << std::endl;
|
|
|
std::cout << " PML EXCITATION BOUNDARY CONDITION " << std::endl;
|
|
|
std::cout << "====================================================================================================" << std::endl;
|
|
|
printf(" PML Mode : %d\n", pmlMode);
|
|
|
printf(" Port Name : %s\n", portName);
|
|
|
printf(" magE : %f\n", magE);
|
|
|
printf(" Theta : %f\n", theta);
|
|
|
printf(" Phi : %f\n", phi);
|
|
|
printf(" POL : (%f, %f, %f)\n", pol_x, pol_y, pol_z);
|
|
|
printf(" r0 : (%f, %f, %f)\n", rox, roy, roz);
|
|
|
printf(" Unit : %f\n", unit); // Make sure `unit` is defined
|
|
|
std::cout << "====================================================================================================" << std::endl;
|
|
|
std::cout << "\n";
|
|
|
|
|
|
// Apply to BC object
|
|
|
bcARRAY[i].set_name(portName);
|
|
|
bcARRAY[i].set_magE(magE);
|
|
|
bcARRAY[i].setTheta(theta);
|
|
|
bcARRAY[i].setPhi(phi);
|
|
|
bcARRAY[i].set_cval(No, 0.0);
|
|
|
bcARRAY[i].SETFIELD(pol_x, pol_y, pol_z); // Assuming SETFIELD is for polarization
|
|
|
bcARRAY[i].setPW_ro(rox * unit, roy * unit, roz * unit);
|
|
|
|
|
|
|
|
|
|
|
|
#if defined(DGTD_USE_CUDA) || defined(DGTD_USE_CUDA_OPENCL)
|
|
|
|
|
|
excitationProp.ro[0] = rox * unit;
|
|
|
excitationProp.ro[1] = roy * unit;
|
|
|
excitationProp.ro[2] = roz * unit;
|
|
|
|
|
|
excitationProp.Emagnitude = magE;
|
|
|
excitationProp.Epol[0] = pol_x;
|
|
|
excitationProp.Epol[1] = pol_y;
|
|
|
excitationProp.Epol[2] = pol_z;
|
|
|
|
|
|
excitationProp.ExcitationFlag = ExcitFlag; // Must be defined
|
|
|
excitationProp.freq_m = freq; // Must be defined
|
|
|
|
|
|
excitationProp.to = To; // Must be defined
|
|
|
excitationProp.tau = Tau; // Must be defined
|
|
|
|
|
|
excitationProp.phi = phi;
|
|
|
excitationProp.theta = theta;
|
|
|
|
|
|
#endif
|
|
|
|
|
|
if (pmlMode == 1)
|
|
|
{
|
|
|
interior_excitation_flag = true;
|
|
|
|
|
|
planeWaveMesh = new PlaneWaveMesh;
|
|
|
planeWaveMesh->setName(portName);
|
|
|
PlaneWaveBCFlag = true;
|
|
|
}
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
Make the edge and face arrays
|
|
|
*/
|
|
|
|
|
|
/**
 * Return the boundary type of one local edge of a tetrahedron.
 *
 * Every local edge is associated with two of the tetrahedron's local faces;
 * the edge takes the larger of those two faces' boundary-type values.
 *
 * @param n    local edge index, expected in [0, 5]
 * @param nbc  boundary type of each local face; entries 0..3 are read
 * @return     the larger of the two associated face types, or 0 when n is
 *             outside [0, 5] (0 is also what FemGrp::bcArrange returns for an
 *             unmatched marker). The original switch had no default branch and
 *             read uninitialized locals in that case.
 */
int localEdgebType(int n, int nbc[]){
    // Local faces paired with each local edge, indexed by edge number.
    static const int edgeFaces[6][2] = {
        {2, 3},
        {1, 3},
        {1, 2},
        {0, 3},
        {0, 2},
        {0, 1}
    };

    if(n < 0 || n > 5)
        return 0;

    const int nb1 = nbc[edgeFaces[n][0]];
    const int nb2 = nbc[edgeFaces[n][1]];

    return (nb1 > nb2) ? nb1 : nb2;
}
|
|
|
|
|
|
void FemGrp::makeEdgeArray(){
|
|
|
int i, j;
|
|
|
|
|
|
// oversized array for edge BCs
|
|
|
int* edgeBcs = new int[tetraCNT * 6];
|
|
|
// store global edge ids for set/array use
|
|
|
int** edgeIds = new int*[tetraCNT];
|
|
|
for(i = 0; i < tetraCNT; i++)
|
|
|
edgeIds[i] = new int[NumOfEdges];
|
|
|
|
|
|
int nbc[NumOfFaces];
|
|
|
list<edge*> edgeList;
|
|
|
list<edge*>::iterator edgeListIter;
|
|
|
edgeSetPtr = new set<edge>;
|
|
|
set<edge>::iterator edgeSetIter;
|
|
|
int index = 0;
|
|
|
for(i = 0; i < tetraCNT; i++){
|
|
|
tetra* tet = &(tetARRAY[i]);
|
|
|
|
|
|
for(j = 0; j < NumOfFaces; j++)
|
|
|
nbc[j] = bcArrange(tet->getbc(j)); //return the bc (the number define for the material) of each face
|
|
|
|
|
|
for(j = 0; j < NumOfEdges; j++){
|
|
|
int n0 = edgeMAP[j][0];
|
|
|
int n1 = edgeMAP[j][1];
|
|
|
int bType = localEdgebType(j, nbc); //return the most important bc of the edge checking both faces
|
|
|
|
|
|
node* nd0 = tet->getNode(n0);
|
|
|
node* nd1 = tet->getNode(n1);
|
|
|
edge* eg = new edge;
|
|
|
eg->setEdge(nd0, nd1);
|
|
|
|
|
|
//add each edge just once
|
|
|
edgeSetIter = edgeSetPtr->find(*eg);
|
|
|
if(edgeSetIter == edgeSetPtr->end()){
|
|
|
// new edge
|
|
|
eg->setGlobalCnt(index);
|
|
|
edgeIds[i][j] = index;
|
|
|
eg->setbType(bType);
|
|
|
edgeBcs[index] = bType;
|
|
|
edgeSetPtr->insert(*eg);
|
|
|
edgeList.push_back(eg);
|
|
|
index++;
|
|
|
}else{
|
|
|
// set the boundary condicion of higher value if the edge was already set
|
|
|
delete eg;
|
|
|
edgeIds[i][j] = edgeSetIter->getGlobalCnt();
|
|
|
if(bType > edgeSetIter->getbType()){
|
|
|
edgeBcs[edgeIds[i][j]] = bType;
|
|
|
(const_cast<edge&>(*edgeSetIter)).setbType(bType);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
// convert the list into an array
|
|
|
edgeCNT = edgeList.size();
|
|
|
cout << " edgeCNT == " << edgeCNT << endl;
|
|
|
edgeARRAY = new edge*[edgeCNT];
|
|
|
index = 0;
|
|
|
for(edgeListIter = edgeList.begin(); edgeListIter != edgeList.end(); edgeListIter++)
|
|
|
edgeARRAY[index++] = *edgeListIter;
|
|
|
|
|
|
// set the boundary conditions
|
|
|
for(i = 0; i < edgeCNT; i++)
|
|
|
edgeARRAY[i]->setbType(edgeBcs[i]);
|
|
|
delete [] edgeBcs;
|
|
|
|
|
|
// get tetra-edge linkage
|
|
|
for(i = 0; i < tetraCNT; i++){
|
|
|
for(j = 0; j < NumOfEdges; j++)
|
|
|
tetARRAY[i].setEdge(edgeARRAY[edgeIds[i][j]], j);
|
|
|
}
|
|
|
for(i = 0; i < tetraCNT; i++)
|
|
|
delete [] edgeIds[i];
|
|
|
|
|
|
delete [] edgeIds;
|
|
|
}
|
|
|
|
|
|
void FemGrp::makeNonConformalArray(){
|
|
|
ncARRAY = new int[nonConformalCNT];
|
|
|
int index = 0;
|
|
|
for(int i=0; i < tetraCNT; i++){
|
|
|
tetra* tet = &(tetARRAY[i]);
|
|
|
if(tet->getIsNC()){
|
|
|
ncARRAY[index] = tet->cnt;
|
|
|
index++;
|
|
|
}
|
|
|
}
|
|
|
if(nonConformalCNT != index)
|
|
|
cout << "ERROR in makeNonConformalArray" << endl;
|
|
|
}
|
|
|
|
|
|
|
|
|
void FemGrp::makeFaceArray()
|
|
|
{
|
|
|
int i, j;
|
|
|
|
|
|
// oversized arrays for face BCs and a map from global IDs with PEC faces to IDs without PEC face
|
|
|
int* faceBcs = new int[tetraCNT * NumOfFaces];
|
|
|
int* indexMap = new int[tetraCNT * NumOfFaces]; //TODO: review what's the use of this array
|
|
|
memset(faceBcs, 0, tetraCNT * NumOfFaces * sizeof(int));
|
|
|
memset(indexMap, 0, tetraCNT * NumOfFaces * sizeof(int));
|
|
|
|
|
|
// store global face ids for set/array use
|
|
|
int** faceIds = new int*[tetraCNT];
|
|
|
for(i = 0; i < tetraCNT; i++){
|
|
|
faceIds[i] = new int[NumOfFaces];
|
|
|
memset(faceIds[i], 0, NumOfFaces * sizeof(int));
|
|
|
}
|
|
|
|
|
|
edge eg;
|
|
|
list<face*> faceList;
|
|
|
vector<face*> faceListVector;
|
|
|
list<face*>::iterator faceListIter;
|
|
|
faceSetPtr = new set<face>;
|
|
|
set<face>::iterator faceSetIter;
|
|
|
int index = 0;
|
|
|
int indexNoPec = 0; //TODO: review what's the use of this variable
|
|
|
for(i = 0; i < tetraCNT; i++){
|
|
|
tetra* tet = &(tetARRAY[i]);
|
|
|
|
|
|
for(j = 0; j < NumOfFaces; j++){
|
|
|
int bcNum = tet->getbc(j); // marker
|
|
|
int bType = bcArrange(bcNum); // bc type in the defines
|
|
|
bc* bcPtr = getbcPtr(bcNum); // pointer to the bc
|
|
|
if(bType == nonConformal && !(tet->isNonConformal)){
|
|
|
nonConformalCNT++;
|
|
|
tet->setIsNC(true);
|
|
|
}
|
|
|
node* nd0 = tet->getNode(faceMAP[j][0]);
|
|
|
node* nd1 = tet->getNode(faceMAP[j][1]);
|
|
|
node* nd2 = tet->getNode(faceMAP[j][2]);
|
|
|
face* fc = new face;
|
|
|
fc->setFace(nd0, nd1, nd2); //set a face with the nodes ordered from smallest to greatest id
|
|
|
faceSetIter = faceSetPtr->find(*fc);
|
|
|
if(faceSetIter == faceSetPtr->end()){
|
|
|
// new face
|
|
|
fc->setcnt(index);
|
|
|
faceIds[i][j] = index;
|
|
|
|
|
|
if(bType != pecType)
|
|
|
indexMap[index] = indexNoPec++;
|
|
|
|
|
|
faceBcs[index] = bType;
|
|
|
fc->setbcPtr(bcPtr);
|
|
|
|
|
|
// set up face-edge linkage
|
|
|
eg.setEdge(nd1, nd2);
|
|
|
fc->setEdge(edgeARRAY[edgeSetPtr->find(eg)->getGlobalCnt()], 0);
|
|
|
eg.setEdge(nd0, nd2);
|
|
|
fc->setEdge(edgeARRAY[edgeSetPtr->find(eg)->getGlobalCnt()], 1);
|
|
|
eg.setEdge(nd1, nd0);
|
|
|
fc->setEdge(edgeARRAY[edgeSetPtr->find(eg)->getGlobalCnt()], 2);
|
|
|
|
|
|
index++;
|
|
|
faceSetPtr->insert(*fc);
|
|
|
faceList.push_back(fc);
|
|
|
faceListVector.push_back(fc);
|
|
|
}else{
|
|
|
delete fc;
|
|
|
|
|
|
faceIds[i][j] = faceSetIter->getcnt(); // the j-th local face of tetra i is an old face
|
|
|
if(bType > faceSetIter->getbType()){ // choose btype with a larger value
|
|
|
faceBcs[faceIds[i][j]] = bType;
|
|
|
(const_cast<face&>(*faceSetIter)).setbType(bType);
|
|
|
|
|
|
face* f = faceListVector[faceIds[i][j]];
|
|
|
f->setbType(bType);
|
|
|
f->setbcPtr(bcPtr);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// convert the list into an array
|
|
|
int totalFaceCount = faceList.size();
|
|
|
cout << " totalFaceCount == " << totalFaceCount << endl;
|
|
|
face** totalFaceArray = new face*[totalFaceCount];
|
|
|
index = 0;
|
|
|
for(faceListIter = faceList.begin(); faceListIter != faceList.end(); faceListIter++)
|
|
|
totalFaceArray[index++] = *faceListIter;
|
|
|
|
|
|
// set the boundary conditions
|
|
|
for(i = 0; i < totalFaceCount; i++){
|
|
|
totalFaceArray[i]->setbType(faceBcs[i]);
|
|
|
}
|
|
|
|
|
|
// set tetra-face linkage
|
|
|
for(i = 0; i < tetraCNT; i++){
|
|
|
tetra* tet = &(tetARRAY[i]);
|
|
|
for(j = 0; j < 4; j++){
|
|
|
face* fc = totalFaceArray[faceIds[i][j]];
|
|
|
tet->setFace(fc, j);
|
|
|
if(fc->hydra[0] == nullptr){ // newly found face linkage
|
|
|
fc->hydra[0] = tet;
|
|
|
} else { // already existed, half-linked
|
|
|
fc->hydra[1] = tet;
|
|
|
fc->tetraArrange(); //order hydra[0] < hydra[1]
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
for(i = 0; i < tetraCNT; i++)
|
|
|
delete [] faceIds[i];
|
|
|
delete [] faceIds;
|
|
|
delete [] totalFaceArray;
|
|
|
delete [] faceBcs;
|
|
|
delete [] indexMap;
|
|
|
|
|
|
// convert the reduced list into an array
|
|
|
faceCNT = faceList.size();
|
|
|
faceARRAY = new face*[faceCNT];
|
|
|
|
|
|
indexNoPec = 0;
|
|
|
for(faceListIter = faceList.begin(); faceListIter != faceList.end(); faceListIter++)
|
|
|
faceARRAY[indexNoPec++] = *faceListIter;
|
|
|
|
|
|
while (faceSetIter != faceSetPtr->end()){
|
|
|
set<face>::iterator tmpIter = faceSetIter;
|
|
|
faceSetIter++;
|
|
|
faceSetPtr->erase(tmpIter);
|
|
|
}
|
|
|
faceSetPtr->clear();
|
|
|
delete faceSetPtr;
|
|
|
|
|
|
set<edge>::iterator edgeSetIter = edgeSetPtr->begin();
|
|
|
while(edgeSetIter != edgeSetPtr->end()){
|
|
|
set<edge>::iterator tmpIter = edgeSetIter;
|
|
|
edgeSetIter++;
|
|
|
edgeSetPtr->erase(tmpIter);
|
|
|
}
|
|
|
edgeSetPtr->clear();
|
|
|
delete edgeSetPtr;
|
|
|
}
|
|
|
|
|
|
int FemGrp::bcArrange(int bNum){
|
|
|
// from that indicated in file to type defined in bc.h (marker to bc type)
|
|
|
for(int i = 0; i < bcCNT; i ++){
|
|
|
if(bcARRAY[i].getbNum() == bNum)
|
|
|
return bcARRAY[i].getbType();
|
|
|
}
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
/**
 * Find the boundary-condition record for a boundary marker.
 *
 * @param bNum  boundary marker
 * @return      address of the first bcARRAY entry whose getbNum() equals bNum,
 *              or nullptr when no entry matches
 */
bc *FemGrp::getbcPtr(int bNum){
    bc* found = nullptr;
    // Stop at the first match, scanning entries in index order.
    for(int k = 0; k < bcCNT && found == nullptr; ++k){
        if(bcARRAY[k].getbNum() == bNum)
            found = &(bcARRAY[k]);
    }
    return found;
}
|
|
|
|
|
|
void FemGrp::AssignExcitParamToFace(){
|
|
|
for(int i = 0; i < faceCNT; i++){
|
|
|
faceARRAY[i]->setTo(To);
|
|
|
faceARRAY[i]->setTau(Tau);
|
|
|
faceARRAY[i]->setTimeDist(TimeDistFlag);
|
|
|
faceARRAY[i]->setExciFlag(ExcitFlag);
|
|
|
faceARRAY[i]->setFrequency(freq);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
void FemGrp::AssignMaterialProperties(){
|
|
|
int i;
|
|
|
tetra *tet;
|
|
|
|
|
|
for(i = 0; i < tetraCNT; i++)
|
|
|
{
|
|
|
tet = &(tetARRAY[i]);
|
|
|
tet->SetFacePEC();
|
|
|
tet->SetFacePMC();
|
|
|
tet->set_mat(&(objProp[tet->getobjNum()]));
|
|
|
tet->set_ConductivityFlag();
|
|
|
|
|
|
// Additional routine for scattering region
|
|
|
if (tet->getMat()->scattering_region)
|
|
|
{
|
|
|
tet->scattering_region = true;
|
|
|
}
|
|
|
|
|
|
// Additional routine for PML
|
|
|
if (tet->getMat()->get_PML_Flag() == 1)
|
|
|
{
|
|
|
tet->set_PML_Flag(1);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
tet->set_PML_Flag(0);
|
|
|
}
|
|
|
if (tet->get_PML_Flag() == -1) cout << "PML_Flag() not set " << endl;
|
|
|
|
|
|
|
|
|
}
|
|
|
}
|
|
|
|
|
|
void FemGrp::AssignTetraFlags(){
|
|
|
int AbcCount = 0;
|
|
|
int InterCount = 0;
|
|
|
int PortCount = 0;
|
|
|
|
|
|
tetra *tet;
|
|
|
cout << " " << endl;
|
|
|
cout << "======================================================" << endl;
|
|
|
cout << " Total number of TetraHedra " << endl;
|
|
|
cout << "======================================================" << endl;
|
|
|
cout << " Total number of TetraHedra := " << tetraCNT << endl;
|
|
|
|
|
|
|
|
|
// Parallelized by Qi Jian
|
|
|
|
|
|
#pragma omp parallel for
|
|
|
for(int i = 0; i < tetraCNT; i++)
|
|
|
{
|
|
|
tet = &(tetARRAY[i]);
|
|
|
tet->set_TetrahedronFlag();
|
|
|
}
|
|
|
|
|
|
|
|
|
double min_AABB_size = 3e8 / (freq * 1e6) / 10.0;
|
|
|
// For every tetrahedron, set the neighbor tetrahedra
|
|
|
#pragma omp parallel for
|
|
|
for(int i = 0; i < tetraCNT; i++)
|
|
|
{
|
|
|
tet = &(tetARRAY[i]);
|
|
|
tet->set_NeighborTetra(tetARRAY, ncARRAY, nonConformalCNT, &octree_object, min_AABB_size);
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for(int i = 0; i < tetraCNT; i++)
|
|
|
{
|
|
|
tet = &(tetARRAY[i]);
|
|
|
tet->set_PolyOrderFlagDebug(PolyFlag);
|
|
|
|
|
|
// The following code is node thread safe.
|
|
|
if (tet->TetrahedronFlag == 0) InterCount++;
|
|
|
if (tet->TetrahedronFlag == 1) AbcCount++;
|
|
|
if (tet->ExcitationFlag == 1) PortCount++;
|
|
|
}
|
|
|
|
|
|
|
|
|
cout << " Total number of P" << PolyFlag << " TetraHedra := " << tetraCNT << endl;
|
|
|
|
|
|
cout << " Total number of Interior TetraHedra := " << InterCount << endl;
|
|
|
cout << " Total number of AbcCount TetraHedra := " << AbcCount << endl;
|
|
|
cout << " Total number of Port/PlaneWave TetraHedra := " << PortCount << endl;
|
|
|
cout << "======================================================" << endl;
|
|
|
cout << " " << endl;
|
|
|
|
|
|
int min_poly = tetARRAY[0].get_PolyOrderFlag();
|
|
|
for(int i = 1; i < tetraCNT; i++){
|
|
|
if(tetARRAY[i].get_PolyOrderFlag() < min_poly)
|
|
|
min_poly = tetARRAY[i].get_PolyOrderFlag();
|
|
|
}
|
|
|
|
|
|
for(int i = 0; i < tetraCNT; i++)
|
|
|
tetARRAY[i].set_MinimumPoly(min_poly);
|
|
|
|
|
|
// Define Excitation tetrahedral
|
|
|
TetExcitIndexArraySize = PortCount;
|
|
|
TetExcitIndexArray = (int*)malloc(sizeof(int) * TetExcitIndexArraySize);
|
|
|
int index = 0;
|
|
|
for(int i = 0; i < tetraCNT; i ++){
|
|
|
tet = &(tetARRAY[i]);
|
|
|
if(tet->ExcitationFlag == 1){
|
|
|
TetExcitIndexArray[index] = i;
|
|
|
index++;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
void FemGrp::makePlaneWaveMesh(){
|
|
|
int i, j;
|
|
|
set<int> meshNodeIds;
|
|
|
|
|
|
// count the number of plane wave faces
|
|
|
int pwFaceNum = 0;
|
|
|
for(i = 0; i < faceCNT; i++){
|
|
|
if(faceARRAY[i]->getbType() == planeWaveType || faceARRAY[i]->getbType() == pmlType)
|
|
|
pwFaceNum++;
|
|
|
}
|
|
|
|
|
|
// set planeWaveMesh_'s faceCnt_ and allocate its faceArray_
|
|
|
planeWaveMesh->setFaceCnt(pwFaceNum);
|
|
|
cout << " pwFaceNum == " << pwFaceNum << endl;
|
|
|
cout << " planeWaveMesh->faceCNT == " << planeWaveMesh->faceCNT << endl;
|
|
|
|
|
|
// populate faceArray_
|
|
|
int index = 0;
|
|
|
for(i = 0; i < faceCNT; i++){
|
|
|
if(faceARRAY[i]->getbType() == planeWaveType || faceARRAY[i]->getbType() == pmlType){
|
|
|
planeWaveMesh->setFace(faceARRAY[i], index);
|
|
|
index++;
|
|
|
// add unique node ids
|
|
|
for(j = 0; j < NumOfNodesPerFace; j++)
|
|
|
meshNodeIds.insert(faceARRAY[i]->getNode(j)->getid());
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// allocate and add node pointers to array keep local mapping
|
|
|
int nodeNum = meshNodeIds.size();
|
|
|
planeWaveMesh->setNodeCnt(nodeNum);
|
|
|
cout << " nodeNum == " << nodeNum << endl;
|
|
|
cout << " planeWaveMesh->nodeCNT == " << planeWaveMesh->nodeCNT << endl;
|
|
|
|
|
|
planeWaveMesh->allocGlobToLocMap();
|
|
|
node** PlaneWaveNodeArray = planeWaveMesh->getNodeArray();
|
|
|
map<int, int>& globToLocMap = planeWaveMesh->getGlobToLocMap();
|
|
|
set<int>::iterator meshNodeIdIter;
|
|
|
int nodeCount = 0;
|
|
|
for(meshNodeIdIter = meshNodeIds.begin(); meshNodeIdIter != meshNodeIds.end(); meshNodeIdIter++){
|
|
|
PlaneWaveNodeArray[nodeCount] = &(ndARRAY[*meshNodeIdIter]);
|
|
|
globToLocMap[ndARRAY[*meshNodeIdIter].getid()] = nodeCount++;
|
|
|
}
|
|
|
|
|
|
|
|
|
// Set the bounding box coordinates for the Planewave mesh
|
|
|
// Useful for PML
|
|
|
/*
|
|
|
planeWaveMesh->computeBoundingBox();
|
|
|
planewave_xmin = planeWaveMesh->getXmin();
|
|
|
planewave_xmax = planeWaveMesh->getXmax();
|
|
|
planewave_ymin = planeWaveMesh->getYmin();
|
|
|
planewave_ymax = planeWaveMesh->getYmax();
|
|
|
planewave_zmin = planeWaveMesh->getZmin();
|
|
|
planewave_zmax = planeWaveMesh->getZmax();
|
|
|
|
|
|
cout << "Planewave bounding box coordinates: " << std::endl;
|
|
|
cout << "xmin: " << planewave_xmin << ", xmax: " << planewave_xmax << std::endl;
|
|
|
cout << "ymin: " << planewave_ymin << ", ymax: " << planewave_ymax << std::endl;
|
|
|
cout << "zmin: " << planewave_zmin << ", zmax: " << planewave_zmax << std::endl;
|
|
|
*/
|
|
|
|
|
|
}
|
|
|
|
|
|
// Single BC_ID
|
|
|
void FemGrp::makeInterSurfMesh(int BC_id){
|
|
|
cout << " Generating InterSurf Mesh with " << BC_id << endl;
|
|
|
InterSurfMesh = new PlaneWaveMesh;
|
|
|
int i, j;
|
|
|
set<int> InterSurfNodeIds;
|
|
|
|
|
|
// count the number of faces
|
|
|
int InterFaceNum = 0;
|
|
|
|
|
|
int* FaceMap = new int[faceCNT];
|
|
|
for(i = 0; i < faceCNT; i++)
|
|
|
FaceMap[i] = -1;
|
|
|
|
|
|
// Find the faces
|
|
|
for(i = 0; i < faceCNT; i++){
|
|
|
if(faceARRAY[i]->getbcPtr()->getbType() == BC_id){ //change
|
|
|
InterFaceNum++;
|
|
|
FaceMap[i] = i;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
if(InterFaceNum == 0)
|
|
|
return;
|
|
|
|
|
|
// set InterSurfMesh_'s faceCnt_ and allocate its faceArray_
|
|
|
cout << " InterFaceNum == " << InterFaceNum << endl;
|
|
|
InterSurfMesh->setFaceCnt(InterFaceNum);
|
|
|
cout << " FaceNum == " << InterFaceNum << endl;
|
|
|
cout << " ->faceCNT == " << InterSurfMesh->faceCNT << endl;
|
|
|
|
|
|
// populate faceArray_
|
|
|
int index = 0;
|
|
|
for(i = 0; i < faceCNT; i++){
|
|
|
if(FaceMap[i] > 0){
|
|
|
InterSurfMesh->setFace(faceARRAY[i], index);
|
|
|
index++;
|
|
|
// add unique node ids
|
|
|
for(j = 0; j < NumOfNodesPerFace; j++)
|
|
|
InterSurfNodeIds.insert(faceARRAY[i]->getNode(j)->getid());
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// allocate and add node pointers to array
|
|
|
// keep local mapping
|
|
|
int nodeNum = InterSurfNodeIds.size();
|
|
|
InterSurfMesh->setNodeCnt(nodeNum);
|
|
|
cout << " nodeNum == " << nodeNum << endl;
|
|
|
cout << " ->nodeCNT == " << InterSurfMesh->nodeCNT << endl;
|
|
|
InterSurfMesh->allocGlobToLocMap();
|
|
|
node** InterSurfNodeArray = InterSurfMesh->getNodeArray();
|
|
|
map<int, int>& globToLocMap = InterSurfMesh->getGlobToLocMap();
|
|
|
set<int>::iterator meshNodeIdIter;
|
|
|
|
|
|
int nodeCount = 0;
|
|
|
for(meshNodeIdIter = InterSurfNodeIds.begin(); meshNodeIdIter != InterSurfNodeIds.end(); meshNodeIdIter++){
|
|
|
InterSurfNodeArray[nodeCount] = &(ndARRAY[*meshNodeIdIter]);
|
|
|
globToLocMap[ndARRAY[*meshNodeIdIter].getid()] = nodeCount++;
|
|
|
}
|
|
|
|
|
|
//write file
|
|
|
char Currents_vtkFile[StrOutput];
|
|
|
sprintf(Currents_vtkFile, "SurfBC_%s_%d", fname, BC_id);
|
|
|
node** locNodeArray = new node*[InterSurfMesh->nodeCNT];
|
|
|
for(i = 0; i < InterSurfMesh->nodeCNT; i++){
|
|
|
node& Node = *(InterSurfMesh->ndArray[i]);
|
|
|
index = InterSurfMesh->globToLocMap_->find(Node.getid())->second;
|
|
|
locNodeArray[index] = new node(index,
|
|
|
Node.getPType(),
|
|
|
Node.getSingOrder(),
|
|
|
Node.getCoord().getx(),
|
|
|
Node.getCoord().gety(),
|
|
|
Node.getCoord().getz());
|
|
|
}
|
|
|
|
|
|
face** locFaceArray = new face*[InterSurfMesh->faceCNT];
|
|
|
for(i = 0; i < InterSurfMesh->faceCNT; i++){
|
|
|
face& Face = *(InterSurfMesh->fcArray[i]);
|
|
|
locFaceArray[i] = new face(Face);
|
|
|
locFaceArray[i]->setFace(locNodeArray[InterSurfMesh->globToLocMap_->find(Face.getNode(0)->getid())->second],
|
|
|
locNodeArray[InterSurfMesh->globToLocMap_->find(Face.getNode(1)->getid())->second],
|
|
|
locNodeArray[InterSurfMesh->globToLocMap_->find(Face.getNode(2)->getid())->second]);
|
|
|
}
|
|
|
//TODO: check why unit is 1. instead of unit
|
|
|
VtkWriter vtkWriter(1.);
|
|
|
//TODO: check why order is 1. instead of order
|
|
|
vtkWriter.writeTriUg(Currents_vtkFile, InterSurfMesh->nodeCNT, locNodeArray, InterSurfMesh->faceCNT, locFaceArray, 1);
|
|
|
|
|
|
for(i = 0; i < InterSurfMesh->nodeCNT; i++)
|
|
|
delete locNodeArray[i];
|
|
|
|
|
|
delete [] locNodeArray;
|
|
|
|
|
|
for(i = 0; i < InterSurfMesh->faceCNT; i++)
|
|
|
delete locFaceArray[i];
|
|
|
|
|
|
delete [] locFaceArray;
|
|
|
}
|
|
|
|
|
|
// Double BC_ID
|
|
|
void FemGrp::makeInterSurfMesh(int BC_id1,int BC_id2){
|
|
|
|
|
|
InterSurfMesh = new PlaneWaveMesh;
|
|
|
int i, j;
|
|
|
set<int> InterSurfNodeIds;
|
|
|
// count the number of faces
|
|
|
int InterFaceNum = 0;
|
|
|
int* FaceMap = new int[faceCNT];
|
|
|
for(i = 0; i < faceCNT; i++) FaceMap[i] = -1;
|
|
|
|
|
|
// Find the faces
|
|
|
for(i = 0; i < faceCNT; i++){
|
|
|
if((faceARRAY[i]->getbcPtr()->getbType() == BC_id1) || (faceARRAY[i]->getbcPtr()->getbType() == BC_id2)){
|
|
|
InterFaceNum++;
|
|
|
FaceMap[i] = i;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
if(InterFaceNum == 0) return;
|
|
|
// set InterSurfMesh_'s faceCnt_ and allocate its faceArray_
|
|
|
cout << "== InterFaceNum == " << InterFaceNum << endl;
|
|
|
InterSurfMesh->setFaceCnt(InterFaceNum);
|
|
|
cout << "== FaceNum == " << InterFaceNum << endl;
|
|
|
cout << "== ->faceCNT == " << InterSurfMesh->faceCNT << endl;
|
|
|
// populate faceArray_
|
|
|
int index = 0;
|
|
|
for(i = 0; i < faceCNT; i++){
|
|
|
if(FaceMap[i] > 0){
|
|
|
InterSurfMesh->setFace(faceARRAY[i], index);
|
|
|
index++;
|
|
|
// add unique node ids
|
|
|
for(j = 0; j < NumOfNodesPerFace; j++)
|
|
|
InterSurfNodeIds.insert(faceARRAY[i]->getNode(j)->getid());
|
|
|
}
|
|
|
}
|
|
|
// allocate and add node pointers to array
|
|
|
// keep local mapping
|
|
|
int nodeNum = InterSurfNodeIds.size();
|
|
|
InterSurfMesh->setNodeCnt(nodeNum);
|
|
|
cout << "== nodeNum == " << nodeNum << endl;
|
|
|
cout << "== ->nodeCNT == " << InterSurfMesh->nodeCNT << endl;
|
|
|
InterSurfMesh->allocGlobToLocMap();
|
|
|
node** InterSurfNodeArray = InterSurfMesh->getNodeArray();
|
|
|
map<int, int>& globToLocMap = InterSurfMesh->getGlobToLocMap();
|
|
|
set<int>::iterator meshNodeIdIter;
|
|
|
int nodeCount = 0;
|
|
|
for(meshNodeIdIter = InterSurfNodeIds.begin(); meshNodeIdIter != InterSurfNodeIds.end(); meshNodeIdIter++){
|
|
|
InterSurfNodeArray[nodeCount] = &(ndARRAY[*meshNodeIdIter]);
|
|
|
globToLocMap[ndARRAY[*meshNodeIdIter].getid()] = nodeCount++;
|
|
|
}
|
|
|
//write file
|
|
|
char Currents_vtkFile[StrOutput];
|
|
|
sprintf(Currents_vtkFile, "SurfBC_%s", fname);
|
|
|
node** locNodeArray = new node*[InterSurfMesh->nodeCNT];
|
|
|
for(i = 0; i < InterSurfMesh->nodeCNT; i++){
|
|
|
node& Node = *(InterSurfMesh->ndArray[i]);
|
|
|
index = InterSurfMesh->globToLocMap_->find(Node.getid())->second;
|
|
|
locNodeArray[index] = new node(index, Node.getPType(), Node.getSingOrder(), Node.getCoord().getx(), Node.getCoord().gety(), Node.getCoord().getz());
|
|
|
}
|
|
|
|
|
|
face** locFaceArray = new face*[InterSurfMesh->faceCNT];
|
|
|
for(i = 0; i < InterSurfMesh->faceCNT; i++){
|
|
|
face& Face = *(InterSurfMesh->fcArray[i]);
|
|
|
locFaceArray[i] = new face(Face);
|
|
|
locFaceArray[i]->setFace(
|
|
|
locNodeArray[InterSurfMesh->globToLocMap_->find(Face.getNode(0)->getid())->second],
|
|
|
locNodeArray[InterSurfMesh->globToLocMap_->find(Face.getNode(1)->getid())->second],
|
|
|
locNodeArray[InterSurfMesh->globToLocMap_->find(Face.getNode(2)->getid())->second]);
|
|
|
}
|
|
|
|
|
|
//TODO: check why unit is 1. instead of unit
|
|
|
VtkWriter vtkWriter(1.);
|
|
|
//TODO: check why order is 1. instead of order
|
|
|
vtkWriter.writeTriUg(Currents_vtkFile, InterSurfMesh->nodeCNT, locNodeArray, InterSurfMesh->faceCNT, locFaceArray, 1);
|
|
|
|
|
|
for(i = 0; i < InterSurfMesh->nodeCNT; i++)
|
|
|
delete locNodeArray[i];
|
|
|
delete [] locNodeArray;
|
|
|
for(i = 0; i < InterSurfMesh->faceCNT; i++)
|
|
|
delete locFaceArray[i];
|
|
|
delete [] locFaceArray;
|
|
|
}
|
|
|
|
|
|
void FemGrp::makeSurfMesh(int BC_id){
|
|
|
cout << "Generating Surf Mesh with " << BC_id << endl;
|
|
|
SurfMesh = new PlaneWaveMesh;
|
|
|
int i, j;
|
|
|
set<int> InterSurfNodeIds;
|
|
|
// count the number of faces
|
|
|
int InterFaceNum = 0;
|
|
|
int* FaceMap = new int[faceCNT];
|
|
|
for(i = 0; i < faceCNT; i++)
|
|
|
FaceMap[i] = -1;
|
|
|
|
|
|
// Find the faces
|
|
|
for(i = 0; i < faceCNT; i++){
|
|
|
if(faceARRAY[i]->getbcPtr()->getbType() == BC_id){ //change
|
|
|
InterFaceNum++;
|
|
|
FaceMap[i] = i;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
if(InterFaceNum == 0)
|
|
|
return;
|
|
|
|
|
|
// set SurfMesh_'s faceCnt_ and allocate its faceArray_
|
|
|
cout << "== InterFaceNum == " << InterFaceNum << endl;
|
|
|
SurfMesh->setFaceCnt(InterFaceNum);
|
|
|
cout << "== FaceNum == " << InterFaceNum << endl;
|
|
|
cout << "== ->faceCNT == " << SurfMesh->faceCNT << endl;
|
|
|
// populate faceArray_
|
|
|
int index = 0;
|
|
|
for(i = 0; i < faceCNT; i++){
|
|
|
if(FaceMap[i] > 0){
|
|
|
SurfMesh->setFace(faceARRAY[i], index);
|
|
|
index++;
|
|
|
// add unique node ids
|
|
|
for(j = 0; j < NumOfNodesPerFace; j++)
|
|
|
InterSurfNodeIds.insert(faceARRAY[i]->getNode(j)->getid());
|
|
|
}
|
|
|
}
|
|
|
// allocate and add node pointers to array
|
|
|
// keep local mapping
|
|
|
int nodeNum = InterSurfNodeIds.size();
|
|
|
SurfMesh->setNodeCnt(nodeNum);
|
|
|
cout << "== nodeNum == " << nodeNum << endl;
|
|
|
cout << "== ->nodeCNT == " << SurfMesh->nodeCNT << endl;
|
|
|
SurfMesh->allocGlobToLocMap();
|
|
|
node** InterSurfNodeArray = SurfMesh->getNodeArray();
|
|
|
map<int, int>& globToLocMap = SurfMesh->getGlobToLocMap();
|
|
|
set<int>::iterator meshNodeIdIter;
|
|
|
int nodeCount = 0;
|
|
|
for(meshNodeIdIter = InterSurfNodeIds.begin(); meshNodeIdIter != InterSurfNodeIds.end(); meshNodeIdIter++){
|
|
|
InterSurfNodeArray[nodeCount] = &(ndARRAY[*meshNodeIdIter]);
|
|
|
globToLocMap[ndARRAY[*meshNodeIdIter].getid()] = nodeCount++;
|
|
|
}
|
|
|
//write file
|
|
|
char Currents_vtkFile[StrOutput];
|
|
|
sprintf(Currents_vtkFile, "SurfBC_%s_%d", fname, BC_id);
|
|
|
node** locNodeArray = new node*[SurfMesh->nodeCNT];
|
|
|
for(i = 0; i < SurfMesh->nodeCNT; i++){
|
|
|
node& Node = *(SurfMesh->ndArray[i]);
|
|
|
index = SurfMesh->globToLocMap_->find(Node.getid())->second;
|
|
|
locNodeArray[index] = new node(index, Node.getPType(), Node.getSingOrder(), Node.getCoord().getx(), Node.getCoord().gety(), Node.getCoord().getz());
|
|
|
}
|
|
|
|
|
|
face** locFaceArray = new face*[SurfMesh->faceCNT];
|
|
|
for(i = 0; i < SurfMesh->faceCNT; i++){
|
|
|
face& Face = *(SurfMesh->fcArray[i]);
|
|
|
locFaceArray[i] = new face(Face);
|
|
|
locFaceArray[i]->setFace(locNodeArray[SurfMesh->globToLocMap_->find(Face.getNode(0)->getid())->second],
|
|
|
locNodeArray[SurfMesh->globToLocMap_->find(Face.getNode(1)->getid())->second],
|
|
|
locNodeArray[SurfMesh->globToLocMap_->find(Face.getNode(2)->getid())->second]);
|
|
|
}
|
|
|
|
|
|
//TODO: check why unit is 1. instead of unit (it may be because the node coordinates are already scaled after readin. So they are true unit of the geometry)
|
|
|
VtkWriter vtkWriter(1.);
|
|
|
vtkWriter.writeTriUg(Currents_vtkFile, SurfMesh->nodeCNT, locNodeArray, SurfMesh->faceCNT, locFaceArray, 1); //The one is because we only work with first order geometry (modify if we want to work with higher order structures)
|
|
|
|
|
|
for(i = 0; i < SurfMesh->nodeCNT; i++)
|
|
|
delete locNodeArray[i];
|
|
|
delete [] locNodeArray;
|
|
|
for(i = 0; i < SurfMesh->faceCNT; i++)
|
|
|
delete locFaceArray[i];
|
|
|
delete [] locFaceArray;
|
|
|
}
|
|
|
|
|
|
// Double BC_ID
|
|
|
void FemGrp::makeSurfMesh(int BC_id1,int BC_id2){
|
|
|
|
|
|
SurfMesh = new PlaneWaveMesh;
|
|
|
int i, j;
|
|
|
set<int> InterSurfNodeIds;
|
|
|
// count the number of faces
|
|
|
int InterFaceNum = 0;
|
|
|
int* FaceMap = new int[faceCNT];
|
|
|
for(i = 0; i < faceCNT; i++)
|
|
|
FaceMap[i] = -1;
|
|
|
|
|
|
// Find the faces
|
|
|
for(i = 0; i < faceCNT; i++){
|
|
|
if((faceARRAY[i]->getbcPtr()->getbType() == BC_id1) || (faceARRAY[i]->getbcPtr()->getbType() == BC_id2)){
|
|
|
InterFaceNum++;
|
|
|
FaceMap[i] = i;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
if(InterFaceNum == 0)
|
|
|
return;
|
|
|
// set SurfMesh_'s faceCnt_ and allocate its faceArray_
|
|
|
cout << "== InterFaceNum == " << InterFaceNum << endl;
|
|
|
SurfMesh->setFaceCnt(InterFaceNum);
|
|
|
cout << "== FaceNum == " << InterFaceNum << endl;
|
|
|
cout << "== ->faceCNT == " << SurfMesh->faceCNT << endl;
|
|
|
// populate faceArray_
|
|
|
int index = 0;
|
|
|
for(i = 0; i < faceCNT; i++){
|
|
|
if(FaceMap[i] > 0){
|
|
|
SurfMesh->setFace(faceARRAY[i], index);
|
|
|
index++;
|
|
|
// add unique node ids
|
|
|
for(j = 0; j < NumOfNodesPerFace; j++)
|
|
|
InterSurfNodeIds.insert(faceARRAY[i]->getNode(j)->getid());
|
|
|
}
|
|
|
}
|
|
|
// allocate and add node pointers to array
|
|
|
// keep local mapping
|
|
|
int nodeNum = InterSurfNodeIds.size();
|
|
|
SurfMesh->setNodeCnt(nodeNum);
|
|
|
cout << "== nodeNum == " << nodeNum << endl;
|
|
|
cout << "== ->nodeCNT == " << SurfMesh->nodeCNT << endl;
|
|
|
SurfMesh->allocGlobToLocMap();
|
|
|
node** InterSurfNodeArray = SurfMesh->getNodeArray();
|
|
|
map<int, int>& globToLocMap = SurfMesh->getGlobToLocMap();
|
|
|
set<int>::iterator meshNodeIdIter;
|
|
|
int nodeCount = 0;
|
|
|
for(meshNodeIdIter = InterSurfNodeIds.begin(); meshNodeIdIter != InterSurfNodeIds.end(); meshNodeIdIter++){
|
|
|
InterSurfNodeArray[nodeCount] = &(ndARRAY[*meshNodeIdIter]);
|
|
|
globToLocMap[ndARRAY[*meshNodeIdIter].getid()] = nodeCount++;
|
|
|
}
|
|
|
|
|
|
//write file
|
|
|
char Currents_vtkFile[StrOutput];
|
|
|
sprintf(Currents_vtkFile, "SurfBC_%s", fname);
|
|
|
node** locNodeArray = new node*[SurfMesh->nodeCNT];
|
|
|
for(i = 0; i < SurfMesh->nodeCNT; i++){
|
|
|
node& Node = *(SurfMesh->ndArray[i]);
|
|
|
index = SurfMesh->globToLocMap_->find(Node.getid())->second;
|
|
|
locNodeArray[index] = new node(index, Node.getPType(), Node.getSingOrder(), Node.getCoord().getx(), Node.getCoord().gety(), Node.getCoord().getz());
|
|
|
}
|
|
|
|
|
|
face** locFaceArray = new face*[SurfMesh->faceCNT];
|
|
|
for(i = 0; i < SurfMesh->faceCNT; i++){
|
|
|
face& Face = *(SurfMesh->fcArray[i]);
|
|
|
locFaceArray[i] = new face(Face);
|
|
|
locFaceArray[i]->setFace(
|
|
|
locNodeArray[SurfMesh->globToLocMap_->find(Face.getNode(0)->getid())->second],
|
|
|
locNodeArray[SurfMesh->globToLocMap_->find(Face.getNode(1)->getid())->second],
|
|
|
locNodeArray[SurfMesh->globToLocMap_->find(Face.getNode(2)->getid())->second]);
|
|
|
}
|
|
|
|
|
|
//TODO: check why unit is 1. instead of unit
|
|
|
VtkWriter vtkWriter(1.);
|
|
|
//TODO: check why order is 1. instead of order
|
|
|
vtkWriter.writeTriUg(Currents_vtkFile, SurfMesh->nodeCNT, locNodeArray, SurfMesh->faceCNT, locFaceArray, 1);
|
|
|
|
|
|
for(i = 0; i < SurfMesh->nodeCNT; i++)
|
|
|
delete locNodeArray[i];
|
|
|
delete [] locNodeArray;
|
|
|
for(i = 0; i < SurfMesh->faceCNT; i++)
|
|
|
delete locFaceArray[i];
|
|
|
delete [] locFaceArray;
|
|
|
|
|
|
}
|
|
|
|
|
|
// Set up the tet mass matrices and also the local inverses
|
|
|
// If non-matrix free is used also precompute and store the update matrices
|
|
|
void FemGrp::GetMatrices(){
|
|
|
int i;
|
|
|
tetra *tet;
|
|
|
timer_start("CPU Matrices Evaluation",'u');
|
|
|
// this gets the mass matrices for the local tets only
|
|
|
|
|
|
cout << "tetraCNT = " << tetraCNT << endl;
|
|
|
//std::vector<fp_t> vec_x1, vec_y1, vec_z1;
|
|
|
//std::vector<fp_t> vec_A2x, vec_A2y, vec_A2z;
|
|
|
|
|
|
//fp_t cutoff_freq = freq * 1e6; // Convert MHz to Hz
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma omp parallel for schedule(dynamic) private(tet,i)
|
|
|
for(i = 0; i < tetraCNT; i ++)
|
|
|
{
|
|
|
|
|
|
#if defined(DGTD_USE_CUDA)
|
|
|
|
|
|
|
|
|
|
|
|
//cout << "regularRegionFlag = " << regularRegionFlag << endl;
|
|
|
//cout << "regularReferenceARRAY[" << i << "] = " << regularReferenceARRAY[i] << endl;
|
|
|
|
|
|
//It is important in this order to avoid the checking of a null pointer
|
|
|
if(!regularRegionFlag || regularReferenceARRAY[i] == i)
|
|
|
{
|
|
|
|
|
|
tet = &(tetARRAY[i]);
|
|
|
tet->set_flux_GAMMA(factor_Flux);
|
|
|
bool isPML = tet->get_PML_Flag();
|
|
|
|
|
|
|
|
|
// -------------------------------------------------------------------------------
|
|
|
if (isPML)
|
|
|
{
|
|
|
|
|
|
tet->set_Conductivity_Profile_Planar(planewave_xmin, planewave_ymin, planewave_zmin,
|
|
|
planewave_xmax, planewave_ymax, planewave_zmax);
|
|
|
|
|
|
if (UseQuadratureMatrices)
|
|
|
{
|
|
|
tensor identity(1.0, 0.0, 0.0,
|
|
|
0.0, 1.0, 0.0,
|
|
|
0.0, 0.0, 1.0);
|
|
|
|
|
|
tet->Calculate_M_Matrix_E_Numeric();
|
|
|
tet->Calculate_M_Matrix_I_E_Numeric();
|
|
|
tet->Calculate_ABC_E_Numeric();
|
|
|
|
|
|
tet->Calculate_Mass_Material_Matrix_Vary_Numeric( tet->Mass_epA_E, tet->matA, tet->mat->epsr, true,
|
|
|
"A", planewave_xmin, planewave_ymin, planewave_zmin,
|
|
|
planewave_xmax, planewave_ymax, planewave_zmax,
|
|
|
Ellipse_Rx, Ellipse_Ry, Ellipse_Rz); // epA
|
|
|
tet->Calculate_Mass_Material_Matrix_Vary_Numeric( tet->Mass_epB_E, tet->matB, tet->mat->epsr, true,
|
|
|
"B", planewave_xmin, planewave_ymin, planewave_zmin,
|
|
|
planewave_xmax, planewave_ymax, planewave_zmax,
|
|
|
Ellipse_Rx, Ellipse_Ry, Ellipse_Rz); // epB
|
|
|
tet->Calculate_Mass_Material_Matrix_Vary_Numeric( tet->Mass_epC_E, tet->matC, tet->mat->epsr, true,
|
|
|
"C", planewave_xmin, planewave_ymin, planewave_zmin,
|
|
|
planewave_xmax, planewave_ymax, planewave_zmax,
|
|
|
Ellipse_Rx, Ellipse_Ry, Ellipse_Rz); // epC
|
|
|
tet->Calculate_Mass_Material_Matrix_Vary_Numeric( tet->Mass_D_E, tet->matD, identity, true,
|
|
|
"D", planewave_xmin, planewave_ymin, planewave_zmin,
|
|
|
planewave_xmax, planewave_ymax, planewave_zmax,
|
|
|
Ellipse_Rx, Ellipse_Ry, Ellipse_Rz); // D
|
|
|
tet->Calculate_Mass_Material_Matrix_Vary_Numeric( tet->Mass_F_E, tet->matF, identity, true,
|
|
|
"F", planewave_xmin, planewave_ymin, planewave_zmin,
|
|
|
planewave_xmax, planewave_ymax, planewave_zmax,
|
|
|
Ellipse_Rx, Ellipse_Ry, Ellipse_Rz); // F
|
|
|
|
|
|
tet->Calculate_Bii_Matrix_E_Numeric();
|
|
|
tet->Calculate_Bij_Matrix_E_Numeric();
|
|
|
tet->Calculate_S_Matrix_E_Numeric();
|
|
|
tet->Calculate_Fii_Matrix_E_Numeric();
|
|
|
tet->Calculate_Fij_Matrix_E_Numeric();
|
|
|
|
|
|
tet->SetUp_LocalFaceToTetraMapE_NMF1_PML(tet->Class_dt);
|
|
|
|
|
|
tet->Calculate_M_Matrix_H_Numeric();
|
|
|
tet->Calculate_M_Matrix_I_H_Numeric();
|
|
|
tet->Calculate_ABC_H_Numeric();
|
|
|
|
|
|
tet->Calculate_Mass_Material_Matrix_Vary_Numeric( tet->Mass_muA_H, tet->matA, tet->mat->mur, false,
|
|
|
"A", planewave_xmin, planewave_ymin, planewave_zmin,
|
|
|
planewave_xmax, planewave_ymax, planewave_zmax,
|
|
|
Ellipse_Rx, Ellipse_Ry, Ellipse_Rz); // muA
|
|
|
tet->Calculate_Mass_Material_Matrix_Vary_Numeric( tet->Mass_muB_H, tet->matB, tet->mat->mur, false,
|
|
|
"B", planewave_xmin, planewave_ymin, planewave_zmin,
|
|
|
planewave_xmax, planewave_ymax, planewave_zmax,
|
|
|
Ellipse_Rx, Ellipse_Ry, Ellipse_Rz); // muB
|
|
|
tet->Calculate_Mass_Material_Matrix_Vary_Numeric( tet->Mass_muC_H, tet->matC, tet->mat->mur, false,
|
|
|
"C", planewave_xmin, planewave_ymin, planewave_zmin,
|
|
|
planewave_xmax, planewave_ymax, planewave_zmax,
|
|
|
Ellipse_Rx, Ellipse_Ry, Ellipse_Rz); // muC
|
|
|
tet->Calculate_Mass_Material_Matrix_Vary_Numeric( tet->Mass_D_H, tet->matD, identity, false,
|
|
|
"D", planewave_xmin, planewave_ymin, planewave_zmin,
|
|
|
planewave_xmax, planewave_ymax, planewave_zmax,
|
|
|
Ellipse_Rx, Ellipse_Ry, Ellipse_Rz); // D
|
|
|
tet->Calculate_Mass_Material_Matrix_Vary_Numeric( tet->Mass_F_H, tet->matF, identity, false,
|
|
|
"F", planewave_xmin, planewave_ymin, planewave_zmin,
|
|
|
planewave_xmax, planewave_ymax, planewave_zmax,
|
|
|
Ellipse_Rx, Ellipse_Ry, Ellipse_Rz); // F
|
|
|
|
|
|
|
|
|
tet->Calculate_Bii_Matrix_H_Numeric();
|
|
|
tet->Calculate_Bij_Matrix_H_Numeric();
|
|
|
tet->Calculate_S_Matrix_H_Numeric();
|
|
|
tet->Calculate_Fii_Matrix_H_Numeric();
|
|
|
tet->Calculate_Fij_Matrix_H_Numeric();
|
|
|
tet->SetUp_LocalFaceToTetraMapH_NMF1_PML(tet->Class_dt);
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
{
|
|
|
|
|
|
tensor identity(1.0, 0.0, 0.0,
|
|
|
0.0, 1.0, 0.0,
|
|
|
0.0, 0.0, 1.0);
|
|
|
|
|
|
tet->Calculate_M_Matrix_E();
|
|
|
|
|
|
tet->Calculate_M_Matrix_I_E();
|
|
|
tet->Calculate_ABC_E();
|
|
|
|
|
|
tet->Calculate_Mass_Material_Matrix( tet->Mass_epA_E, tet->matA, tet->mat->epsr, true); // epA
|
|
|
tet->Calculate_Mass_Material_Matrix( tet->Mass_epB_E, tet->matB, tet->mat->epsr, true); // epB
|
|
|
tet->Calculate_Mass_Material_Matrix( tet->Mass_epC_E, tet->matC, tet->mat->epsr, true); // epC
|
|
|
tet->Calculate_Mass_Material_Matrix( tet->Mass_D_E, tet->matD, identity, true); // D
|
|
|
tet->Calculate_Mass_Material_Matrix( tet->Mass_F_E, tet->matF, identity, true); // F
|
|
|
|
|
|
tet->Calculate_Bii_Matrix_E();
|
|
|
tet->Calculate_Bij_Matrix_E();
|
|
|
tet->Calculate_S_Matrix_E();
|
|
|
tet->Calculate_Fii_Matrix_E();
|
|
|
tet->Calculate_Fij_Matrix_E();
|
|
|
|
|
|
|
|
|
tet->SetUp_LocalFaceToTetraMapE_NMF1_PML(tet->Class_dt);
|
|
|
|
|
|
tet->Calculate_M_Matrix_H();
|
|
|
|
|
|
tet->Calculate_M_Matrix_I_H();
|
|
|
tet->Calculate_ABC_H();
|
|
|
|
|
|
tet->Calculate_Mass_Material_Matrix( tet->Mass_muA_H, tet->matA, tet->mat->mur, false); // muA
|
|
|
tet->Calculate_Mass_Material_Matrix( tet->Mass_muB_H, tet->matB, tet->mat->mur, false); // muB
|
|
|
tet->Calculate_Mass_Material_Matrix( tet->Mass_muC_H, tet->matC, tet->mat->mur, false); // muC
|
|
|
tet->Calculate_Mass_Material_Matrix( tet->Mass_D_H, tet->matD, identity, false); // D
|
|
|
tet->Calculate_Mass_Material_Matrix( tet->Mass_F_H, tet->matF, identity, false); // F
|
|
|
|
|
|
tet->Calculate_Bii_Matrix_H();
|
|
|
tet->Calculate_Bij_Matrix_H();
|
|
|
tet->Calculate_S_Matrix_H();
|
|
|
tet->Calculate_Fii_Matrix_H();
|
|
|
tet->Calculate_Fij_Matrix_H();
|
|
|
|
|
|
tet->SetUp_LocalFaceToTetraMapH_NMF1_PML(tet->Class_dt);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// PML
|
|
|
// -------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
else
|
|
|
{
|
|
|
if (UseQuadratureMatrices)
|
|
|
{
|
|
|
|
|
|
tet->Calculate_M_Matrix_E_Numeric();
|
|
|
tet->Calculate_M_Matrix_H_Numeric();
|
|
|
tet->Calculate_Bii_Matrix_E_Numeric();
|
|
|
tet->Calculate_Bij_Matrix_E_Numeric();
|
|
|
tet->Calculate_S_Matrix_E_Numeric();
|
|
|
tet->Calculate_Fii_Matrix_E_Numeric();
|
|
|
tet->Calculate_Fij_Matrix_E_Numeric();
|
|
|
tet->SetUp_LocalFaceToTetraMapE_NMF1_Numeric(tet->Class_dt);
|
|
|
|
|
|
tet->Calculate_Bii_Matrix_H_Numeric();
|
|
|
tet->Calculate_Bij_Matrix_H_Numeric();
|
|
|
tet->Calculate_S_Matrix_H_Numeric();
|
|
|
tet->Calculate_Fii_Matrix_H_Numeric();
|
|
|
tet->Calculate_Fij_Matrix_H_Numeric();
|
|
|
|
|
|
tet->SetUp_LocalFaceToTetraMapH_NMF1_Numeric(tet->Class_dt);
|
|
|
}
|
|
|
|
|
|
else
|
|
|
{
|
|
|
tet->Calculate_M_Matrix_E();
|
|
|
tet->Calculate_M_Matrix_H();
|
|
|
|
|
|
tet->Calculate_Bii_Matrix_E();
|
|
|
tet->Calculate_Bij_Matrix_E();
|
|
|
tet->Calculate_S_Matrix_E();
|
|
|
tet->Calculate_Fii_Matrix_E();
|
|
|
tet->Calculate_Fij_Matrix_E();
|
|
|
tet->SetUp_LocalFaceToTetraMapE_NMF1(tet->Class_dt);
|
|
|
|
|
|
tet->Calculate_Bii_Matrix_H();
|
|
|
tet->Calculate_Bij_Matrix_H();
|
|
|
tet->Calculate_S_Matrix_H();
|
|
|
tet->Calculate_Fii_Matrix_H();
|
|
|
tet->Calculate_Fij_Matrix_H();
|
|
|
tet->SetUp_LocalFaceToTetraMapH_NMF1(tet->Class_dt);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
#else
|
|
|
tet = &(tetARRAY[i]);
|
|
|
tet->set_flux_GAMMA(factor_Flux);
|
|
|
tet->Calculate_M_Matrix_E();
|
|
|
tet->Calculate_M_Matrix_H();
|
|
|
|
|
|
// this
|
|
|
tet->Calculate_Bii_Matrix_E();
|
|
|
tet->Calculate_Bij_Matrix_E();
|
|
|
tet->Calculate_S_Matrix_E();
|
|
|
tet->Calculate_Fii_Matrix_E();
|
|
|
tet->Calculate_Fij_Matrix_E();
|
|
|
tet->SetUp_LocalFaceToTetraMapE_NMF1(tet->Class_dt);
|
|
|
|
|
|
tet->Calculate_Bii_Matrix_H();
|
|
|
tet->Calculate_Bij_Matrix_H();
|
|
|
tet->Calculate_S_Matrix_H();
|
|
|
tet->Calculate_Fii_Matrix_H();
|
|
|
tet->Calculate_Fij_Matrix_H();
|
|
|
tet->SetUp_LocalFaceToTetraMapH_NMF1(tet->Class_dt);
|
|
|
#endif
|
|
|
}
|
|
|
timer_stop('u');
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
void FemGrp::SetUpMatrixVector(){
|
|
|
DimE = dimE;
|
|
|
DimH = dimH;
|
|
|
|
|
|
#if defined(DGTD_USE_CUDA)
|
|
|
// MemSizeE = DimE * sizeof(fp_t_ts);
|
|
|
// MemSizeH = DimH * sizeof(fp_t_ts);
|
|
|
|
|
|
// CUDA_SAFE_CALL(cudaMallocHost((void**)&En1_h, MemSizeE, cudaHostAllocMapped));
|
|
|
// CUDA_SAFE_CALL(cudaMallocHost((void**)&Hn32_h, MemSizeH, cudaHostAllocMapped));
|
|
|
#else
|
|
|
MemSizeE = DimE * sizeof(fp_t);
|
|
|
MemSizeH = DimH * sizeof(fp_t);
|
|
|
|
|
|
en = new ArrayFP<fp_t>(DimE);
|
|
|
hn_12 = new ArrayFP<fp_t>(DimH);
|
|
|
en_1 = new ArrayFP<fp_t>(DimE);
|
|
|
hn_32 = new ArrayFP<fp_t>(DimH);
|
|
|
#endif
|
|
|
|
|
|
// pre-compute the facial matrices required for coupling
|
|
|
#pragma omp parallel for schedule(static)
|
|
|
for(int i = 0; i < faceCNT; i++)
|
|
|
faceARRAY[i]->SetUpMatrixFree();
|
|
|
|
|
|
// #pragma omp parallel for schedule(dynamic) private(tet,i)
|
|
|
#pragma omp parallel for schedule(dynamic)
|
|
|
for(int i = 0; i < tetraCNT; i++){
|
|
|
tetARRAY[i].SetUpMatrixFree();
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
void FemGrp::DG_AssignOffsets(){
|
|
|
int i;
|
|
|
int OffsetE = 0;
|
|
|
int OffsetH = 0;
|
|
|
tetra* tet;
|
|
|
|
|
|
for(i = 0; i < tetraCNT; i ++){
|
|
|
tet = &(tetARRAY[i]);
|
|
|
tet->CountDOF_E();
|
|
|
tet->CountDOF_H();
|
|
|
|
|
|
dimE = dimE + tet->LocalEDOF;
|
|
|
dimH = dimH + tet->LocalHDOF;
|
|
|
|
|
|
tet->set_LocalOffsetE(OffsetE);
|
|
|
OffsetE = OffsetE + tet->LocalEDOF;
|
|
|
|
|
|
tet->set_LocalOffsetH(OffsetH);
|
|
|
OffsetH = OffsetH + tet->LocalHDOF;
|
|
|
}
|
|
|
cout << " " << endl;
|
|
|
cout << "=================" << endl;
|
|
|
cout << " Dimensions " << endl;
|
|
|
cout << "=================" << endl;
|
|
|
cout << " dimE = " << dimE << endl;
|
|
|
cout << " dimH = " << dimH << endl;
|
|
|
cout << "=================" << endl;
|
|
|
cout << " " << endl;
|
|
|
}
|
|
|
|
|
|
void FemGrp::Get_dt_min_max(){
|
|
|
int printSc = tetraCNT / 10;
|
|
|
fp_t V_P;
|
|
|
fp_t LocaldtMin = 1.0 * 1e6;
|
|
|
fp_t LocalDt;
|
|
|
fp_t LocaldtMax = 0.0;
|
|
|
|
|
|
// #pragma omp parallel for schedule(dynamic) shared(LocaldtMin) private(LocalDt, V_P)
|
|
|
for(int i = 0; i < tetraCNT; i ++){
|
|
|
tetra* tet = &(tetARRAY[i]);
|
|
|
|
|
|
tet->TimeStepEstimate(LocalDt, V_P);
|
|
|
tet->set_Stability_dt(LocalDt); // May 5 2011
|
|
|
|
|
|
if(LocalDt < LocaldtMin){
|
|
|
#pragma omp atomic write
|
|
|
LocaldtMin = LocalDt;
|
|
|
}
|
|
|
|
|
|
if(LocalDt > LocaldtMax){
|
|
|
#pragma omp atomic write
|
|
|
LocaldtMax = LocalDt;
|
|
|
}
|
|
|
|
|
|
if(i % printSc == 0)
|
|
|
DEBUG_INFO(" Finished: " + to_string(i / (fp_t)tetraCNT * 100.0) + " %");
|
|
|
}
|
|
|
dt_min = LocaldtMin;
|
|
|
dt_max = LocaldtMax;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void FemGrp::LocalTimeSteppingClassPartioning()
|
|
|
{
|
|
|
cout.setf(ios::scientific,ios_base::floatfield);
|
|
|
cout.precision(20);
|
|
|
|
|
|
cout << " " << endl;
|
|
|
cout << "========================================================" << endl;
|
|
|
cout << " LocalTimeSteppingClassPartioning " << endl << flush;
|
|
|
cout << "========================================================" << endl;
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
// In this part we calculate the minimum and maximum time-step, with these //
|
|
|
// values, we calculate the number of classes and the ttime-step of each class as: //
|
|
|
// dt_k = (2.0 * m + 1)^k * dt_min //
|
|
|
// - m = class factor //
|
|
|
// - k = number of the class(starts in 0) //
|
|
|
// - dt_k = timestep of class k //
|
|
|
// - dt_min = minimun timestep //
|
|
|
// we also assign to each tetra the class they belong to //
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
int ClassCnt = 0;
|
|
|
int PMLClassCnt = 0; // For PML
|
|
|
|
|
|
setClassMul(1);// this is actually the m not (2m+1)
|
|
|
fp_t m = getClassMul();
|
|
|
cout << " Class Factor: (2m + 1), m = " << m << " " << endl << flush;
|
|
|
cout << " " << endl;
|
|
|
fp_t LocalDt;
|
|
|
fp_t LocalDt_down;
|
|
|
fp_t LocalDt_up;
|
|
|
tetra *tet;
|
|
|
cout << " Calculating Time steps " << endl;
|
|
|
Get_dt_min_max();
|
|
|
cout << " " << endl;
|
|
|
cout << " Get_dt_min = " << dt_min << endl;
|
|
|
cout << " Get_dt_max = " << dt_max << endl;
|
|
|
cout << " " << endl;
|
|
|
|
|
|
cout.setf(ios::scientific,ios_base::floatfield);
|
|
|
cout.precision(8);
|
|
|
|
|
|
cout << " Starting class partitioning" << endl;
|
|
|
N_class = (int)ceil(log((dt_max / dt_min)) / log(2.0 * m + 1.0));
|
|
|
|
|
|
if(scalbSty == 1 || N_class == 0) //only 1 if DGTD_USE_LTS is NOT defined
|
|
|
N_class = 1;
|
|
|
|
|
|
LocTimeSteps = new double[N_class];
|
|
|
ClassTetraCnt = new int[N_class];
|
|
|
ClassPMLTetraCnt = new int[N_class];
|
|
|
|
|
|
for(int i = 0 ; i < N_class; i++)
|
|
|
{
|
|
|
ClassTetraCnt[i] = 0;
|
|
|
ClassPMLTetraCnt[i] = 0;
|
|
|
}
|
|
|
|
|
|
cout << " " << endl;
|
|
|
cout << " N_class: " << N_class << endl;
|
|
|
|
|
|
if(scalbSty)
|
|
|
TimeStep_dt = dt_min;
|
|
|
|
|
|
numberPML = 0;
|
|
|
|
|
|
|
|
|
for(int i = 0 ; i < N_class; i++)
|
|
|
{
|
|
|
LocalDt_down = pow((2.0 * m + 1.0), i) * dt_min;
|
|
|
LocalDt_up = pow((2.0 * m + 1.0), (i + 1)) * dt_min;
|
|
|
LocTimeSteps[i] = 1.0 * LocalDt_down;
|
|
|
|
|
|
#pragma omp parallel for schedule(dynamic) shared(ClassCnt,PMLClassCnt) private(tet, LocalDt)
|
|
|
for(int j = 0; j < tetraCNT; j ++)
|
|
|
{
|
|
|
tet = &(tetARRAY[j]);
|
|
|
if(scalbSty)
|
|
|
{
|
|
|
tet->set_LTS_Flag(i);
|
|
|
tet->set_Class_dt(1.0 * LocalDt_down);
|
|
|
bool isExcitation = tet->get_ExcitationFlag();
|
|
|
|
|
|
#pragma omp atomic
|
|
|
ClassCnt++;
|
|
|
|
|
|
if (tet->get_PML_Flag() && !isExcitation)
|
|
|
{
|
|
|
#pragma omp atomic
|
|
|
PMLClassCnt++;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
// Increment the count of tetrahedra in this class
|
|
|
#pragma omp atomic
|
|
|
ClassCnt++;
|
|
|
}
|
|
|
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
LocalDt = tet->get_Stability_dt();
|
|
|
//LocalDt = 0.93 * LocalDt;
|
|
|
if(LocalDt_down <= LocalDt && (LocalDt < LocalDt_up || i == N_class - 1))
|
|
|
{
|
|
|
tet->set_LTS_Flag(i);
|
|
|
tet->set_Class_dt(1.0 * LocalDt_down);
|
|
|
bool isExcitation = tet->get_ExcitationFlag();
|
|
|
|
|
|
if (tet->get_PML_Flag() && !isExcitation)
|
|
|
{
|
|
|
#pragma omp atomic
|
|
|
PMLClassCnt++;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
// Increment the count of tetrahedra in this class
|
|
|
#pragma omp atomic
|
|
|
ClassCnt++;
|
|
|
}
|
|
|
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
ClassTetraCnt[i] = ClassCnt;
|
|
|
ClassPMLTetraCnt[i] = PMLClassCnt;
|
|
|
|
|
|
numberPML += PMLClassCnt;
|
|
|
|
|
|
cout << " Number of Tetra in class: " << i << " = " << ClassTetraCnt[i] << endl;
|
|
|
cout << " Number of PML Tetra in class: " << i << " = " << ClassPMLTetraCnt[i] << std::endl;
|
|
|
cout << "-------------------------------------------------------------" << endl;
|
|
|
ClassCnt = 0;
|
|
|
PMLClassCnt = 0;
|
|
|
}
|
|
|
cout << "Total Number of PML Tetras = " << numberPML << endl;
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////
|
|
|
// In this part we check if there is enough elements in one class to be efficient //
|
|
|
// if not, those elements will be moved to the previous class //
|
|
|
////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
if(N_class > 1)
|
|
|
{
|
|
|
bool reduceN_class = false;
|
|
|
bool balanced = false;
|
|
|
for(int i = 0; i < N_class - 1; i++)
|
|
|
{
|
|
|
int classN = (N_class - 1) - i;
|
|
|
|
|
|
fp_t number_of_tetra_in_classN = (fp_t)ClassTetraCnt[classN] + (fp_t)ClassPMLTetraCnt[classN];
|
|
|
fp_t relClassCnt = number_of_tetra_in_classN / tetraCNT;
|
|
|
fp_t previousClassDt = pow((2.0 * m + 1.0), classN - 1) * dt_min;
|
|
|
if (relClassCnt < ClassRelMinCNT && number_of_tetra_in_classN < ClassMinCNT)
|
|
|
{
|
|
|
if(i == 0)
|
|
|
{
|
|
|
reduceN_class = true;
|
|
|
}
|
|
|
balanced = true;
|
|
|
ClassTetraCnt[classN - 1] += ClassTetraCnt[classN];
|
|
|
ClassTetraCnt[classN] = 0;
|
|
|
ClassPMLTetraCnt[classN - 1] += ClassPMLTetraCnt[classN];
|
|
|
ClassPMLTetraCnt[classN] = 0;
|
|
|
#pragma omp parallel for schedule(dynamic) private(tet)
|
|
|
for(int j = 0; j < tetraCNT; j ++)
|
|
|
{
|
|
|
tet = &(tetARRAY[j]);
|
|
|
if(tetARRAY[j].get_LTS_Flag() == classN)
|
|
|
{
|
|
|
tet->set_LTS_Flag(classN - 1);
|
|
|
tet->set_Class_dt(1.0 * previousClassDt);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
if(reduceN_class)
|
|
|
{
|
|
|
N_class -= 1;
|
|
|
}
|
|
|
|
|
|
if(balanced)
|
|
|
{
|
|
|
cout << "=================================" << endl;
|
|
|
|
|
|
cout << "Classes have been balanced\n";
|
|
|
for (int i = 0; i < N_class; i++)
|
|
|
{
|
|
|
cout << " Number of Tetra in class: " << i << " = " << ClassTetraCnt[i] << std::endl;
|
|
|
cout << " Number of PML Tetra in class: " << i << " = " << ClassPMLTetraCnt[i] << std::endl << endl;
|
|
|
}
|
|
|
cout << "=================================" << endl;
|
|
|
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// Check that all the elements are associated with a class
|
|
|
for(int j = 0; j < tetraCNT; j ++)
|
|
|
{
|
|
|
if(tetARRAY[j].get_LTS_Flag() < 0)
|
|
|
cout << " tet " << tetARRAY[j].getcnt() << " has LTS_flag = " << tetARRAY[j].get_LTS_Flag() << " and LTS time step " << tetARRAY[j].get_Class_dt() << endl;
|
|
|
}
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
// In this part we order the tetras in the most efficient way for the GPU //
|
|
|
// - 1st: we order by class, from smaller time-step to larger //
|
|
|
// - 2nd: each class is ordered by nonConformal tetras 1st and then conformal ones //
|
|
|
// - 3rd: we order the nonconformal ones as: excitation (ordered by number of exciting faces 1-2-3) - nonExcitation //
|
|
|
// - 4th: we order the conformal ones as: nonRegular - Reg1 - Reg2 - ... //
|
|
|
// //
|
|
|
// *** NOTE: in nonConformal we also include any tetra with a face without neighbor *** //
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
|
|
// -----------------------------------------------------------------------------------------------------
|
|
|
// Determine cutoff between Normal-regular groups and Regular-PML groups.
|
|
|
// Assumptions:
|
|
|
// - regularGroup == 0 -> Irregular (both non-PML and PML)
|
|
|
// - regularGroup > 0 -> Regular
|
|
|
// - Groups are assigned so that all non-PML regular groups use smaller IDs
|
|
|
// than any PML regular groups (i.e., there exists a clean cutoff).
|
|
|
//
|
|
|
// Outputs:
|
|
|
// regularCNT_Normal : number of regular groups used by non-PML (g in [1 .. cutoff-1])
|
|
|
// regularCNT_PML : number of regular groups used by PML (g in [cutoff .. regularCNT-1])
|
|
|
// -----------------------------------------------------------------------------------------------------
|
|
|
|
|
|
cout << "-----------------------" << endl;
|
|
|
|
|
|
|
|
|
|
|
|
if (regularCNT > 1)
|
|
|
{
|
|
|
regularCNT_Normal = 0;
|
|
|
for(int j = 0; j < tetraCNT; j ++)
|
|
|
{
|
|
|
tet = &(tetARRAY[j]);
|
|
|
int groupID = tet->getRegularGroup();
|
|
|
bool isPML = tet->get_PML_Flag();
|
|
|
|
|
|
if (!isPML)
|
|
|
{
|
|
|
if ((groupID > regularCNT_Normal))
|
|
|
{
|
|
|
regularCNT_Normal = groupID;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
regularCNT_PML = regularCNT - regularCNT_Normal - 1;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
regularCNT_Normal = 0;
|
|
|
regularCNT_PML = 0;
|
|
|
}
|
|
|
|
|
|
cout << "regularCNT = " << regularCNT << endl;
|
|
|
std::cout << "regularCNT_Normal = " << regularCNT_Normal << "\n";
|
|
|
std::cout << "regularCNT_PML = " << regularCNT_PML << "\n";
|
|
|
|
|
|
|
|
|
int NumGroups = regularCNT + 4 + portCNT;
|
|
|
cout << "NumGroups = " << NumGroups << endl;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// -----------------------
|
|
|
// Populate the TetraIndex
|
|
|
// -----------------------
|
|
|
// ----------------------------------------------------------------- //
|
|
|
// Store the tetrahedra in the ClassTetraIndexAux array //
|
|
|
// ----------------------------------------------------------------- //
|
|
|
|
|
|
list<int>* ClassTetraIndexAux = new list<int>[NumGroups];
|
|
|
ClassTetraIndex = new int*[N_class];
|
|
|
ClassExcitationCount = new int[N_class];
|
|
|
ClassExcitationOffset = new int[N_class];
|
|
|
ClassExcitation_sc_CNT = new int[N_class];
|
|
|
list<int> ClassExcitationPerFaceList[(int)pow(2, NumOfFaces) - 1];
|
|
|
|
|
|
if (portCNT > 0)
|
|
|
{
|
|
|
ClassPortCnt_h = new int[N_class * portCNT];
|
|
|
ClassPortOffset_h = new int[N_class * portCNT];
|
|
|
ClassPortNum_h = new int[N_class * portCNT];
|
|
|
}
|
|
|
|
|
|
for(int i = 0 ; i < N_class; i++)
|
|
|
{
|
|
|
ClassTetraIndex[i] = new int[ClassTetraCnt[i] + ClassPMLTetraCnt[i]];
|
|
|
ClassExcitationCount[i] = 0;
|
|
|
ClassExcitationOffset[i] = 0;
|
|
|
ClassExcitation_sc_CNT[i] = 0;
|
|
|
}
|
|
|
|
|
|
|
|
|
int PML_Case = NumGroups - 1;
|
|
|
int Scattering_Excited_Case = NumGroups - 2;
|
|
|
int Total_Excited_Case = NumGroups - 3;
|
|
|
int NC_Case = NumGroups - 4;
|
|
|
int Port_Case = NumGroups - 4 - portCNT; // First port case
|
|
|
int Conformal_Case = 0;
|
|
|
|
|
|
int index;
|
|
|
int DGface_bc;
|
|
|
int auxCNT = 0;
|
|
|
excitationFaces = 0;
|
|
|
|
|
|
int ClassOffSet = 0;
|
|
|
ClassTetraOffset = new int[N_class];
|
|
|
ClassPMLTetraOffset = new int[N_class];
|
|
|
|
|
|
for(int i = 0 ; i < N_class; i++)
|
|
|
{
|
|
|
for(int j = 0; j < tetraCNT; j ++)
|
|
|
{
|
|
|
tet = &(tetARRAY[j]);
|
|
|
|
|
|
bool isExcite = tet->ExcitationFlag;
|
|
|
bool isPML = tet->get_PML_Flag();
|
|
|
bool isNC = tet->getIsNC();
|
|
|
|
|
|
if(tet->LTS_Flag == i)
|
|
|
{
|
|
|
if(tet->getRegularGroup() > 0)
|
|
|
ClassTetraIndexAux[tet->getRegularGroup()].push_back(tet->getcnt());
|
|
|
else if(!isNC && tet->get_NeighNum() == 4 && !isPML && !isExcite)
|
|
|
ClassTetraIndexAux[Conformal_Case].push_back(tet->getcnt());
|
|
|
else if (isPML)
|
|
|
ClassTetraIndexAux[PML_Case].push_back(tet->getcnt());
|
|
|
else
|
|
|
{
|
|
|
if(isExcite)
|
|
|
{
|
|
|
ClassExcitationCount[i]++;
|
|
|
int face = 0;
|
|
|
for(int k = 0; k < NumOfFaces; k++)
|
|
|
{
|
|
|
if (!tet->fc[k] || !tet->fc[k]->bcPtr) continue; // optional null guard
|
|
|
DGface_bc = tet->fc[k]->bcPtr->getbType();
|
|
|
if(DGface_bc == planeWaveType || DGface_bc == portType || DGface_bc == pmlType)
|
|
|
{
|
|
|
face += (1 << k);
|
|
|
excitationFaces++;
|
|
|
}
|
|
|
}
|
|
|
if (face > 0)
|
|
|
ClassExcitationPerFaceList[face - 1].push_back(tet->getcnt());
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
ClassTetraIndexAux[NC_Case].push_back(tet->getcnt());
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------- //
|
|
|
// Excitation //
|
|
|
// ----------------------------------------------------------------- //
|
|
|
|
|
|
|
|
|
ClassExcitationOffset[i] = auxCNT;
|
|
|
auxCNT += ClassExcitationCount[i];
|
|
|
|
|
|
for(int j = (1 << NumOfFaces) - 2; j >= 0; j--)
|
|
|
{
|
|
|
int listIndex = faceExcitationOrder[j] - 1;
|
|
|
int auxSize = ClassExcitationPerFaceList[listIndex].size();
|
|
|
|
|
|
for(int k = 0; k < auxSize; k++)
|
|
|
{
|
|
|
int tet_id = ClassExcitationPerFaceList[listIndex].back();
|
|
|
tet = &(tetARRAY[tet_id]);
|
|
|
|
|
|
if (PlaneWaveBCFlag)
|
|
|
{
|
|
|
if (tet->scattering_region)
|
|
|
ClassTetraIndexAux[Scattering_Excited_Case].push_back(tet_id);
|
|
|
else
|
|
|
ClassTetraIndexAux[Total_Excited_Case].push_front(tet_id);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
int port_id = -1;
|
|
|
for (int k=0; k<NumOfFaces; k++)
|
|
|
{
|
|
|
int bc_number = tet->getbc(k);
|
|
|
if (tet->fc[k]->bcPtr->getbType() == portType)
|
|
|
{
|
|
|
int pnum = bcNumToPnum[bc_number];
|
|
|
ClassTetraIndexAux[Port_Case+pnum].push_front(tet_id);
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
ClassExcitationPerFaceList[listIndex].pop_back();
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------- //
|
|
|
// Store the tetrahedra in the ClassTetraIndex array //
|
|
|
// ----------------------------------------------------------------- //
|
|
|
|
|
|
index = 0;
|
|
|
|
|
|
auto addGroupToIndex = [&](int group) {
|
|
|
int size = ClassTetraIndexAux[group].size();
|
|
|
for (int l = 0; l < size; l++)
|
|
|
{
|
|
|
ClassTetraIndex[i][index++] = ClassTetraIndexAux[group].front();
|
|
|
ClassTetraIndexAux[group].pop_front();
|
|
|
}
|
|
|
};
|
|
|
|
|
|
|
|
|
// -----------------------------------------------------------------------------------------------
|
|
|
// Order: Scattered Field Excited, Total Field Excited, NC, Conformal, Regular, PML, Regular PML
|
|
|
// -----------------------------------------------------------------------------------------------
|
|
|
|
|
|
if (PlaneWaveBCFlag)
|
|
|
{
|
|
|
addGroupToIndex(Scattering_Excited_Case);
|
|
|
ClassExcitation_sc_CNT[i] = index;
|
|
|
addGroupToIndex(Total_Excited_Case);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
for(int p = 0; p < portCNT; p++)
|
|
|
{
|
|
|
ClassPortOffset_h[i * portCNT + p] = index;
|
|
|
addGroupToIndex(Port_Case + p);
|
|
|
ClassPortCnt_h[i * portCNT + p] = index - ClassPortOffset_h[i * portCNT + p];
|
|
|
ClassPortNum_h[i * portCNT + p] = p;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
addGroupToIndex(NC_Case);
|
|
|
addGroupToIndex(Conformal_Case);
|
|
|
|
|
|
|
|
|
|
|
|
// Add Regular Tetrahedra
|
|
|
// WE assume that there are only 6 regular tetrehedron that are non-PML
|
|
|
if ( regularCNT > 1)
|
|
|
{
|
|
|
for (int k = 1; k <= regularCNT_Normal; k++)
|
|
|
{
|
|
|
addGroupToIndex(k);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
cout << "Class " << i << " | PML index = " << index << endl;
|
|
|
|
|
|
addGroupToIndex(PML_Case);
|
|
|
|
|
|
// Add PML Regular Tetrahedra
|
|
|
if ( regularCNT > 6)
|
|
|
{
|
|
|
for (int k = regularCNT_Normal; k < regularCNT; k++)
|
|
|
{
|
|
|
addGroupToIndex(k);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
ClassTetraOffset[i] = ClassOffSet;
|
|
|
ClassOffSet += ClassTetraCnt[i] + ClassPMLTetraCnt[i];
|
|
|
ClassPMLTetraOffset[i] = ClassOffSet - ClassPMLTetraCnt[i];
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
for(int i = 0; i < N_class; i++)
|
|
|
{
|
|
|
std::cout << " ClassExcitationCount[" << i << "] = " << ClassExcitationCount[i] << std::endl;
|
|
|
std::cout << " ClassTetraOffset[" << i << "] = " << ClassTetraOffset[i] << std::endl;
|
|
|
std::cout << " ClassPMLTetraOffset[" << i << "] = " << ClassPMLTetraOffset[i] << std::endl;
|
|
|
}
|
|
|
|
|
|
std::cout << "excitationFaces = " << excitationFaces << std::endl;
|
|
|
std::cout << "========================================================" << std::endl;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
OpenMP Local Time-Stepping for matrix free Recursive
|
|
|
|
|
|
Explained in "Dissipative terms and local time-stepping improvements
|
|
|
in a spatial high order Discontinuous Galerkin scheme
|
|
|
for the time-domain Maxwell’s equations" by E. Montseny
|
|
|
*/
|
|
|
|
|
|
void FemGrp::ComputeE_MatrixFree(int class_i, fp_t dt_i){
|
|
|
if(class_i == 0){
|
|
|
LeapFrogE(class_i, LocTimeSteps[class_i]);
|
|
|
}
|
|
|
else{
|
|
|
LeapFrogE(class_i, LocTimeSteps[class_i]);
|
|
|
ComputeE_MatrixFree(class_i - 1, LocTimeSteps[class_i-1]);
|
|
|
ComputeH_MatrixFree(class_i - 1, LocTimeSteps[class_i-1]);
|
|
|
ComputeE_MatrixFree(class_i - 1, LocTimeSteps[class_i-1]);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
void FemGrp::ComputeH_MatrixFree(int class_i, fp_t dt_i){
|
|
|
if(class_i == 0){
|
|
|
LeapFrogH(class_i, LocTimeSteps[class_i]);
|
|
|
}
|
|
|
else{
|
|
|
LeapFrogH(class_i, LocTimeSteps[class_i]);
|
|
|
ComputeH_MatrixFree(class_i - 1, LocTimeSteps[class_i - 1]);
|
|
|
ComputeE_MatrixFree(class_i - 1, LocTimeSteps[class_i - 1]);
|
|
|
ComputeH_MatrixFree(class_i - 1, LocTimeSteps[class_i - 1]);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
void FemGrp::LeapFrogE(int class_i, fp_t dt_i){
|
|
|
int i;
|
|
|
int n;
|
|
|
fp_t InitTime = 0.0;
|
|
|
n = LocalExciIndexE[class_i];
|
|
|
|
|
|
#pragma omp parallel for schedule(dynamic) private(i)
|
|
|
for(i = 0; i < ClassTetraCnt[class_i]; i++){
|
|
|
tetra* tet = &(tetARRAY[ClassTetraIndex[class_i][i]]);
|
|
|
tet->LocalFaceToTetraMapE_NMF1(*en_1, *en, *hn_12, dt_i, InitTime + (n + 0.5) * dt_i);
|
|
|
}
|
|
|
|
|
|
#pragma omp parallel for schedule(dynamic) private(i)
|
|
|
for(i = 0 ; i < DimE; i++){
|
|
|
en->setentry(i, en_1->getentry(i));
|
|
|
}
|
|
|
LocalExciIndexE[class_i] = LocalExciIndexE[class_i] + 1;
|
|
|
}
|
|
|
|
|
|
void FemGrp::LeapFrogH(int class_i, fp_t dt_i){
|
|
|
int i;
|
|
|
int n;
|
|
|
fp_t InitTime = 0.0;
|
|
|
n = LocalExciIndexH[class_i];
|
|
|
|
|
|
#pragma omp parallel for schedule(dynamic) private(i)
|
|
|
for(i = 0; i < ClassTetraCnt[class_i]; i ++){
|
|
|
tetra* tet = &(tetARRAY[ClassTetraIndex[class_i][i]]);
|
|
|
tet->LocalFaceToTetraMapH_NMF1(*hn_32, *en_1, *hn_12, dt_i, InitTime + (n + 1.0) * dt_i);
|
|
|
}
|
|
|
|
|
|
#pragma omp parallel for schedule(dynamic) private(i)
|
|
|
for(i = 0 ; i < DimH ; i++){
|
|
|
hn_12->setentry(i, hn_32->getentry(i));
|
|
|
}
|
|
|
LocalExciIndexH[class_i] = LocalExciIndexH[class_i] + 1;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
Local Time-Stepping Update
|
|
|
*/
|
|
|
void FemGrp::LTS_TimeUpdateGlobal_MatrixFree(){
|
|
|
int i, n;
|
|
|
fp_t InitTime = 0.0;
|
|
|
|
|
|
LocalExciIndexE = new int[N_class];
|
|
|
LocalExciIndexH = new int[N_class];
|
|
|
|
|
|
for(i = 0; i < N_class; i++){
|
|
|
LocalExciIndexE[i] = 0;
|
|
|
LocalExciIndexH[i] = 0;
|
|
|
}
|
|
|
|
|
|
NtimeSteps = (int)ceil((FinalTime - InitTime) / LocTimeSteps[N_class -1]);
|
|
|
|
|
|
cout.setf(ios::scientific,ios_base::floatfield);
|
|
|
cout.precision(15);
|
|
|
|
|
|
cout << "Start Time Stepping " << endl;
|
|
|
cout << "FinalTime = " << FinalTime << endl;
|
|
|
cout << "TimeStep_dt = " << LocTimeSteps[N_class -1] << endl;
|
|
|
cout << "tetraCNT = " << tetraCNT << endl;
|
|
|
cout << "NtimeSteps = " << NtimeSteps << endl;
|
|
|
|
|
|
timer_start("Time Stepping", ' ');
|
|
|
fp_t Frequency = freq;
|
|
|
fp_t dt_nyquist = 1.0 / (2.0 * Frequency * MEGA);
|
|
|
// fp_t dt_nyquist = 2.0 / (Frequency * MEGA); //That's wrong
|
|
|
fp_t dt_sample = (1 / SamplingRate) * dt_nyquist;
|
|
|
int postProcIters = (int)ceil(dt_sample / LocTimeSteps[N_class - 1]);
|
|
|
int printScreenIters = 2 * postProcIters;
|
|
|
|
|
|
Write_TD_Data(postProcIters, NtimeSteps);
|
|
|
|
|
|
cout << "dt_nyquist = " << dt_nyquist << endl;
|
|
|
cout << "dt_sample = " << dt_sample << endl;
|
|
|
cout << "printScreenIters = " << printScreenIters << endl;
|
|
|
cout << "postProcIters = " << postProcIters << endl;
|
|
|
cout << "N_class = " << N_class <<endl;
|
|
|
|
|
|
size_t total_time = 0;
|
|
|
fp_t current_time = 0;
|
|
|
current_time -= (double)dt_sample * 1e9;
|
|
|
|
|
|
SYSTEM_MEM_USAGE();
|
|
|
timer_start("Start Time Stepping", ' ');
|
|
|
|
|
|
for(n = 0 ; n < NtimeSteps ; n++)
|
|
|
{
|
|
|
ComputeE_MatrixFree(N_class - 1 , LocTimeSteps[N_class - 1]);
|
|
|
ComputeH_MatrixFree(N_class - 1 , LocTimeSteps[N_class - 1]);
|
|
|
|
|
|
if(n % postProcIters == 0)
|
|
|
{
|
|
|
if(write_AnalyticalIncidentProbes)
|
|
|
{
|
|
|
if(probeCNT > 0)
|
|
|
{
|
|
|
CalculateL2Error(n, LocTimeSteps[N_class - 1], ExcitFlag);
|
|
|
CalculateL2ErrorProbes(n, LocTimeSteps[N_class - 1], ExcitFlag);
|
|
|
}
|
|
|
writeAnalyticalIncidentPWProbes(n);
|
|
|
}
|
|
|
|
|
|
if(write_probes && probeCNT > 0)
|
|
|
{
|
|
|
writeFieldProbe(n);
|
|
|
}
|
|
|
|
|
|
if(write_fields)
|
|
|
{
|
|
|
writeFieldGlobal(n);
|
|
|
}
|
|
|
|
|
|
|
|
|
if(portCNT != 0)
|
|
|
{
|
|
|
EvaluateSparametersGlobal(n, LocTimeSteps[N_class -1], true);
|
|
|
}
|
|
|
|
|
|
cout << "E field norm " << en_1->magnitude() << endl;
|
|
|
//cout << "H field norm " << hn_32->magnitude() << endl;
|
|
|
total_time += timer_stop(' ');
|
|
|
timer_start(to_string(postProcIters)+" steps ", ' ');
|
|
|
DEBUG_INFO("Percentage Completed :" + to_string((double)n / (double)NtimeSteps * 100.0) + "%");
|
|
|
current_time += (double)dt_sample * 1e9;
|
|
|
DEBUG_INFO("Current Time : " + to_string(current_time) + "ns");
|
|
|
DEBUG_INFO("Average iteration time : "+ to_string(((double)total_time / (double)(n + 1.0))) + " sec");
|
|
|
}
|
|
|
}
|
|
|
|
|
|
DEBUG_INFO("Total iteration time: "+ to_string(((double)total_time)) + " sec");
|
|
|
timer_stop(' ');
|
|
|
}
|
|
|
|
|
|
//*****************
|
|
|
|
|
|
void FemGrp::Write_TD_Data(int tsPerSample, int nTimeSteps){
|
|
|
// fp_t to = 4.0 * pow(10.0, -9.0);
|
|
|
// fp_t tau = 0.8 * pow(10.0, -9.0);
|
|
|
char TD_data[180];
|
|
|
|
|
|
sprintf(TD_data, "./PROBES/%s.TD_Data", fname);
|
|
|
ofstream TD_datafile(TD_data, ios_base::out);
|
|
|
if(!TD_datafile){
|
|
|
cout << "Error in opening file: " << TD_data << "for write"<< endl;
|
|
|
}
|
|
|
|
|
|
TD_datafile << LocTimeSteps[N_class -1] << endl;
|
|
|
TD_datafile << nTimeSteps << endl;
|
|
|
TD_datafile << To << endl;
|
|
|
TD_datafile << Tau << endl;
|
|
|
TD_datafile << tsPerSample << endl;
|
|
|
TD_datafile << probeCNT << endl;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Modified by Qi Jian to use an octree to locate the probes and store their barycentric coordinates
|
|
|
void FemGrp::readPROBE()
|
|
|
{
|
|
|
// Read only the nodes belonging to this subdomain and neighbors
|
|
|
char nname[StrLenShort];
|
|
|
|
|
|
// Read the probe file
|
|
|
sprintf(nname, "%s.probe", fname);
|
|
|
rapidcsv::Document probe_doc(nname);
|
|
|
std::vector<double> x_col = probe_doc.GetColumn<double>("X");
|
|
|
std::vector<double> y_col = probe_doc.GetColumn<double>("Y");
|
|
|
std::vector<double> z_col = probe_doc.GetColumn<double>("Z");
|
|
|
|
|
|
// Check that all the columns have the same size
|
|
|
assert(x_col.size() == y_col.size());
|
|
|
assert(y_col.size() == z_col.size());
|
|
|
assert(z_col.size() == x_col.size());
|
|
|
|
|
|
|
|
|
probeCNT = x_col.size();
|
|
|
if(padeCNT > probeCNT)
|
|
|
{
|
|
|
padeCNT = probeCNT;
|
|
|
cout << "Pade Number Of Elements REDUCED to " << probeCNT << endl;
|
|
|
}
|
|
|
|
|
|
probes_bary.resize(probeCNT);
|
|
|
std::cout << "Compute the Barycentric coordinates of the Probes" << std::endl;
|
|
|
const double tol = 1e-8;
|
|
|
|
|
|
//#pragma omp parallel for schedule(dynamic)
|
|
|
for (int node_id = 0; node_id < probeCNT; ++node_id)
|
|
|
{
|
|
|
double probe_xyz[3] = {x_col[node_id] * unit, y_col[node_id] * unit, z_col[node_id] * unit};
|
|
|
|
|
|
std::vector<std::pair<int, std::array<double, 4>>> found_tets;
|
|
|
bool success = octree_object.findTetraInOctree(probe_xyz, found_tets, tol);
|
|
|
|
|
|
if (success)
|
|
|
{
|
|
|
probes_bary[node_id].first = static_cast<int>(found_tets.size());
|
|
|
probes_bary[node_id].second = found_tets;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
probes_bary[node_id].first = -1;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// Report and verify
|
|
|
bool error_flag = false;
|
|
|
for (int i = 0; i < probeCNT; ++i)
|
|
|
{
|
|
|
if (probes_bary[i].first < 0)
|
|
|
{
|
|
|
std::cerr << "Node " << i << " not found in simulation domain" << std::endl;
|
|
|
double probe_xyz[3] = {x_col[i] * unit, y_col[i] * unit, z_col[i] * unit};
|
|
|
std::cerr << probe_xyz[0] << " " << probe_xyz[1] << " " << probe_xyz[2] << std::endl;
|
|
|
error_flag = true;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
if (error_flag)
|
|
|
{
|
|
|
std::cerr << "Error: Some nodes were not found in the simulation domain. Exiting." << std::endl;
|
|
|
std::exit(EXIT_FAILURE);
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// TODO!!!
|
|
|
/*
|
|
|
// - excitationFaces (flattened exc. faces count)
|
|
|
// - PortFacePidx_h (int[excitationFaces], -1 for non-port faces)
|
|
|
// - PortFaceCentroid_h (fp_t_ts[excitationFaces*3], centroid coords per face)
|
|
|
*/
|
|
|
// Uses TetID_excitation_h (owner tet id) to compute barycentrics of each
|
|
|
// port-face centroid inside its owning tetra. No octree/hydra traversal.
|
|
|
//
|
|
|
// Inputs assumed ready:
|
|
|
// - excitationFaces
|
|
|
// - PortFacePidx_h : int[excitationFaces], -1 if NOT a port face
|
|
|
// - PortFaceCentroid_h : fp_t_ts[3*excitationFaces] (cx,cy,cz per face)
|
|
|
// - TetID_excitation_h : int[excitationFaces] (owner tetra index 0..tetraCNT-1)
|
|
|
// - FaceID_excitation_h : int[excitationFaces] (optional, not strictly needed here)
|
|
|
//
|
|
|
// Output:
|
|
|
// - portFaceCentroid_bary[f].first = 1 on success, -1 if non-port or error
|
|
|
// - portFaceCentroid_bary[f].second = { { tetId, {l0,l1,l2,l3} } } (exactly one entry)
|
|
|
void FemGrp::prepPortFaceCentroidPROBE()
|
|
|
{
|
|
|
if (portCNT <= 0 || !PortFacePidx_h || !PortFaceCentroid_h || !TetID_excitation_h)
|
|
|
{
|
|
|
std::cerr << "[prepPortFaceCentroidPROBE] Missing inputs or no ports.\n";
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
auto det3 = [](const double x[3], const double y[3], const double z[3])
|
|
|
{
|
|
|
return x[0]*(y[1]*z[2]-y[2]*z[1])
|
|
|
- x[1]*(y[0]*z[2]-y[2]*z[0])
|
|
|
+ x[2]*(y[0]*z[1]-y[1]*z[0]);
|
|
|
};
|
|
|
|
|
|
std::cout << "Compute barycentric coords of port-face centroids (using TetID_excitation_h)\n";
|
|
|
|
|
|
portFaceCentroid_bary.clear();
|
|
|
portFaceCentroid_bary.resize(excitationFaces);
|
|
|
|
|
|
int done = 0, errors = 0;
|
|
|
|
|
|
for (int f = 0; f < excitationFaces; ++f)
|
|
|
{
|
|
|
// Skip non-port faces
|
|
|
if (PortFacePidx_h[f] < 0)
|
|
|
{
|
|
|
portFaceCentroid_bary[f].first = -1;
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
// Owner tetra index from your pre-filled array
|
|
|
const int tId = TetID_excitation_h[f];
|
|
|
if (tId < 0 || tId >= tetraCNT)
|
|
|
{
|
|
|
std::cerr << "[PortCentroid] Invalid owner tId=" << tId << " for excitation face f=" << f << "\n";
|
|
|
portFaceCentroid_bary[f].first = -1;
|
|
|
++errors;
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
const tetra& T = tetARRAY[tId];
|
|
|
|
|
|
// Tetra vertices
|
|
|
double v[4][3];
|
|
|
for (int i = 0; i < 4; ++i)
|
|
|
{
|
|
|
v[i][0] = T.nd[i]->getCoord().getx();
|
|
|
v[i][1] = T.nd[i]->getCoord().gety();
|
|
|
v[i][2] = T.nd[i]->getCoord().getz();
|
|
|
}
|
|
|
|
|
|
// Face centroid (cx,cy,cz)
|
|
|
const fp_t_ts* C = &PortFaceCentroid_h[3 * f];
|
|
|
const double P[3] = { (double)C[0], (double)C[1], (double)C[2] };
|
|
|
|
|
|
// Barycentric via Cramer's rule
|
|
|
double a[3] = { v[0][0]-v[3][0], v[0][1]-v[3][1], v[0][2]-v[3][2] };
|
|
|
double b[3] = { v[1][0]-v[3][0], v[1][1]-v[3][1], v[1][2]-v[3][2] };
|
|
|
double c[3] = { v[2][0]-v[3][0], v[2][1]-v[3][1], v[2][2]-v[3][2] };
|
|
|
double r[3] = { P[0]-v[3][0], P[1]-v[3][1], P[2]-v[3][2] };
|
|
|
|
|
|
const double D = det3(a,b,c);
|
|
|
if (std::abs(D) == 0.0)
|
|
|
{
|
|
|
std::cerr << "[PortCentroid] Degenerate tetra (D=0) at tId=" << tId << " for f=" << f << "\n";
|
|
|
portFaceCentroid_bary[f].first = -1;
|
|
|
++errors;
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
double l0 = det3(r,b,c) / D;
|
|
|
double l1 = det3(a,r,c) / D;
|
|
|
double l2 = det3(a,b,r) / D;
|
|
|
double l3 = 1.0 - (l0 + l1 + l2);
|
|
|
|
|
|
// Gentle renormalization (handles tiny FP drift)
|
|
|
double sumL = l0 + l1 + l2 + l3;
|
|
|
if (std::abs(sumL - 1.0) > 1e-10)
|
|
|
{
|
|
|
l3 = 1.0 - (l0 + l1 + l2);
|
|
|
}
|
|
|
|
|
|
// Store exactly one (tet, lambdas)
|
|
|
std::vector<std::pair<int, std::array<double,4>>> vec;
|
|
|
vec.emplace_back(tId, std::array<double,4>{l0,l1,l2,l3});
|
|
|
|
|
|
portFaceCentroid_bary[f].first = 1;
|
|
|
portFaceCentroid_bary[f].second = std::move(vec);
|
|
|
++done;
|
|
|
//cout << l0 << " " << l1 << " " << l2 << " " << l3 << "\n";
|
|
|
}
|
|
|
|
|
|
std::cout << "[prepPortFaceCentroidPROBE] Completed: " << done
|
|
|
<< " faces; errors=" << errors << ".\n";
|
|
|
|
|
|
if (errors > 0) {
|
|
|
std::cerr << "Error: Some port-face centroids could not be assigned.\n";
|
|
|
std::exit(EXIT_FAILURE);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
void FemGrp::prepPortFaceCentroidPROBE()
|
|
|
{
|
|
|
// Requires:
|
|
|
// - excitationFaces (flattened exc. faces count)
|
|
|
// - PortFacePidx_h (int[excitationFaces], -1 for non-port faces)
|
|
|
// - PortFaceCentroid_h (fp_t_ts[excitationFaces*3], centroid coords per face)
|
|
|
// - octree_object.findTetraInOctree(double[3], out, tol)
|
|
|
|
|
|
if (portCNT <= 0 || !PortFacePidx_h || !PortFaceCentroid_h)
|
|
|
{
|
|
|
std::cerr << "[readPortFaceCentroidPROBE] No ports or centroid buffers not ready.\n";
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
const double tol = 1e-3;
|
|
|
std::cout << "Compute the Barycentric coordinates of Probes on Ports" << std::endl;
|
|
|
|
|
|
portFaceCentroid_bary.clear();
|
|
|
portFaceCentroid_bary.resize(excitationFaces);
|
|
|
|
|
|
int not_found = 0;
|
|
|
int done = 0;
|
|
|
|
|
|
long long total_found_tets = 0; // sum of found_tets.size() over successes
|
|
|
int success_faces = 0; // number of faces with success==true
|
|
|
|
|
|
|
|
|
// #pragma omp parallel for schedule(dynamic) reduction(+:not_found,done) // (optional)
|
|
|
for (int f = 0; f < excitationFaces; ++f)
|
|
|
{
|
|
|
// Only process port faces
|
|
|
if (PortFacePidx_h[f] < 0)
|
|
|
{
|
|
|
portFaceCentroid_bary[f].first = -1; // mark as N/A (non-port)
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
// Centroid coordinates of face f
|
|
|
// NOTE: These come from node coords directly; do NOT rescale unless your mesh needs it.
|
|
|
const fp_t_ts* C = &PortFaceCentroid_h[3 * f];
|
|
|
double xyz[3] = { (double)C[0], (double)C[1], (double)C[2] };
|
|
|
std::vector<std::pair<int, std::array<double,4>>> found_tets;
|
|
|
bool success = octree_object.findTetraInOctree(xyz, found_tets, tol);
|
|
|
|
|
|
if (success)
|
|
|
{
|
|
|
portFaceCentroid_bary[f].first = static_cast<int>(found_tets.size());
|
|
|
portFaceCentroid_bary[f].second = std::move(found_tets);
|
|
|
++done;
|
|
|
|
|
|
// [NEW] accumulate for average
|
|
|
total_found_tets += portFaceCentroid_bary[f].first;
|
|
|
++success_faces;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
portFaceCentroid_bary[f].first = -1;
|
|
|
++not_found;
|
|
|
|
|
|
// Debug print (can be silenced)
|
|
|
std::cerr << "[PortCentroid] face f=" << f
|
|
|
<< " (port " << PortFacePidx_h[f] << ") NOT found at "
|
|
|
<< xyz[0] << " " << xyz[1] << " " << xyz[2] << "\n";
|
|
|
}
|
|
|
}
|
|
|
|
|
|
std::cout << "[readPortFaceCentroidPROBE] Located " << done
|
|
|
<< " port-face centroids; " << not_found << " not found.\n";
|
|
|
|
|
|
|
|
|
if (not_found == 0 && success_faces > 0) {
|
|
|
const double avg = static_cast<double>(total_found_tets) / static_cast<double>(success_faces);
|
|
|
std::cout << "[PortCentroid] average owning tets per centroid = " << avg
|
|
|
<< " (over " << success_faces << " faces)\n";
|
|
|
}
|
|
|
|
|
|
// Hard error if any were not found (match readPROBE behavior if you prefer)
|
|
|
if (not_found > 0)
|
|
|
{
|
|
|
std::cerr << "Error: Some port-face centroids were not found in the domain. Exiting.\n";
|
|
|
std::exit(EXIT_FAILURE);
|
|
|
}
|
|
|
}
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void FemGrp::readREGULAR(){
|
|
|
// writeFieldGlobal(1);
|
|
|
char tname[StrLenShort];
|
|
|
|
|
|
sprintf(tname, "%s.regular", fname);
|
|
|
ifstream regularAreaFile(tname, ios::in);
|
|
|
|
|
|
if(!regularAreaFile){
|
|
|
cout << "File " << tname << " does NOT exist " << endl;
|
|
|
exit(1);
|
|
|
}
|
|
|
|
|
|
int numOfRegions;
|
|
|
int region;
|
|
|
|
|
|
regularAreaFile >> numOfRegions;
|
|
|
regularTetraCNT = 0;
|
|
|
// Only one domain exists
|
|
|
regularCNT = numOfRegions;
|
|
|
if(regularCNT >= 1){
|
|
|
regularReferenceARRAY = new int[tetraCNT];
|
|
|
regionARRAY = new int[regularCNT];
|
|
|
for(int i = 0; i < regularCNT; i++)
|
|
|
regionARRAY[i] = -1;
|
|
|
|
|
|
for(int i = 0; i < tetraCNT; i ++){
|
|
|
tetra* tet = &(tetARRAY[i]);
|
|
|
regularAreaFile >> region;
|
|
|
tet->setRegularGroup(region);
|
|
|
if(region == 0){
|
|
|
regularReferenceARRAY[i] = i;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
regularTetraCNT++;
|
|
|
if(regionARRAY[region] == -1)
|
|
|
{
|
|
|
regionARRAY[region] = i;
|
|
|
regularReferenceARRAY[i] = i;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
regularReferenceARRAY[i] = regionARRAY[region];
|
|
|
}
|
|
|
}
|
|
|
// cout << "i = " << i << " reference = " << regularReferenceARRAY[i] << " region = " << region << endl;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
void FemGrp::initializeMaxMinPoints(){
|
|
|
maxPoint.setvtr(std::numeric_limits<fp_t>::min(), std::numeric_limits<fp_t>::min(), std::numeric_limits<fp_t>::min());
|
|
|
minPoint.setvtr(std::numeric_limits<fp_t>::max(), std::numeric_limits<fp_t>::max(), std::numeric_limits<fp_t>::max());
|
|
|
}
|
|
|
|
|
|
// Grows the tracked bounding box so that it contains the point (x, y, z):
// each component of maxPoint is raised, and each component of minPoint lowered,
// only when the new coordinate lies strictly beyond the stored one.
void FemGrp::setMaxMinPoints(fp_t x, fp_t y, fp_t z){
    auto upperX = maxPoint.getx();
    auto upperY = maxPoint.gety();
    auto upperZ = maxPoint.getz();
    if (x > upperX) upperX = x;
    if (y > upperY) upperY = y;
    if (z > upperZ) upperZ = z;
    maxPoint.setvtr(upperX, upperY, upperZ);

    auto lowerX = minPoint.getx();
    auto lowerY = minPoint.gety();
    auto lowerZ = minPoint.getz();
    if (x < lowerX) lowerX = x;
    if (y < lowerY) lowerY = y;
    if (z < lowerZ) lowerZ = z;
    minPoint.setvtr(lowerX, lowerY, lowerZ);
}
|
|
|
|
|
|
// 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 //
|
|
|
// 0000000000000000000000000000000000000 Post-processing 0000000000000000000000000000000000000000 //
|
|
|
// 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 //
|
|
|
|
|
|
|
|
|
// Modified by qi jian to write field at probes (CPU VERSION)
|
|
|
void FemGrp::writeFieldProbe(int timeStep)
|
|
|
{
|
|
|
int i, j;
|
|
|
fp_t vol;
|
|
|
fp_t zeta[4];
|
|
|
vtr lvtr[3];
|
|
|
vtr avtr[4];
|
|
|
|
|
|
int tetraMAP_aux[TetPolyOrderDim[getPolyFlag()]];
|
|
|
#if defined(DGTD_USE_CUDA)
|
|
|
fp_t_ts E_coeff[TetPolyOrderDim[getPolyFlag()]];
|
|
|
fp_t_ts H_coeff[TetPolyOrderDim[getPolyFlag()]];
|
|
|
#else
|
|
|
fp_t E_coeff[TetPolyOrderDim[getPolyFlag()]];
|
|
|
fp_t H_coeff[TetPolyOrderDim[getPolyFlag()]];
|
|
|
#endif
|
|
|
|
|
|
vtr eField;
|
|
|
vtr hField;
|
|
|
vtr eField_all;
|
|
|
vtr hField_all;
|
|
|
|
|
|
char csvFileName[StrOutput];
|
|
|
std::ofstream csvFile;
|
|
|
|
|
|
if(padeCNT == 0 || writeWhilePade)
|
|
|
{
|
|
|
sprintf(csvFileName, "Probes_%s_%04d.csv", fname, timeStep);
|
|
|
csvFile.open(csvFileName);
|
|
|
csvFile << "Ex" << "," << "Ey" << "," << "Ez" << "," << "Hx" << "," << "Hy" << "," << "Hz" << "\n";
|
|
|
}
|
|
|
|
|
|
|
|
|
const int num_nodes = probeCNT;
|
|
|
|
|
|
// Calculate Total Fields at the points
|
|
|
for(i = 0; i < num_nodes; i++)
|
|
|
{
|
|
|
int number_of_associated_tets = probes_bary.at(i).first;
|
|
|
|
|
|
eField.reset();
|
|
|
hField.reset();
|
|
|
|
|
|
std::vector<std::pair<int, std::array<double, 4>>> found_tets = probes_bary.at(i).second;
|
|
|
eField_all.reset();
|
|
|
hField_all.reset();
|
|
|
|
|
|
for (int t = 0; t < number_of_associated_tets; t++)
|
|
|
{
|
|
|
|
|
|
int tet_id = found_tets.at(t).first;
|
|
|
array<double,4> tri_bary_coord = found_tets.at(t).second;
|
|
|
tetra& tet = tetARRAY[tet_id];
|
|
|
|
|
|
tet.geometry(lvtr, avtr, &vol);
|
|
|
avtr[3].reset();
|
|
|
avtr[3] = avtr[3] - (avtr[0] + avtr[1] + avtr[2]);
|
|
|
|
|
|
eField.reset();
|
|
|
hField.reset();
|
|
|
zeta[0] = static_cast<fp_t>(tri_bary_coord[0]);
|
|
|
zeta[1] = static_cast<fp_t>(tri_bary_coord[1]);
|
|
|
zeta[2] = static_cast<fp_t>(tri_bary_coord[2]);
|
|
|
zeta[3] = static_cast<fp_t>(tri_bary_coord[3]);
|
|
|
|
|
|
|
|
|
// Calculate E field
|
|
|
tet.Local_DG_mapE(tetraMAP_aux, tet.LocalOffsetE);
|
|
|
|
|
|
for(j = 0 ; j < TetPolyOrderDim[getPolyFlag()] ; j++)
|
|
|
{
|
|
|
if(tetraMAP_aux[j] < 0)
|
|
|
E_coeff[j] = 0.0;
|
|
|
else
|
|
|
#if defined(DGTD_USE_CUDA)
|
|
|
E_coeff[j] = En1_h[tetraMAP_aux[j]];
|
|
|
#else
|
|
|
E_coeff[j] = en_1->getentry(tetraMAP_aux[j]);
|
|
|
#endif
|
|
|
}
|
|
|
// Calculate H field
|
|
|
tet.Local_DG_mapH(tetraMAP_aux, tet.LocalOffsetH);
|
|
|
|
|
|
for(j = 0 ; j < TetPolyOrderDim[getPolyFlag()] ; j++){
|
|
|
if(tetraMAP_aux[j] < 0)
|
|
|
H_coeff[j] = 0.0;
|
|
|
else
|
|
|
#if defined(DGTD_USE_CUDA)
|
|
|
H_coeff[j] = Hn32_h[tetraMAP_aux[j]];
|
|
|
#else
|
|
|
H_coeff[j] = hn_32->getentry(tetraMAP_aux[j]);
|
|
|
#endif
|
|
|
}
|
|
|
|
|
|
eField = CalcEfield(E_coeff, avtr, vol, zeta, PolyFlag);
|
|
|
hField = CalcEfield(H_coeff, avtr, vol, zeta, PolyFlag);
|
|
|
|
|
|
|
|
|
eField_all = eField_all + eField;
|
|
|
hField_all = hField_all + hField;
|
|
|
|
|
|
}
|
|
|
|
|
|
eField_all = eField_all / ((fp_t) number_of_associated_tets);
|
|
|
hField_all = hField_all / ((fp_t) number_of_associated_tets);
|
|
|
|
|
|
if(usePade){ // && i < padeCNT
|
|
|
int row = ((int)(timeStep / tsPerSampling)) * NumOfFieldComponents * probeCNT;
|
|
|
int column = i * NumOfFieldComponents;
|
|
|
fieldProbes[row + column + 0] = eField_all.getx();
|
|
|
fieldProbes[row + column + 1] = eField_all.gety();
|
|
|
fieldProbes[row + column + 2] = eField_all.getz();
|
|
|
fieldProbes[row + column + 3] = hField_all.getx();
|
|
|
fieldProbes[row + column + 4] = hField_all.gety();
|
|
|
fieldProbes[row + column + 5] = hField_all.getz();
|
|
|
}
|
|
|
|
|
|
if(padeCNT == 0 || writeWhilePade)
|
|
|
{
|
|
|
const auto max_precision {std::numeric_limits<fp_t>::digits10 + 1};
|
|
|
csvFile << std::setprecision(max_precision) << eField_all.getx() << "," << eField_all.gety() << "," << eField_all.getz() << "," << hField_all.getx() << "," << hField_all.gety() << "," << hField_all.getz() << "\n";
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
if(padeCNT == 0 || writeWhilePade)
|
|
|
{
|
|
|
usleep(100);
|
|
|
csvFile.close();
|
|
|
}
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void FemGrp::writeFieldProbeAfterPade(int tsSize)
|
|
|
{
|
|
|
const auto max_precision {std::numeric_limits<fp_t>::digits10 + 1};
|
|
|
|
|
|
#pragma omp parallel for
|
|
|
for(int i = 0; i < (int)ceil((1.0 * NtimeSteps) / tsPerSampling); i++){
|
|
|
char csvFileName[StrOutput];
|
|
|
std::ofstream csvFile;
|
|
|
sprintf(csvFileName, "./PROBES/Probes_%s_%04d.csv", fname, i * tsSize);
|
|
|
csvFile.open(csvFileName);
|
|
|
csvFile << "Ex" << "," << "Ey" << "," << "Ez" << "," << "Hx" << "," << "Hy" << "," << "Hz" << "\n";
|
|
|
|
|
|
for(int probe = 0; probe < probeCNT; probe++)
|
|
|
{
|
|
|
int column = probe * NumOfFieldComponents;
|
|
|
int row = i * NumOfFieldComponents * probeCNT;
|
|
|
for(int j = 0; j < NumOfFieldComponents; j++)
|
|
|
{
|
|
|
csvFile << std::setprecision(max_precision) << fieldProbes[row + column + j];
|
|
|
|
|
|
if(j == NumOfFieldComponents - 1)
|
|
|
csvFile << "\n";
|
|
|
else
|
|
|
csvFile << ",";
|
|
|
}
|
|
|
}
|
|
|
usleep(100);
|
|
|
csvFile.close();
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void FemGrp::writeFieldGlobal(int timeStep){
|
|
|
int i, j;
|
|
|
fp_t vol;
|
|
|
fp_t zeta[4];
|
|
|
vtr lvtr[3];
|
|
|
vtr avtr[4];
|
|
|
vtr coord[4];
|
|
|
vtr eLocal[4];
|
|
|
vtr hLocal[4];
|
|
|
|
|
|
int* tetraMAP_aux;
|
|
|
int* MapE_Pe;
|
|
|
|
|
|
#if defined(DGTD_USE_CUDA)
|
|
|
fp_t_ts* E_coeff;
|
|
|
fp_t_ts* H_coeff;
|
|
|
#else
|
|
|
fp_t* E_coeff;
|
|
|
fp_t* H_coeff;
|
|
|
#endif
|
|
|
|
|
|
|
|
|
vtr* eField = new vtr[nodeCNT];
|
|
|
vtr* hField = new vtr[nodeCNT];
|
|
|
int* count = new int[nodeCNT];
|
|
|
memset(count, 0, nodeCNT * sizeof(int));
|
|
|
|
|
|
// only initialize the memory for the first solution
|
|
|
if(regE.TetraReg == 0)
|
|
|
regE.initial(tetraCNT);
|
|
|
if(regH.TetraReg == 0)
|
|
|
regH.initial(tetraCNT);
|
|
|
|
|
|
int* polyOrder = new int[tetraCNT];
|
|
|
for(i = 0; i < tetraCNT; i++){
|
|
|
tetra& tet = tetARRAY[i];
|
|
|
polyOrder[i] = tet.PolyOrderFlag;
|
|
|
for(j = 0; j < NumOfNodes; j++){
|
|
|
coord[j] = (tet.getNode(j))->getCoord();
|
|
|
}
|
|
|
tet.geometry(lvtr, avtr, &vol);
|
|
|
avtr[3].reset();
|
|
|
avtr[3] = avtr[3] - (avtr[0] + avtr[1] + avtr[2]);
|
|
|
|
|
|
tetraMAP_aux = new int[TetPolyOrderDim[tet.PolyOrderFlag]];
|
|
|
MapE_Pe = new int[2 * TetPolyOrderDim[tet.PolyOrderFlag]];
|
|
|
#if defined(DGTD_USE_CUDA)
|
|
|
E_coeff = new fp_t_ts[TetPolyOrderDim[tet.PolyOrderFlag]];
|
|
|
H_coeff = new fp_t_ts[TetPolyOrderDim[tet.PolyOrderFlag]];
|
|
|
#else
|
|
|
E_coeff = new fp_t[TetPolyOrderDim[tet.PolyOrderFlag]];
|
|
|
H_coeff = new fp_t[TetPolyOrderDim[tet.PolyOrderFlag]];
|
|
|
#endif
|
|
|
|
|
|
|
|
|
// E field
|
|
|
tet.Local_DG_mapE(tetraMAP_aux, tet.LocalOffsetE);
|
|
|
|
|
|
for(j = 0 ; j < TetPolyOrderDim[tet.PolyOrderFlag] ; j++){
|
|
|
if(tetraMAP_aux[j] < 0)
|
|
|
E_coeff[j] = 0.0;
|
|
|
else
|
|
|
#if defined(DGTD_USE_CUDA)
|
|
|
E_coeff[j] = En1_h[tetraMAP_aux[j]];
|
|
|
#else
|
|
|
E_coeff[j] = en_1->getentry(tetraMAP_aux[j]);
|
|
|
#endif
|
|
|
}
|
|
|
// H field
|
|
|
tet.Local_DG_mapH(tetraMAP_aux, tet.LocalOffsetH);
|
|
|
|
|
|
for(j = 0 ; j < TetPolyOrderDim[tet.PolyOrderFlag] ; j++){
|
|
|
if(tetraMAP_aux[j] < 0)
|
|
|
H_coeff[j] = 0.0;
|
|
|
else
|
|
|
#if defined(DGTD_USE_CUDA)
|
|
|
H_coeff[j] = Hn32_h[tetraMAP_aux[j]];
|
|
|
#else
|
|
|
H_coeff[j] = hn_32->getentry(tetraMAP_aux[j]);
|
|
|
#endif
|
|
|
|
|
|
}
|
|
|
|
|
|
for(j = 0; j < 4; j++){
|
|
|
zeta[0] = BaryCoord[j][0];
|
|
|
zeta[1] = BaryCoord[j][1];
|
|
|
zeta[2] = BaryCoord[j][2];
|
|
|
zeta[3] = BaryCoord[j][3];
|
|
|
|
|
|
eLocal[j] = CalcEfield(E_coeff, avtr, vol, zeta, tet.PolyOrderFlag);
|
|
|
hLocal[j] = CalcEfield(H_coeff, avtr, vol, zeta, tet.PolyOrderFlag);
|
|
|
|
|
|
int index = tet.nd[j]->getid();
|
|
|
eField[index] = eField[index] + eLocal[j] /*- Einc*/;
|
|
|
hField[index] = hField[index] + hLocal[j] /*- Hinc*/;
|
|
|
count[index] += 1;
|
|
|
}
|
|
|
regE.setRegister(i, eLocal);
|
|
|
regH.setRegister(i, hLocal);
|
|
|
delete [] tetraMAP_aux;
|
|
|
delete [] MapE_Pe;
|
|
|
delete [] E_coeff;
|
|
|
delete [] H_coeff;
|
|
|
}
|
|
|
|
|
|
for(i = 0; i < nodeCNT; i++){
|
|
|
eField[i] = eField[i] / static_cast<fp_t>(count[i]);
|
|
|
hField[i] = hField[i] / static_cast<fp_t>(count[i]);
|
|
|
}
|
|
|
|
|
|
VtkWriter vtkWriter(1.0);
|
|
|
// VtkWriter vtkWriter(unit);
|
|
|
char vtkFilePrefix[128];
|
|
|
memset(vtkFilePrefix, 0, 128 * sizeof(char));
|
|
|
|
|
|
sprintf(vtkFilePrefix, "%s_%04d", fname, timeStep);
|
|
|
|
|
|
vtkWriter.writeField(vtkFilePrefix, nodeCNT, ndARRAY, tetraCNT, tetARRAY, eField, hField, polyOrder, 0, 0); //TODO: why here polyorder is not 1
|
|
|
|
|
|
delete [] eField;
|
|
|
delete [] hField;
|
|
|
delete [] polyOrder;
|
|
|
delete [] count;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Modified by qi jian to compute the analytical incident field at the probes
|
|
|
void FemGrp::writeAnalyticalIncidentPWProbes(int timeStep){
|
|
|
int i;
|
|
|
vtr Einc;
|
|
|
vtr Hinc;
|
|
|
vtr r;
|
|
|
vtr Einc_field;
|
|
|
vtr Hinc_field;
|
|
|
|
|
|
fp_t zeta[4];
|
|
|
char csvFileName[StrOutput];
|
|
|
sprintf(csvFileName, "AnalyticalIncidentField_%s_%04d.csv", fname, timeStep);
|
|
|
|
|
|
std::ofstream csvFile(csvFileName);
|
|
|
csvFile << "Ex" << "," << "Ey" << "," << "Ez" << "," << "Hx" << "," << "Hy" << "," << "Hz" << "\n";
|
|
|
|
|
|
|
|
|
|
|
|
for(i = 0; i < probeCNT; i++)
|
|
|
{
|
|
|
|
|
|
// Get the Incident Field at the probe
|
|
|
int number_of_associated_tets = probes_bary.at(i).first;
|
|
|
|
|
|
Einc.reset();
|
|
|
Hinc.reset();
|
|
|
std::vector<std::pair<int, std::array<double, 4>>> found_tets = probes_bary.at(i).second;
|
|
|
Einc_field.reset(); // Store for all valid candidate tets
|
|
|
Hinc_field.reset(); // Store for all valid candidate tets
|
|
|
|
|
|
for (int t = 0; t < number_of_associated_tets; t++)
|
|
|
{
|
|
|
int tet_id = found_tets.at(t).first;
|
|
|
array<double,4> tri_bary_coord = found_tets.at(t).second;
|
|
|
tetra& tet = tetARRAY[tet_id];
|
|
|
|
|
|
zeta[0] = static_cast<fp_t>(tri_bary_coord[0]);
|
|
|
zeta[1] = static_cast<fp_t>(tri_bary_coord[1]);
|
|
|
zeta[2] = static_cast<fp_t>(tri_bary_coord[2]);
|
|
|
zeta[3] = static_cast<fp_t>(tri_bary_coord[3]);
|
|
|
|
|
|
SimplexToCartesian(tet, r, zeta);
|
|
|
getAnalyticalPWField(tet, r, Einc, Hinc, timeStep, LocTimeSteps[N_class -1]);
|
|
|
|
|
|
Einc_field = Einc_field + Einc;
|
|
|
Hinc_field = Hinc_field + Hinc;
|
|
|
|
|
|
}
|
|
|
|
|
|
Einc_field = Einc_field / ((fp_t) number_of_associated_tets);
|
|
|
Hinc_field = Hinc_field / ((fp_t) number_of_associated_tets);
|
|
|
|
|
|
const auto max_precision {std::numeric_limits<fp_t>::digits10 + 1};
|
|
|
csvFile << std::setprecision(max_precision) << Einc_field.getx() << "," << Einc_field.gety() << "," << Einc_field.getz() << "," << Hinc_field.getx() << "," << Hinc_field.gety() << "," << Hinc_field.getz() << "\n";
|
|
|
|
|
|
}
|
|
|
usleep(100);
|
|
|
csvFile.close();
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Evaluates the analytical incident plane-wave field at position `r`.
//
// tet      : tetrahedron containing `r`; only its material (mur, epsr entry
//            (0,0)) is read, to compute the wave impedance.
// r        : evaluation point.
// Einc/Hinc: outputs. They are overwritten once for every boundary condition
//            of type planeWaveType or pmlType, so the last such entry wins;
//            they are left untouched if there is none.
// timeStep : time-step index. E is sampled at dt*(timeStep+1.0) and H at
//            dt*(timeStep+1.5).
// dt       : time increment.
//
// The waveform is selected by the member ExcitFlag (0..4). Any other value
// now yields zero fields; previously it multiplied the polarisation by
// uninitialised amplitudes.
//
// NOTE(review): the propagation delay is computed with Vo rather than with
// the speed in the tetrahedron's material -- confirm this is intended.
void FemGrp::getAnalyticalPWField(tetra& tet, vtr& r, vtr& Einc, vtr& Hinc, int timeStep, fp_t dt){
    fp_t eta = No * sqrt(tet.mat->mur.getEntry(0,0) / tet.mat->epsr.getEntry(0,0));
    fp_t omega = 2.0 * Pi * freq * MEGA;
    fp_t Exponent;
    fp_t SinModul;
    fp_t Neuman;
    fp_t t;

    for(int i = 0; i < bcCNT; i++){
        bc bc_i = bcARRAY[i];
        if(bc_i.getbType() != planeWaveType && bc_i.getbType() != pmlType)
            continue;

        fp_t Emagnitude = bc_i.getMagE();
        fp_t theta_in_rad = bc_i.getTheta() * Pi / 180.0;
        fp_t phi_in_rad = bc_i.getPhi() * Pi / 180.0;
        vtr Epol = bc_i.getField();
        vtr kvtr(sin(theta_in_rad) * cos(phi_in_rad), sin(theta_in_rad) * sin(phi_in_rad), cos(theta_in_rad));
        vtr Hpol = kvtr * Epol;   // presumably the cross product k x E
        vtr ro = bc_i.getPW_ro();
        fp_t Hmagnitude = Emagnitude / eta;

        Hpol.unitvtr();
        Epol.unitvtr();

        // Delay term shared by every waveform below.
        const auto delay = dotP(kvtr, r - ro) / Vo;

        // Zero by default so an unknown ExcitFlag gives a zero field.
        fp_t IncidExcit_E = 0.0;
        fp_t IncidExcit_H = 0.0;

        switch(ExcitFlag){
            case 0: //(not tested)
                // Cosine switched on when the retarded time becomes
                // non-negative. The retarded time is computed BEFORE it is
                // tested; the original tested it while still uninitialised.
                t = dt * (timeStep + 1.0);
                Exponent = t - To - delay;
                if(Exponent >= 0.0){
                    SinModul = cos(omega * Exponent);
                    IncidExcit_E = Emagnitude * SinModul;
                }

                t = dt * (timeStep + 1.5);
                Exponent = t - To - delay;
                if(Exponent >= 0.0){
                    SinModul = cos(omega * Exponent);
                    IncidExcit_H = Hmagnitude * SinModul;
                }
                break;

            case 1:
                // Gauss Pulse (cosine-modulated when ModuleFlag is set)
                t = dt * (timeStep + 1.0);
                Exponent = t - To - delay;
                SinModul = ModuleFlag ? cos(omega * Exponent) : 1.0;
                IncidExcit_E = Emagnitude * SinModul * exp(-(Exponent * Exponent) / (Tau * Tau));

                t = dt * (timeStep + 1.5);
                Exponent = t - To - delay;
                SinModul = ModuleFlag ? cos(omega * Exponent) : 1.0;
                IncidExcit_H = Hmagnitude * SinModul * exp(-(Exponent * Exponent) / (Tau * Tau));
                break;

            case 2: //(not tested)
                // Neuman Pulse
                t = dt * (timeStep + 1.0);
                Exponent = t - To - delay;
                Neuman = (2.0 * Exponent) / (Tau * Tau);
                IncidExcit_E = (Emagnitude * Neuman) * exp(-(Exponent * Exponent) / (Tau * Tau));

                t = dt * (timeStep + 1.5);
                Exponent = t - To - delay;
                Neuman = (2.0 * Exponent) / (Tau * Tau);
                IncidExcit_H = Hmagnitude * Neuman * exp(-(Exponent * Exponent) / (Tau * Tau));
                break;

            case 3:
            {
                // DC-Free Hann-Modulated Cosine Pulse (with time delay)
                fp_t tdelay = To; // To represents the delay time

                t = dt * (timeStep + 1.0);
                Exponent = t - tdelay - delay;
                if (Exponent >= 0.0 && Exponent <= Tau) {
                    // Shift exponent relative to pulse center
                    fp_t t_rel = Exponent - Tau / 2.0;
                    fp_t window = 0.5 * (1.0 - cos(2.0 * Pi * Exponent / Tau)); // Hann window
                    SinModul = cos(omega * t_rel);
                    IncidExcit_E = Emagnitude * SinModul * window;
                } else {
                    IncidExcit_E = 0.0;
                }

                t = dt * (timeStep + 1.5);
                Exponent = t - tdelay - delay;
                if (Exponent >= 0.0 && Exponent <= Tau) {
                    fp_t t_rel = Exponent - Tau / 2.0;
                    fp_t window = 0.5 * (1.0 - cos(2.0 * Pi * Exponent / Tau)); // Hann window
                    SinModul = cos(omega * t_rel);
                    IncidExcit_H = Hmagnitude * SinModul * window;
                } else {
                    IncidExcit_H = 0.0;
                }
                break;
            }

            case 4: // Linear Chirp Excitation with sine start and Hann window
            {
                fp_t f_end = freq * MEGA;
                fp_t B = Tau * MEGA;
                fp_t f0 = f_end - B;
                fp_t f1 = f_end;
                fp_t Tchirp = To;

                // Incident Electric Field (E)
                // NOTE(review): E uses (t - delay) while H below uses
                // (t - To - delay). Kept exactly as found; confirm which
                // of the two is intended.
                t = dt * (timeStep + 1.0);
                Exponent = t - delay;
                if (Exponent >= 0.0 && Exponent <= Tchirp)
                {
                    fp_t chirpArg = 2.0 * Pi * f0 * Exponent + Pi * (f1 - f0) / Tchirp * Exponent * Exponent;
                    fp_t window = 0.5 * (1.0 - cos(2.0 * Pi * Exponent / Tchirp)); // Hann window
                    IncidExcit_E = Emagnitude * sin(chirpArg) * window;
                }
                else
                {
                    IncidExcit_E = 0.0;
                }

                // Incident Magnetic Field (H)
                t = dt * (timeStep + 1.5);
                Exponent = t - To - delay;
                if (Exponent >= 0.0 && Exponent <= Tchirp)
                {
                    fp_t chirpArg = 2.0 * Pi * f0 * Exponent + Pi * (f1 - f0) / Tchirp * Exponent * Exponent;
                    fp_t window = 0.5 * (1.0 - cos(2.0 * Pi * Exponent / Tchirp)); // Hann window
                    IncidExcit_H = Hmagnitude * sin(chirpArg) * window;
                }
                else
                {
                    IncidExcit_H = 0.0;
                }
                break;
            }

            default:
                // Unknown waveform: amplitudes stay zero.
                break;
        }

        Einc = Epol * IncidExcit_E;
        Hinc = Hpol * IncidExcit_H;
        // cout << "Einc at: (" << r.getx() << ", " << r.gety() << ", " << r.getz() << ") = (" << Einc.getx() << ", " << Einc.gety() << ", " << Einc.getz() << ")" << endl;
    }
}
|
|
|
|
|
|
void FemGrp::writeEquivalentSurfaceCurrents_(int timeStep){
|
|
|
int i, j;
|
|
|
int m;
|
|
|
int index;
|
|
|
int FaceNum;
|
|
|
fp_t vol;
|
|
|
fp_t zeta[4];
|
|
|
fp_t Area;
|
|
|
vtr NormalVtr;
|
|
|
vtr lvtr[3];
|
|
|
vtr avtr[4];
|
|
|
vtr coord[4];
|
|
|
vtr eLocal[4];
|
|
|
vtr hLocal[4];
|
|
|
vtr eLocalFace[3];
|
|
|
vtr hLocalFace[3];
|
|
|
tetra* tet;
|
|
|
|
|
|
ArrayFP<fp_t>* origEn_1 = new ArrayFP<fp_t>(TetPolyOrderDim[PolyFlag]);
|
|
|
ArrayFP<fp_t>* origHn_32 = new ArrayFP<fp_t>(TetPolyOrderDim[PolyFlag]);
|
|
|
|
|
|
char Currents_vtkFile[StrOutput];
|
|
|
sprintf(Currents_vtkFile, "Currents_%s_%04d", fname, timeStep);
|
|
|
|
|
|
// fill the port field with averaged values
|
|
|
vtr* JField = new vtr[SurfMesh->nodeCNT];
|
|
|
vtr* MField = new vtr[SurfMesh->nodeCNT];
|
|
|
int* count = new int[SurfMesh->nodeCNT];
|
|
|
memset(count, 0, SurfMesh->nodeCNT * sizeof(int));
|
|
|
|
|
|
regMface = new Register[SurfMesh->faceCNT];
|
|
|
regJface = new Register[SurfMesh->faceCNT];
|
|
|
|
|
|
for(i = 0; i < SurfMesh->faceCNT; i++){
|
|
|
SurfMesh->fcArray[i]->getAreaNormal(&Area, &NormalVtr);
|
|
|
tet = SurfMesh->fcArray[i]->hydra[0];
|
|
|
tet->geometry(lvtr, avtr, &vol);
|
|
|
avtr[3].reset();
|
|
|
avtr[3] = avtr[3] - (avtr[0] + avtr[1] + avtr[2]);
|
|
|
|
|
|
Get_Coefficients_(tet, origEn_1, origHn_32);
|
|
|
|
|
|
for(m = 0; m < NumOfFaces; m++){
|
|
|
zeta[m] = 0.0;
|
|
|
if(SurfMesh->fcArray[i] == tet->getFacePtr(m))
|
|
|
FaceNum = m;
|
|
|
}
|
|
|
|
|
|
for(j = 0; j < 4; j++){
|
|
|
zeta[0] = BaryCoord[j][0];
|
|
|
zeta[1] = BaryCoord[j][1];
|
|
|
zeta[2] = BaryCoord[j][2];
|
|
|
zeta[3] = BaryCoord[j][3];
|
|
|
eLocal[j] = CalcEfield(origEn_1->getEntryPtr(), avtr, vol, zeta, tet->PolyOrderFlag);
|
|
|
hLocal[j] = CalcEfield(origHn_32->getEntryPtr(), avtr, vol, zeta, tet->PolyOrderFlag);
|
|
|
}
|
|
|
|
|
|
regMface[i].initial(3);
|
|
|
regJface[i].initial(3);
|
|
|
for(j = 0; j < 3; j++){
|
|
|
eLocalFace[j] = eLocal[faceMAP[FaceNum][j]];
|
|
|
hLocalFace[j] = hLocal[faceMAP[FaceNum][j]];
|
|
|
index = SurfMesh->globToLocMap_->find(SurfMesh->fcArray[i]->getNode(j)->getid())->second;
|
|
|
MField[index] = MField[index] + NormalVtr * eLocalFace[j] * (-1.0);
|
|
|
JField[index] = JField[index] + NormalVtr * hLocalFace[j] * (1.0);
|
|
|
// No averaging
|
|
|
regMface[i].setField(j, NormalVtr * eLocalFace[j] * (-1.0));
|
|
|
regJface[i].setField(j, NormalVtr * hLocalFace[j] * (1.0));
|
|
|
count[index] += 1;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// This is for visualization in the vtk format
|
|
|
for(i = 0; i < SurfMesh->nodeCNT; i++){
|
|
|
MField[i] = MField[i] / static_cast<fp_t>(count[i]);
|
|
|
JField[i] = JField[i] / static_cast<fp_t>(count[i]);
|
|
|
}
|
|
|
|
|
|
node** locNodeArray = new node*[SurfMesh->nodeCNT];
|
|
|
for(i = 0; i < SurfMesh->nodeCNT; i++){
|
|
|
node& Node = *(SurfMesh->ndArray[i]);
|
|
|
int index = SurfMesh->globToLocMap_->find(Node.getid())->second;
|
|
|
locNodeArray[index] = new node(index, Node.getPType(), Node.getSingOrder(), Node.getCoord().getx(), Node.getCoord().gety(), Node.getCoord().getz());
|
|
|
}
|
|
|
|
|
|
face** locFaceArray = new face*[SurfMesh->faceCNT];
|
|
|
for(i = 0; i < SurfMesh->faceCNT; i++){
|
|
|
face& Face = *(SurfMesh->fcArray[i]);
|
|
|
locFaceArray[i] = new face(Face);
|
|
|
locFaceArray[i]->setFace(
|
|
|
locNodeArray[SurfMesh->globToLocMap_->find(Face.getNode(0)->getid())->second],
|
|
|
locNodeArray[SurfMesh->globToLocMap_->find(Face.getNode(1)->getid())->second],
|
|
|
locNodeArray[SurfMesh->globToLocMap_->find(Face.getNode(2)->getid())->second]);
|
|
|
}
|
|
|
|
|
|
// Vtk
|
|
|
VtkWriter vtkWriter(1.);
|
|
|
vtkWriter.writeTriUg(Currents_vtkFile, SurfMesh->nodeCNT, locNodeArray, SurfMesh->faceCNT, locFaceArray, MField, JField, 1);
|
|
|
|
|
|
// Register
|
|
|
char regFileName[StrOutput];
|
|
|
char regFileNameDebug[StrOutput];
|
|
|
memset(regFileName, 0, StrOutput * sizeof(char));
|
|
|
sprintf(regFileName, "Currents_%s_%05d", fname, timeStep);
|
|
|
sprintf(regFileNameDebug, "Currents_%s_%05d_dbg", fname, timeStep);
|
|
|
|
|
|
printRegister(regMface, regJface, SurfMesh->faceCNT, regFileName,1);
|
|
|
// printRegisterDebug(regMface, regJface, SurfMesh->faceCNT, regFileNameDebug,2);
|
|
|
|
|
|
if(timeStep == 0)
|
|
|
printTriMesh(SurfMesh->nodeCNT, locNodeArray, SurfMesh->faceCNT, locFaceArray, fname);
|
|
|
|
|
|
for(i = 0; i < SurfMesh->nodeCNT; i++)
|
|
|
delete locNodeArray[i];
|
|
|
delete [] locNodeArray;
|
|
|
for(i = 0; i < SurfMesh->faceCNT; i++)
|
|
|
delete locFaceArray[i];
|
|
|
delete [] locFaceArray;
|
|
|
delete [] MField;
|
|
|
delete [] JField;
|
|
|
delete [] count;
|
|
|
delete origEn_1;
|
|
|
delete origHn_32;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Print face registers
|
|
|
// Writes the per-face register values to two text files,
// "<prjName>_BC.curM" (from regMface) and "<prjName>_BC.curJ" (from regJface).
//
// For every face i in [0, FaceCnt):
//   order == 1 : the x, y, z components of fields 0..2, one value per line,
//                followed by an empty line.
//   order == 2 : the same nine values, then for j = 0..2 the mean of fields
//                First2Second[j][0] and First2Second[j][1] (x, y, z),
//                followed by an empty line.
//   any other order writes nothing for the face.
//
// File names are built with std::string, so a long prjName can no longer
// overflow a fixed-size buffer. A file that cannot be opened is reported and
// the other file is still written.
void FemGrp::printRegister(Register* regMface, Register* regJface, int FaceCnt, char *prjName, int order){
    const std::string fnameM3 = std::string(prjName) + "_BC.curM";
    const std::string fnameJ3 = std::string(prjName) + "_BC.curJ";

    std::ofstream foutM3(fnameM3.c_str(), std::ios::out);
    std::ofstream foutJ3(fnameJ3.c_str(), std::ios::out);
    if(!foutM3)
        std::cout << "Error in opening file: " << fnameM3 << " for write " << std::endl;
    if(!foutJ3)
        std::cout << "Error in opening file: " << fnameJ3 << " for write " << std::endl;

    // x, y, z of the three fields stored in the register.
    auto writeCorners = [&](std::ofstream& out, Register& reg){
        for(int j = 0; j < 3; j++){
            out << reg.getField(j).getx() << '\n';
            out << reg.getField(j).gety() << '\n';
            out << reg.getField(j).getz() << '\n';
        }
    };

    // Mean of the two fields selected by First2Second[j].
    auto writeMidValues = [&](std::ofstream& out, Register& reg){
        for(int j = 0; j < 3; j++){
            int index0 = First2Second[j][0];
            int index1 = First2Second[j][1];
            out << 0.5 * (reg.getField(index0).getx() + reg.getField(index1).getx()) << '\n';
            out << 0.5 * (reg.getField(index0).gety() + reg.getField(index1).gety()) << '\n';
            out << 0.5 * (reg.getField(index0).getz() + reg.getField(index1).getz()) << '\n';
        }
    };

    // '\n' instead of endl: same bytes in the file, without a flush after
    // every single value.
    for(int i = 0; i < FaceCnt; i++){
        if(order == 1){
            writeCorners(foutM3, regMface[i]);
            foutM3 << '\n';
            writeCorners(foutJ3, regJface[i]);
            foutJ3 << '\n';
        }else if(order == 2){
            writeCorners(foutM3, regMface[i]);
            writeMidValues(foutM3, regMface[i]);
            foutM3 << '\n';
            writeCorners(foutJ3, regJface[i]);
            writeMidValues(foutJ3, regJface[i]);
            foutJ3 << '\n';
        }
    }
    foutJ3.close();
    foutM3.close();
}
|
|
|
|
|
|
// Print out Outer Surface node & triangle info on *.tri
|
|
|
void FemGrp::printTriMesh(int ndNum, node **ndArray, int fcNum, face **fcArray, char *prjName){
|
|
|
int i;
|
|
|
face* fcPtr;
|
|
|
FILE* fd;
|
|
|
char triName[360];
|
|
|
|
|
|
sprintf(triName, "%s.tri", prjName);
|
|
|
|
|
|
fd = fopen(triName, "wt");
|
|
|
fprintf(fd, "%f\n", unit);
|
|
|
fprintf(fd, "%d\n", ndNum);
|
|
|
|
|
|
for(i = 0; i < ndNum; i ++){
|
|
|
fprintf(fd, "%f %f %f\n",
|
|
|
(ndArray[i]->getCoord().getx()) / unit,
|
|
|
(ndArray[i]->getCoord().gety()) / unit,
|
|
|
(ndArray[i]->getCoord().getz()) / unit);
|
|
|
}
|
|
|
|
|
|
fprintf(fd,"%d\n", fcNum);
|
|
|
for(i = 0; i < fcNum; i ++){
|
|
|
fcPtr = fcArray[i];
|
|
|
node* n0Ptr;
|
|
|
node* n1Ptr;
|
|
|
node* n2Ptr;
|
|
|
|
|
|
n0Ptr = fcPtr->getNode(0);
|
|
|
n1Ptr = fcPtr->getNode(1);
|
|
|
n2Ptr = fcPtr->getNode(2);
|
|
|
fprintf(fd, "%d %d %d\n", n0Ptr->getid(), n1Ptr->getid(), n2Ptr->getid());
|
|
|
}
|
|
|
fclose(fd);
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Modified by qi jian to compute the L2 error at the probes
|
|
|
void FemGrp::CalculateL2ErrorProbes(int& timeStep, fp_t dt, int TimeDistFlag){
|
|
|
int i, j;
|
|
|
|
|
|
fp_t vol;
|
|
|
fp_t zeta[4];
|
|
|
vtr lvtr[3];
|
|
|
vtr avtr[4];
|
|
|
vtr eLocal;
|
|
|
vtr hLocal;
|
|
|
vtr eLocal_exa;
|
|
|
vtr hLocal_exa;
|
|
|
|
|
|
vtr eLocal_all;
|
|
|
vtr hLocal_all;
|
|
|
vtr eLocal_exa_all;
|
|
|
vtr hLocal_exa_all;
|
|
|
|
|
|
fp_t E_coeff[TetPolyOrderDim[getPolyFlag()]];
|
|
|
fp_t H_coeff[TetPolyOrderDim[getPolyFlag()]];
|
|
|
fp_t IntegrOmegaE = 0.0;
|
|
|
fp_t IntegrOmegaH = 0.0;
|
|
|
vtr r;
|
|
|
vtr Exa_NumE;
|
|
|
vtr Exa_NumH;
|
|
|
char Error_E_TimeLog[180];
|
|
|
char Error_H_TimeLog[180];
|
|
|
|
|
|
int outOfModelProbes = 0;
|
|
|
|
|
|
for(i = 0; i < probeCNT; i++)
|
|
|
{
|
|
|
|
|
|
int number_of_associated_tets = probes_bary.at(i).first;
|
|
|
|
|
|
eLocal.reset();
|
|
|
hLocal.reset();
|
|
|
|
|
|
std::vector<std::pair<int, std::array<double, 4>>> found_tets = probes_bary.at(i).second;
|
|
|
eLocal_exa.reset();
|
|
|
hLocal_exa.reset();
|
|
|
|
|
|
eLocal_all.reset();
|
|
|
hLocal_all.reset();
|
|
|
eLocal_exa_all.reset();
|
|
|
hLocal_exa_all.reset();
|
|
|
|
|
|
for (int t = 0; t < number_of_associated_tets; t++)
|
|
|
{
|
|
|
|
|
|
int tet_id = found_tets.at(t).first;
|
|
|
array<double,4> probe_bary_coord = found_tets.at(t).second;
|
|
|
tetra& tet = tetARRAY[tet_id];
|
|
|
|
|
|
int tetraMAP[TetPolyOrderDim[tet.PolyOrderFlag]];
|
|
|
|
|
|
tet.geometry(lvtr, avtr, &vol);
|
|
|
avtr[3].reset();
|
|
|
avtr[3] = avtr[3] - (avtr[0] + avtr[1] + avtr[2]);
|
|
|
|
|
|
// Compute the Efield
|
|
|
tet.Local_DG_mapE(tetraMAP, tet.LocalOffsetE);
|
|
|
for(j = 0 ; j < TetPolyOrderDim[tet.PolyOrderFlag] ; j++){
|
|
|
if(tetraMAP[j] < 0)
|
|
|
E_coeff[j] = 0.0;
|
|
|
else
|
|
|
E_coeff[j] = en_1->getentry(tetraMAP[j]);
|
|
|
}
|
|
|
|
|
|
// Compute the Hfield
|
|
|
tet.Local_DG_mapH(tetraMAP, tet.LocalOffsetH);
|
|
|
for(j = 0 ; j < TetPolyOrderDim[tet.PolyOrderFlag] ; j++){
|
|
|
if(tetraMAP[j] < 0)
|
|
|
H_coeff[j] = 0.0;
|
|
|
else
|
|
|
H_coeff[j] = hn_32->getentry(tetraMAP[j]);
|
|
|
}
|
|
|
|
|
|
eLocal.reset();
|
|
|
hLocal.reset();
|
|
|
eLocal_exa.reset();
|
|
|
hLocal_exa.reset();
|
|
|
|
|
|
|
|
|
|
|
|
zeta[0] = static_cast<fp_t>(probe_bary_coord[0]);
|
|
|
zeta[1] = static_cast<fp_t>(probe_bary_coord[1]);
|
|
|
zeta[2] = static_cast<fp_t>(probe_bary_coord[2]);
|
|
|
zeta[3] = static_cast<fp_t>(probe_bary_coord[3]);
|
|
|
SimplexToCartesian(tet, r, zeta);
|
|
|
|
|
|
eLocal = CalcEfield(E_coeff, avtr, vol, zeta, tet.PolyOrderFlag);
|
|
|
hLocal = CalcEfield(H_coeff, avtr, vol, zeta, tet.PolyOrderFlag);
|
|
|
GetExactSolution(tet, r, eLocal_exa, hLocal_exa, timeStep, dt, TimeDistFlag);
|
|
|
|
|
|
|
|
|
// Add all the local fields from all relevant tets
|
|
|
eLocal_all = eLocal_all + eLocal;
|
|
|
hLocal_all = hLocal_all + hLocal;
|
|
|
eLocal_exa_all = eLocal_exa_all + eLocal_exa;
|
|
|
hLocal_exa_all = hLocal_exa_all + hLocal_exa;
|
|
|
|
|
|
}
|
|
|
|
|
|
eLocal_all = eLocal_all / ((fp_t) number_of_associated_tets);
|
|
|
hLocal_all = hLocal_all / ((fp_t) number_of_associated_tets);
|
|
|
eLocal_exa_all = eLocal_exa_all / ((fp_t) number_of_associated_tets);
|
|
|
hLocal_exa_all = hLocal_exa_all / ((fp_t) number_of_associated_tets);
|
|
|
|
|
|
|
|
|
Exa_NumE = eLocal_exa_all - eLocal_all;
|
|
|
Exa_NumH = hLocal_exa_all - hLocal_all;
|
|
|
|
|
|
IntegrOmegaE += Exa_NumE.magnitude() * Exa_NumE.magnitude();
|
|
|
IntegrOmegaH += Exa_NumH.magnitude() * Exa_NumH.magnitude();
|
|
|
|
|
|
sprintf(Error_E_TimeLog, "%s_Probe_%d.TDerrorE", fname, i);
|
|
|
sprintf(Error_H_TimeLog, "%s_Probe_%d.TDerrorH", fname, i);
|
|
|
|
|
|
ofstream Error_E(Error_E_TimeLog, ios_base::out | ios::app);
|
|
|
Error_E.setf(ios::scientific, ios::floatfield);
|
|
|
Error_E.precision(15);
|
|
|
|
|
|
if(!Error_E)
|
|
|
cout << "Error in opening file: " << Error_E_TimeLog << " for write " << endl;
|
|
|
|
|
|
Error_E << "[" << (timeStep + 1.0) * dt << ", " << Exa_NumE.magnitude() << "]; \n";
|
|
|
Error_E.close();
|
|
|
|
|
|
ofstream Error_H(Error_H_TimeLog, ios_base::out | ios::app);
|
|
|
Error_H.setf(ios::scientific, ios::floatfield);
|
|
|
Error_H.precision(15);
|
|
|
|
|
|
if(!Error_H)
|
|
|
cout << "Error in opening file: " << Error_H_TimeLog << " for write " << endl;
|
|
|
|
|
|
Error_H << "[" << (timeStep + 1.5) * dt << ", " << Exa_NumH.magnitude() << "]; \n";
|
|
|
Error_H.close();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Write to file
|
|
|
if(outOfModelProbes < probeCNT)
|
|
|
{
|
|
|
sprintf(Error_E_TimeLog, "%s_Probes_Global.TDerrorE", fname);
|
|
|
sprintf(Error_H_TimeLog, "%s_Probes_Global.TDerrorH", fname);
|
|
|
|
|
|
ofstream Error_E(Error_E_TimeLog, ios_base::out | ios::app);
|
|
|
Error_E.setf(ios::scientific, ios::floatfield);
|
|
|
Error_E.precision(15);
|
|
|
|
|
|
if(!Error_E)
|
|
|
cout << "Error in opening file: " << Error_E_TimeLog << " for write " << endl;
|
|
|
|
|
|
Error_E << "[" << (timeStep + 1.0) * dt << ", " << sqrt(IntegrOmegaE / (probeCNT - outOfModelProbes)) << "]; \n";
|
|
|
Error_E.close();
|
|
|
|
|
|
ofstream Error_H(Error_H_TimeLog, ios_base::out | ios::app);
|
|
|
Error_H.setf(ios::scientific, ios::floatfield);
|
|
|
Error_H.precision(15);
|
|
|
|
|
|
if(!Error_H)
|
|
|
cout << "Error in opening file: " << Error_H_TimeLog << " for write " << endl;
|
|
|
|
|
|
Error_H << "[" << (timeStep + 1.5) * dt << ", " << sqrt(IntegrOmegaH / (probeCNT - outOfModelProbes)) << "]; \n";
|
|
|
Error_H.close();
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void FemGrp::CalculateL2Error(int& timeStep, fp_t dt, int TimeDistFlag){
|
|
|
int i, j;
|
|
|
fp_t vol;
|
|
|
fp_t zeta[4];
|
|
|
vtr lvtr[3];
|
|
|
vtr avtr[4];
|
|
|
vtr coord[4];
|
|
|
vtr eLocal[4];
|
|
|
vtr hLocal[4];
|
|
|
vtr eLocal_exa[4];
|
|
|
vtr hLocal_exa[4];
|
|
|
int QuadOrder = 2; //TODO: Recheck with the order of the basis
|
|
|
int points = 4;
|
|
|
fp_t** ZetaMat = new fp_t*[points];
|
|
|
fp_t* weights = new fp_t[points];
|
|
|
for(int i = 0; i < points; i++)
|
|
|
ZetaMat[i] = new fp_t[4];
|
|
|
GetTetQuadRule(QuadOrder, points, ZetaMat, weights);
|
|
|
|
|
|
fp_t IntegrOmegaE = 0.0;
|
|
|
fp_t IntegrOmegaH = 0.0;
|
|
|
fp_t NormalizeOmegaE = 0.0;
|
|
|
fp_t NormalizeOmegaH = 0.0;
|
|
|
|
|
|
for(i = 0; i < tetraCNT; i++){
|
|
|
tetra& tet = tetARRAY[i];
|
|
|
int tetraMAP_E[TetPolyOrderDim[tet.PolyOrderFlag]];
|
|
|
int tetraMAP_H[TetPolyOrderDim[tet.PolyOrderFlag]];
|
|
|
auto origEn_1 = new ArrayFP<fp_t>(TetPolyOrderDim[tet.PolyOrderFlag]);
|
|
|
auto origHn_32 = new ArrayFP<fp_t>(TetPolyOrderDim[tet.PolyOrderFlag]);
|
|
|
for(j = 0; j < 4; j++){
|
|
|
coord[j] = (tet.getNode(j))->getCoord();
|
|
|
}
|
|
|
tet.geometry(lvtr, avtr, &vol);
|
|
|
avtr[3].reset();
|
|
|
avtr[3] = avtr[3] - (avtr[0] + avtr[1] + avtr[2]);
|
|
|
|
|
|
|
|
|
tet.Local_DG_mapE(tetraMAP_E, tet.LocalOffsetE);
|
|
|
tet.Local_DG_mapH(tetraMAP_H, tet.LocalOffsetH);
|
|
|
origEn_1->reset();
|
|
|
origHn_32->reset();
|
|
|
for(j = 0 ; j < TetPolyOrderDim[tet.PolyOrderFlag]; j++){
|
|
|
if(tetraMAP_E[j] < 0)
|
|
|
origEn_1->setentry(j, 0.0);
|
|
|
else
|
|
|
origEn_1->setentry(j, en_1->getentry(tetraMAP_E[j]));
|
|
|
|
|
|
if(tetraMAP_H[j] < 0)
|
|
|
origHn_32->setentry(j, 0.0);
|
|
|
else
|
|
|
origHn_32->setentry(j, hn_32->getentry(tetraMAP_H[j]));
|
|
|
}
|
|
|
|
|
|
fp_t IntegrValueE = 0.0;
|
|
|
fp_t IntegrValueH = 0.0;
|
|
|
fp_t NormalizeValueE = 0.0;
|
|
|
fp_t NormalizeValueH = 0.0;
|
|
|
|
|
|
vtr r;
|
|
|
vtr Exa_NumE;
|
|
|
Exa_NumE.reset();
|
|
|
vtr Exa_NumH;
|
|
|
Exa_NumH.reset();
|
|
|
|
|
|
//Tetrahedron integration
|
|
|
for(j = 0; j < points; j++){
|
|
|
zeta[0] = ZetaMat[j][0];
|
|
|
zeta[1] = ZetaMat[j][1];
|
|
|
zeta[2] = ZetaMat[j][2];
|
|
|
zeta[3] = ZetaMat[j][3];
|
|
|
SimplexToCartesian(tet, r, zeta);
|
|
|
eLocal[j] = CalcEfield(origEn_1->getEntryPtr(), avtr, vol, zeta, tet.PolyOrderFlag);
|
|
|
hLocal[j] = CalcEfield(origHn_32->getEntryPtr(), avtr, vol, zeta, tet.PolyOrderFlag);
|
|
|
GetExactSolution(tet, r, eLocal_exa[j], hLocal_exa[j], timeStep, dt, TimeDistFlag);
|
|
|
|
|
|
Exa_NumE = eLocal_exa[j] - eLocal[j];
|
|
|
Exa_NumH = hLocal_exa[j] - hLocal[j];
|
|
|
IntegrValueE += weights[j] * vol * (Exa_NumE.magnitude() * Exa_NumE.magnitude());
|
|
|
IntegrValueH += weights[j] * vol * (Exa_NumH.magnitude() * Exa_NumH.magnitude());
|
|
|
NormalizeValueE += weights[j] * vol * (eLocal_exa[j].magnitude() * eLocal_exa[j].magnitude());
|
|
|
NormalizeValueH += weights[j] * vol * (hLocal_exa[j].magnitude() * hLocal_exa[j].magnitude());
|
|
|
}
|
|
|
|
|
|
IntegrOmegaE = IntegrOmegaE + IntegrValueE;
|
|
|
IntegrOmegaH = IntegrOmegaH + IntegrValueH;
|
|
|
NormalizeOmegaE = NormalizeOmegaE + NormalizeValueE;
|
|
|
NormalizeOmegaH = NormalizeOmegaH + NormalizeValueH;
|
|
|
}
|
|
|
// Write to file
|
|
|
char Error_E_TimeLog[180];
|
|
|
char Error_H_TimeLog[180];
|
|
|
sprintf(Error_E_TimeLog, "%s.TDerrorE", fname);
|
|
|
sprintf(Error_H_TimeLog, "%s.TDerrorH", fname);
|
|
|
|
|
|
ofstream Error_E(Error_E_TimeLog, ios_base::out | ios::app);
|
|
|
Error_E.setf(ios::scientific, ios::floatfield);
|
|
|
Error_E.precision(15);
|
|
|
if(!Error_E)
|
|
|
cout << "Error in opening file: " << Error_E_TimeLog << " for write " << endl;
|
|
|
|
|
|
Error_E << "[" << (timeStep + 1.0) * dt << ", " << sqrt(IntegrOmegaE) << "]; \n";
|
|
|
Error_E.close();
|
|
|
|
|
|
ofstream Error_H(Error_H_TimeLog, ios_base::out | ios::app);
|
|
|
Error_H.setf(ios::scientific, ios::floatfield);
|
|
|
Error_H.precision(15);
|
|
|
if(!Error_H)
|
|
|
cout << "Error in opening file: " << Error_H_TimeLog << " for write " << endl;
|
|
|
|
|
|
Error_H << "[" << (timeStep + 1.5) * dt << ", " << sqrt(IntegrOmegaH) << "]; \n";
|
|
|
Error_H.close();
|
|
|
|
|
|
delete[] weights;
|
|
|
for(i = 0; i < points; i++)
|
|
|
delete[] ZetaMat[i];
|
|
|
delete[] ZetaMat;
|
|
|
}
|
|
|
|
|
|
// Maps simplex coordinates to a Cartesian point.
//
// tet  : tetrahedron whose four node coordinates are combined.
// r    : output, set to sum_i zeta[i] * coord(node i).
// zeta : the four weights, one per node.
void FemGrp::SimplexToCartesian(tetra& tet, vtr& r, fp_t zeta[4]){
    fp_t cart[3] = {0., 0., 0.};

    for(int vertex = 0; vertex < 4; ++vertex){
        const fp_t weight = zeta[vertex];
        cart[0] += tet.getNode(vertex)->getCoord().getx() * weight;
        cart[1] += tet.getNode(vertex)->getCoord().gety() * weight;
        cart[2] += tet.getNode(vertex)->getCoord().getz() * weight;
    }

    r.setvtr(cart[0], cart[1], cart[2]);
}
|
|
|
|
|
|
// Evaluates the reference plane-wave solution at point `r`.
//
// tet      : tetrahedron containing `r`; its material entries mur(0,0) and
//            epsr(0,0) give the impedance and the propagation speed.
// r        : evaluation point.
// Einc/Hinc: outputs, overwritten for every boundary condition of type
//            planeWaveType or pmlType; untouched if there is none or if
//            Flag is not 0, 1 or 2.
// timeStep : time-step index; E is evaluated at (timeStep+1.0)*dt and
//            H at (timeStep+1.5)*dt.
// dt       : time increment.
// Flag     : 0 = cosine wave, 1 = Gaussian pulse (cosine-modulated when
//            ModuleFlag is set), 2 = Neumann pulse.
void FemGrp::GetExactSolution(tetra& tet, vtr& r, vtr& Einc, vtr& Hinc, int timeStep, fp_t dt, int Flag){
    const fp_t pulseDelay = To;
    const fp_t pulseWidth = Tau;
    const fp_t impedance = No * sqrt(tet.mat->mur.getEntry(0,0) / tet.mat->epsr.getEntry(0,0));
    const fp_t waveSpeed = Vo / sqrt(tet.mat->epsr.getEntry(0,0) * tet.mat->mur.getEntry(0,0));
    const fp_t carrier = freq;
    const fp_t omega = 2.0 * Pi * carrier * MEGA;

    fp_t retarded;
    fp_t modulation;
    fp_t neumannFactor;

    for(int b = 0; b < bcCNT; b++){
        bc source = bcARRAY[b];

        // Only plane-wave and PML boundary conditions carry the excitation.
        if(source.getbType() != planeWaveType && source.getbType() != pmlType)
            continue;

        fp_t Emagnitude = source.getMagE();
        fp_t thetaRad = source.getTheta() * Pi / 180.0;
        fp_t phiRad = source.getPhi() * Pi / 180.0;
        vtr Epol = source.getField();
        vtr waveDir(sin(thetaRad) * cos(phiRad), sin(thetaRad) * sin(phiRad), cos(thetaRad));
        waveDir.unitvtr();
        vtr Hpol = waveDir * Epol;
        vtr origin = source.getPW_ro();
        fp_t Hmagnitude = Emagnitude / impedance;

        Hpol.unitvtr();
        Epol.unitvtr();

        if(Flag == 0){
            // Direction scaled to the wave vector (omega / speed).
            waveDir.Scale((omega / waveSpeed));
            const auto spatialPhase = dotP(waveDir, r - origin);
            Hinc = Hpol * (Hmagnitude * cos(spatialPhase - omega * (timeStep + 1.5) * dt));
            Einc = Epol * (Emagnitude * cos(spatialPhase - omega * (timeStep + 1.0) * dt));
        }else if(Flag == 1){
            const auto travelTime = dotP(waveDir, r - origin) / waveSpeed;

            retarded = (timeStep + 1.0) * dt - pulseDelay - (travelTime);
            modulation = 1.0;
            if(ModuleFlag)
                modulation = cos(omega * retarded);
            Einc = Epol * modulation * (Emagnitude * exp(- (retarded * retarded) / (pulseWidth * pulseWidth)));

            retarded = (timeStep + 1.5) * dt - pulseDelay - (travelTime);
            modulation = 1.0;
            if(ModuleFlag)
                modulation = cos(omega * retarded);
            Hinc = Hpol * modulation * (Hmagnitude * exp(- (retarded * retarded) / (pulseWidth * pulseWidth)));
        }else if(Flag == 2){
            const auto travelTime = dotP(waveDir, r - origin) / waveSpeed;

            retarded = (timeStep + 1.5) * dt - pulseDelay - (travelTime);
            neumannFactor = (2.0 * retarded) / (pulseWidth * pulseWidth);
            Hinc = Hpol * (Hmagnitude * neumannFactor * exp(- (retarded * retarded) / (pulseWidth * pulseWidth)));

            retarded = (timeStep + 1.0) * dt - pulseDelay - (travelTime);
            neumannFactor = (2.0 * retarded) / (pulseWidth * pulseWidth);
            Einc = Epol * (Emagnitude * neumannFactor * exp(- (retarded * retarded) / (pulseWidth * pulseWidth)));
        }
        // Any other Flag leaves Einc / Hinc as they were.
    }
}
|
|
|
|
|
|
/* "Early Time Behavior in Reverberation Chambers and
|
|
|
Its Effect on the Relationships Between Coherence
|
|
|
Bandwidth, Chamber Decay Time, RMS Delay
|
|
|
Spread, and the Chamber Buildup Time", Christopher L. Holloway et al. */
|
|
|
bool FemGrp::calculatePade(int currentTimeStep){
|
|
|
int M = currentTimeStep / tsPerSampling;
|
|
|
int totalSamples = (int)ceil((1.0 * NtimeSteps) / tsPerSampling);
|
|
|
int N = (int)floor(M / 2.0);
|
|
|
|
|
|
int finish = 0;
|
|
|
timer_start("Process : ", 'm');
|
|
|
#pragma omp parallel for schedule(static) shared(finish)
|
|
|
for(int pade = 0; pade < padeCNT; pade++){
|
|
|
int auxFinish = 0;
|
|
|
fp_t convergence = 0.0;
|
|
|
fp_t maxProbe = 0.0;
|
|
|
|
|
|
for(int component = 0; component < NumOfFieldComponents; component++){
|
|
|
fp_t a_k[N] = {0};
|
|
|
fp_t b_k[N] = {0};
|
|
|
fp_t_ts maxValComponent = 0.0;
|
|
|
getPadeCoef(a_k, b_k, &fieldProbes[pade * totalSamples * NumOfFieldComponents], N, component, &maxValComponent);
|
|
|
maxProbe += maxValComponent;
|
|
|
convergence += maxValComponent * getFreqDomainPade(a_k, b_k, totalSamples, N, &tranferencePadeFunctionFD[pade * totalSamples * NumOfFieldComponents], component, pade, currentTimeStep / tsPerPade == 1);
|
|
|
|
|
|
cout << "Probe = " << pade << " Component = " << component << " Value = " << (convergence / maxProbe) << endl;
|
|
|
if((currentTimeStep / tsPerPade == 1 || (convergence / maxProbe) < PadeTolerance) && (component == NumOfUnitaryVectors - 1 || component == NumOfFieldComponents - 1)){
|
|
|
auxFinish++;
|
|
|
maxProbe = 0.0;
|
|
|
convergence = 0.0;
|
|
|
}
|
|
|
}
|
|
|
#pragma omp atomic update
|
|
|
finish += auxFinish;
|
|
|
}
|
|
|
timer_stop('m');
|
|
|
return finish == 0;
|
|
|
}
|
|
|
|
|
|
void FemGrp::calculatePadeEnd(int currentTimeStep){
|
|
|
int M = currentTimeStep / tsPerSampling;
|
|
|
int totalSamples = (int)ceil((1.0 * NtimeSteps) / tsPerSampling);
|
|
|
int N = (int)floor(M / 2.0);
|
|
|
|
|
|
int finish = 0;
|
|
|
timer_start("Process : ", 'm');
|
|
|
for(int pade = 0; pade < padeCNT; pade++){
|
|
|
Complex* FD = new Complex[totalSamples * NumOfFieldComponents];
|
|
|
|
|
|
// #pragma omp parallel for
|
|
|
for(int component = 0; component < NumOfFieldComponents; component++){
|
|
|
fp_t a_k[N] = {0};
|
|
|
fp_t b_k[N] = {0};
|
|
|
|
|
|
fp_t_ts maxValComponent = 0.0;
|
|
|
timer_start("Coef: " + std::to_string(component) + ": ",'m');
|
|
|
getPadeCoef(a_k, b_k, &fieldProbes[pade * totalSamples * NumOfFieldComponents], N, component, &maxValComponent);
|
|
|
timer_stop('m');
|
|
|
timer_start("Freq Dom " + std::to_string(component) + ": ",'m');
|
|
|
getFreqDomainPade(a_k, b_k, totalSamples, N, FD, component, pade, true);
|
|
|
timer_stop('m');
|
|
|
}
|
|
|
// getPadeIFFTEnd(pade, FD);
|
|
|
timer_start("IFFF " + std::to_string(pade) + ": ",'m');
|
|
|
getPadeIFFT(pade, FD);
|
|
|
timer_stop('m');
|
|
|
delete[] FD;
|
|
|
|
|
|
cout << "Pade point exported: " << pade << endl;
|
|
|
}
|
|
|
timer_stop('m');
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
#if defined(DGTD_USE_CUDA)
|
|
|
void FemGrp::calculatePadeEndCUDA(int currentTimeStep){
|
|
|
int M = currentTimeStep / tsPerSampling;
|
|
|
int totalSamples = (int)ceil((1.0 * NtimeSteps) / tsPerSampling);
|
|
|
int N = (int)floor(M / 2.0);
|
|
|
int nFields = padeCNT * NumOfFieldComponents;
|
|
|
int finish = 0;
|
|
|
timer_start("Process : ", 'm');
|
|
|
cudaStream_t* streams = (cudaStream_t*)malloc(NumOfFieldComponents * sizeof(cudaStream_t));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMalloc((void**)&padeFreqConstant_d, totalSamples * sizeof(int)));
|
|
|
CUDA_SAFE_CALL(cudaMemcpy(padeFreqConstant_d, padeFreqConstant, totalSamples * sizeof(int), cudaMemcpyHostToDevice));
|
|
|
|
|
|
for(int i = 0; i < NumOfFieldComponents; i++){
|
|
|
cudaStreamCreate(&streams[i]);
|
|
|
}
|
|
|
|
|
|
cuDoubleComplex* Hf;
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Hf, totalSamples * nFields * sizeof(cuDoubleComplex), cudaHostAllocMapped));
|
|
|
|
|
|
for(int pade = 0; pade < padeCNT; pade++){
|
|
|
fp_t* maxValComponent = new fp_t[NumOfFieldComponents];
|
|
|
for(int component = 0; component < NumOfFieldComponents; component++){
|
|
|
fp_t* a_k;
|
|
|
fp_t* b_k;
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&a_k, N * sizeof(fp_t), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&b_k, N * sizeof(fp_t), cudaHostAllocMapped));
|
|
|
|
|
|
cuDoubleComplex* FD = &Hf[totalSamples * (pade * NumOfFieldComponents + component)];
|
|
|
getPadeCoefCUDA(a_k, b_k, &maxValComponent[component], pade * NumOfFieldComponents + component, streams[component], currentTimeStep);
|
|
|
getFreqDomainPadeCUDA(a_k, b_k, totalSamples, N, FD, streams[component]);
|
|
|
}
|
|
|
getPadeIFFT(pade, &Hf[pade * totalSamples * NumOfFieldComponents]);
|
|
|
}
|
|
|
|
|
|
for(int i = 0; i < NumOfFieldComponents; i++){
|
|
|
cudaStreamDestroy(streams[i]);
|
|
|
}
|
|
|
|
|
|
timer_stop('m');
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaFree(padeFreqConstant_d));
|
|
|
CUDA_SAFE_CALL(cudaFreeHost(Hf));
|
|
|
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
bool FemGrp::calculatePadeCUDA(int currentTimeStep, bool isFirst, bool isEnd){
|
|
|
if(isEnd){
|
|
|
FreeGPU();
|
|
|
}
|
|
|
|
|
|
int M = currentTimeStep / tsPerSampling;
|
|
|
int totalSamples = (int)ceil((1.0 * NtimeSteps) / tsPerSampling);
|
|
|
int N = (int)floor(M / 2.0);
|
|
|
int nFields = padeCNT * NumOfFieldComponents;
|
|
|
int finish = 0;
|
|
|
bool* exitArray = new bool[padeCNT];
|
|
|
|
|
|
timer_start("Process : ", 'm');
|
|
|
cudaStream_t* streams = (cudaStream_t*)malloc(NumOfFieldComponents * sizeof(cudaStream_t));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMalloc((void**)&padeFreqConstant_d, totalSamples * sizeof(int)));
|
|
|
CUDA_SAFE_CALL(cudaMemcpy(padeFreqConstant_d, padeFreqConstant, totalSamples * sizeof(int), cudaMemcpyHostToDevice));
|
|
|
|
|
|
for(int i = 0; i < NumOfFieldComponents; i++){
|
|
|
cudaStreamCreate(&streams[i]);
|
|
|
}
|
|
|
|
|
|
int nPoints = isEnd ? probeCNT : padeCNT;
|
|
|
|
|
|
for(int pade = 0; pade < nPoints; pade++){
|
|
|
timer_start("Process : ", 'm');
|
|
|
fp_t* maxValComponent = new fp_t[NumOfFieldComponents];
|
|
|
cuDoubleComplex* Hf;
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Hf, totalSamples * NumOfFieldComponents * sizeof(cuDoubleComplex), cudaHostAllocMapped));
|
|
|
#pragma omp parallel for
|
|
|
for(int component = 0; component < NumOfFieldComponents; component++){
|
|
|
fp_t* a_k;
|
|
|
fp_t* b_k;
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&a_k, N * sizeof(fp_t), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&b_k, N * sizeof(fp_t), cudaHostAllocMapped));
|
|
|
|
|
|
cuDoubleComplex* FD = &Hf[totalSamples * component];
|
|
|
getPadeCoefCUDA(a_k, b_k, &maxValComponent[component], pade * NumOfFieldComponents + component, streams[component], currentTimeStep);
|
|
|
getFreqDomainPadeCUDA(a_k, b_k, totalSamples, N, FD, streams[component]);
|
|
|
}
|
|
|
|
|
|
if(!isFirst && !isEnd){
|
|
|
exitArray[pade] = studyPadeConvergence(&tranferencePadeFunctionFD_h[pade * NumOfFieldComponents * totalSamples], Hf, maxValComponent, totalSamples, pade);
|
|
|
}
|
|
|
|
|
|
if(isEnd){
|
|
|
printFD(pade, Hf);
|
|
|
if(pade < padeCNT && writePadeTD){
|
|
|
getPadeIFFT(pade, Hf);
|
|
|
}
|
|
|
cout << "Final Pade Point " << pade << "completed" << endl;
|
|
|
}else{
|
|
|
CUDA_SAFE_CALL(cudaMemcpy(&tranferencePadeFunctionFD_h[pade * NumOfFieldComponents * totalSamples], Hf, totalSamples * NumOfFieldComponents * sizeof(cuDoubleComplex), cudaMemcpyHostToHost));
|
|
|
}
|
|
|
delete [] maxValComponent;
|
|
|
CUDA_SAFE_CALL(cudaFreeHost(Hf));
|
|
|
timer_stop('m');
|
|
|
}
|
|
|
|
|
|
for(int i = 0; i < NumOfFieldComponents; i++){
|
|
|
cudaStreamDestroy(streams[i]);
|
|
|
}
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaFree(padeFreqConstant_d));
|
|
|
bool exitValue = false;
|
|
|
|
|
|
if(!isFirst && !isEnd){
|
|
|
for(int pade = 0; pade < padeCNT; pade++){
|
|
|
if(pade == 0){
|
|
|
exitValue = exitArray[0];
|
|
|
}else
|
|
|
exitValue = exitValue & exitArray[pade];
|
|
|
}
|
|
|
}
|
|
|
|
|
|
delete [] exitArray;
|
|
|
timer_stop('m');
|
|
|
return exitValue;
|
|
|
}
|
|
|
|
|
|
/**
 * Compares two successive frequency-domain Pade estimates of one probe.
 *
 * For every field type the correlation coefficient between the magnitudes of
 * the old and new spectra is computed per component, weighted by that
 * component's maximum sample and averaged over the field type.
 *
 * @param oldField   previous spectrum, component c stored at [c * M_global, ...).
 * @param newField   current spectrum, same layout.
 * @param maxFields  per-component weights, indexed like the components.
 * @param M_global   number of frequency samples per component.
 * @param point      probe index, used only in the log line.
 * @return false as soon as one field type's weighted correlation is below
 *         PadeTolerance; true otherwise.
 *
 * NOTE(review): assumes the components are ordered field type by field type
 * (NumOfUnitaryVectors entries each), i.e. NumOfFieldComponents ==
 * TypeOfFields * NumOfUnitaryVectors - confirm against the definitions.
 */
bool FemGrp::studyPadeConvergence(cuDoubleComplex* oldField, cuDoubleComplex* newField, fp_t* maxFields, int M_global, int point){
    for(int typeOfField = 0; typeOfField < TypeOfFields; typeOfField++){
        fp_t convergence = 0.0;
        fp_t maxProbe = 0.0;

        #pragma omp parallel for reduction(+ : convergence, maxProbe)
        for(int component = 0; component < NumOfUnitaryVectors; component++){
            // Select the component belonging to the current field type. The
            // offset was missing before, so the second pass (logged as "H")
            // re-evaluated the first field type's components.
            const int fieldIdx = typeOfField * NumOfUnitaryVectors + component;
            const size_t base = (size_t)fieldIdx * M_global;

            fp_t sum_X = 0.0, sum_Y = 0.0, sum_XY = 0.0, sum_XX = 0.0, sum_YY = 0.0;

            for(int i = 0; i < M_global; i++){
                const cuDoubleComplex prev = oldField[base + i];
                const cuDoubleComplex curr = newField[base + i];

                const fp_t lastYf_abs = sqrt(prev.x * prev.x + prev.y * prev.y);
                const fp_t currentYf_abs = sqrt(curr.x * curr.x + curr.y * curr.y);

                sum_X += currentYf_abs;
                sum_Y += lastYf_abs;
                sum_XY += currentYf_abs * lastYf_abs;
                sum_XX += currentYf_abs * currentYf_abs;
                sum_YY += lastYf_abs * lastYf_abs;
            }

            // Sample correlation coefficient between the two magnitude spectra.
            const fp_t corr = (M_global * sum_XY - sum_X * sum_Y) / sqrt((M_global * sum_XX - sum_X * sum_X) * (M_global * sum_YY - sum_Y * sum_Y));

            convergence += maxFields[fieldIdx] * corr;
            maxProbe += maxFields[fieldIdx];
        }

        cout << "Convergence Point " << point << " Fields " << (typeOfField ? "H" : "E") << ": " << (convergence / maxProbe) << endl;

        if((convergence / maxProbe) < PadeTolerance){
            return false;
        }
    }

    return true;
}
|
|
|
|
|
|
/**
 * Computes the Pade coefficients of one probe component on the GPU.
 *
 * With N = floor((currentTimeStep / tsPerSampling) / 2), a dense
 * (N-1) x (N-1) system built from the sampled field is solved with cuSOLVER
 * (LU factorisation + solve) for the denominator coefficients b_k[1..N-1];
 * the numerator coefficients a_k are then obtained from b_k and the samples.
 *
 * @param a_k              out, N numerator coefficients (host memory).
 * @param b_k              out, N denominator coefficients, b_k[0] = 1.
 * @param maxField         out, largest absolute sample seen while assembling.
 * @param local_id         offset of the (probe, component) pair inside one
 *                         sample row of fieldProbes.
 * @param stream           stream used for the transfers and the solver.
 * @param currentTimeStep  time step that defines how many samples are used.
 *
 * The stream is synchronised before returning, so a_k/b_k are final on exit.
 * cuSOLVER failures are reported on stderr; the call still returns.
 */
void FemGrp::getPadeCoefCUDA(fp_t* a_k, fp_t* b_k, fp_t* maxField, int local_id, cudaStream_t stream, int currentTimeStep){
    const int M = currentTimeStep / tsPerSampling;
    const int N = (int)floor(M / 2.0);

    // Distance between two consecutive samples of the same field entry.
    const size_t sampleStride = (size_t)probeCNT * NumOfFieldComponents;

    *maxField = abs(fieldProbes[local_id]);

    // Without at least one coefficient slot there is nothing to write.
    if(N < 1){
        return;
    }

    a_k[0] = fieldProbes[local_id];
    b_k[0] = 1;

    const int n = N - 1;
    const int nrhs = 1;

    // A single coefficient needs no linear solve.
    if(n == 0){
        return;
    }

    auto reportSolver = [](cusolverStatus_t status, const char* what){
        if(status != CUSOLVER_STATUS_SUCCESS){
            fprintf(stderr, "[getPadeCoefCUDA] %s failed (cusolver status %d)\n", what, (int)status);
        }
    };

    cusolverDnHandle_t handle;
    reportSolver(cusolverDnCreate(&handle), "cusolverDnCreate");
    reportSolver(cusolverDnSetStream(handle, stream), "cusolverDnSetStream");

    fp_t* G_h;
    fp_t* d_h;
    fp_t* G_d;
    fp_t* d_d;

    CUDA_SAFE_CALL(cudaMallocHost((void**)&G_h, (size_t)n * n * sizeof(fp_t), cudaHostAllocMapped));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&d_h, (size_t)n * sizeof(fp_t), cudaHostAllocMapped));

    // Assemble the matrix in column-major order (entry (i, j) at j * n + i)
    // and the right-hand side.
    for(int i = 0; i < n; i++){
        for(int j = 0; j < n; j++){
            const size_t sampleIdx = (size_t)(N - j + i) * sampleStride + local_id;
            G_h[(size_t)j * n + i] = fieldProbes[sampleIdx];
            *maxField = max(abs(fieldProbes[sampleIdx]), *maxField);
        }
        d_h[i] = -fieldProbes[(size_t)(N + i + 1) * sampleStride + local_id];
    }

    CUDA_SAFE_CALL(cudaMalloc((void**)&G_d, (size_t)n * n * sizeof(fp_t)));
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_d, (size_t)n * sizeof(fp_t)));

    CUDA_SAFE_CALL(cudaMemcpyAsync(G_d, G_h, (size_t)n * n * sizeof(fp_t), cudaMemcpyHostToDevice, stream));
    CUDA_SAFE_CALL(cudaMemcpyAsync(d_d, d_h, (size_t)n * sizeof(fp_t), cudaMemcpyHostToDevice, stream));

    int bufferSize = 0;
    reportSolver(cusolverDnDgetrf_bufferSize(handle, n, n, G_d, n, &bufferSize), "cusolverDnDgetrf_bufferSize");

    // Two status words: [0] for the factorisation, [1] for the solve, so the
    // solve does not overwrite a singularity report from the factorisation.
    int* info;
    CUDA_SAFE_CALL(cudaMalloc((void**)&info, 2 * sizeof(int)));

    fp_t* buffer; // LU workspace
    CUDA_SAFE_CALL(cudaMalloc((void**)&buffer, bufferSize * sizeof(fp_t)));

    int* ipiv = NULL; // pivoting sequence
    CUDA_SAFE_CALL(cudaMalloc((void**)&ipiv, n * sizeof(int)));

    reportSolver(cusolverDnDgetrf(handle, n, n, G_d, n, buffer, ipiv, info), "cusolverDnDgetrf");
    reportSolver(cusolverDnDgetrs(handle, CUBLAS_OP_N, n, nrhs, G_d, n, ipiv, d_d, n, info + 1), "cusolverDnDgetrs");

    int info_h[2] = {0, 0};
    CUDA_SAFE_CALL(cudaMemcpyAsync(d_h, d_d, (size_t)n * sizeof(fp_t), cudaMemcpyDeviceToHost, stream));
    CUDA_SAFE_CALL(cudaMemcpyAsync(info_h, info, 2 * sizeof(int), cudaMemcpyDeviceToHost, stream));

    // Wait for the solution before releasing anything the stream still uses.
    CUDA_SAFE_CALL(cudaStreamSynchronize(stream));

    if(info_h[0] != 0 || info_h[1] != 0){
        fprintf(stderr, "[getPadeCoefCUDA] solver reported info=(%d, %d) for field %d; coefficients may be invalid\n", info_h[0], info_h[1], local_id);
    }

    CUDA_SAFE_CALL(cudaFree(G_d));
    CUDA_SAFE_CALL(cudaFree(d_d));
    CUDA_SAFE_CALL(cudaFree(buffer));
    CUDA_SAFE_CALL(cudaFree(info));
    CUDA_SAFE_CALL(cudaFree(ipiv));

    reportSolver(cusolverDnDestroy(handle), "cusolverDnDestroy");

    CUDA_SAFE_CALL(cudaFreeHost(G_h));

    // Denominator from the solve; numerator from b_k and the early samples.
    for(int i = 0; i < n; i++){
        b_k[i + 1] = d_h[i];
        a_k[i + 1] = 0.0;
        for(int j = 0; j < i + 1; j++){
            a_k[i + 1] += b_k[j] * fieldProbes[(size_t)(i + 1 - j) * sampleStride + local_id];
        }
    }

    CUDA_SAFE_CALL(cudaFreeHost(d_h));
}
|
|
|
|
|
|
/**
 * Evaluates a Pade approximant at M_global frequency points on the GPU.
 *
 * The coefficients are uploaded, CalculatePadeFreq is launched on `stream`
 * with 256-thread blocks covering M_global points, and the result is copied
 * back into H_f. The stream is synchronised before returning.
 *
 * @param a_k       N numerator coefficients (host).
 * @param b_k       N denominator coefficients (host).
 * @param M_global  number of frequency points.
 * @param N         number of coefficients.
 * @param H_f       out, M_global complex values (host).
 * @param stream    stream used for transfers and the kernel.
 */
void FemGrp::getFreqDomainPadeCUDA(fp_t* a_k, fp_t* b_k, int M_global, int N, cuDoubleComplex* H_f, cudaStream_t stream){
    // An empty grid is an invalid launch configuration; nothing to compute.
    if(M_global <= 0){
        return;
    }

    fp_t* a_k_d;
    fp_t* b_k_d;
    cuDoubleComplex* H_f_d;

    CUDA_SAFE_CALL(cudaMalloc((void**)&a_k_d, N * sizeof(fp_t)));
    CUDA_SAFE_CALL(cudaMalloc((void**)&b_k_d, N * sizeof(fp_t)));
    CUDA_SAFE_CALL(cudaMalloc((void**)&H_f_d, M_global * sizeof(cuDoubleComplex)));

    CUDA_SAFE_CALL(cudaMemcpyAsync(a_k_d, a_k, N * sizeof(fp_t), cudaMemcpyHostToDevice, stream));
    CUDA_SAFE_CALL(cudaMemcpyAsync(b_k_d, b_k, N * sizeof(fp_t), cudaMemcpyHostToDevice, stream));

    const dim3 blockDim(256, 1, 1);
    const dim3 gridDim(ceil_div(M_global, 256), 1, 1);

    CalculatePadeFreq<<<gridDim, blockDim, 0, stream>>>(a_k_d, b_k_d, M_global, N, padeFreqConstant_d, H_f_d);
    // A kernel launch returns no status; pick up configuration errors here.
    CUDA_SAFE_CALL(cudaGetLastError());

    CUDA_SAFE_CALL(cudaMemcpyAsync(H_f, H_f_d, M_global * sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost, stream));

    // Finish all stream work before the device buffers are released.
    CUDA_SAFE_CALL(cudaStreamSynchronize(stream));

    CUDA_SAFE_CALL(cudaFree(a_k_d));
    CUDA_SAFE_CALL(cudaFree(b_k_d));
    CUDA_SAFE_CALL(cudaFree(H_f_d));
}
|
|
|
|
|
|
void FemGrp::getPadeIFFT(int probe, cuDoubleComplex* fDomainField){
|
|
|
int M_global = (int)ceil((1.0 * NtimeSteps) / tsPerSampling);
|
|
|
double* tDomainField = new double[M_global];
|
|
|
double* tDomainFieldOutput = new double[M_global * NumOfFieldComponents];
|
|
|
const auto max_precision {std::numeric_limits<fp_t>::digits10 + 1};
|
|
|
|
|
|
for(int component = 0; component < NumOfFieldComponents; component++){
|
|
|
fftw_complex* fft;
|
|
|
fftw_plan ifft;
|
|
|
fft = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * M_global);
|
|
|
#pragma omp parallel for
|
|
|
for(int k = 0; k < M_global; k++){
|
|
|
cuDoubleComplex field = fDomainField[component * M_global + k];
|
|
|
Complex aux = (std::complex<float>(field.x, field.y) / sourceFreqDomain[k]) / M_global;
|
|
|
fft[k][0] = (abs(sourceFreqDomain[k]) < SourceTolerancePade) ? 0.0 : aux.real();
|
|
|
fft[k][1] = (abs(sourceFreqDomain[k]) < SourceTolerancePade) ? 0.0 : aux.imag();
|
|
|
}
|
|
|
|
|
|
ifft = fftw_plan_dft_c2r_1d(M_global, fft, tDomainField, FFTW_ESTIMATE);
|
|
|
fftw_execute(ifft);
|
|
|
fftw_destroy_plan(ifft);
|
|
|
fftw_free(fft);
|
|
|
#pragma omp parallel for
|
|
|
for (int i = 0; i < M_global; i++) {
|
|
|
tDomainFieldOutput[component * M_global + i] = 0.0;
|
|
|
for (int j = 0; j <= min(i, tsSource); j++) {
|
|
|
tDomainFieldOutput[component * M_global + i] += tDomainField[i - j] * sourceTimeDomain[j]; // Main convolution operation
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
char csvFileName[StrOutput];
|
|
|
sprintf(csvFileName, "./PROBES/TD_Pade_%s_Probe_%d.csv", fname, probe);
|
|
|
std::ofstream csvFile(csvFileName);
|
|
|
csvFile << "Ex" << "," << "Ey" << "," << "Ez" << "," << "Hx" << "," << "Hy" << "," << "Hz" << "\n";
|
|
|
|
|
|
for(int n = 0; n < M_global; n++){
|
|
|
for(int component = 0; component < NumOfFieldComponents; component++){
|
|
|
if (component > 0){
|
|
|
csvFile << ",";
|
|
|
}
|
|
|
csvFile << std::setprecision(max_precision) << tDomainFieldOutput[component * M_global + n];
|
|
|
}
|
|
|
csvFile << "\n";
|
|
|
}
|
|
|
usleep(100);
|
|
|
csvFile.close();
|
|
|
delete [] tDomainField;
|
|
|
delete [] tDomainFieldOutput;
|
|
|
}
|
|
|
|
|
|
void FemGrp::printFD(int probe, cuDoubleComplex* fDomainField){
|
|
|
int M_global = (int)ceil((1.0 * NtimeSteps) / tsPerSampling);
|
|
|
const auto max_precision {std::numeric_limits<fp_t>::digits10 + 1};
|
|
|
char csvFileName[StrOutput];
|
|
|
sprintf(csvFileName, "./PROBES/FD_Pade_%s_Probe_%d.csv", fname, probe);
|
|
|
std::ofstream csvFile(csvFileName);
|
|
|
csvFile << "ExRe" << "," << "ExIm" << "," << "EyRe" << "," << "EyIm" << "," << "EzRe" << "," << "EzIm" << "," << "HxRe" << "," << "HxIm" << "," << "HyRe" << "," << "HyIm" << "," << "HzRe" << "," << "HzIm" << "\n";
|
|
|
|
|
|
for(int n = 0; n < M_global; n++){
|
|
|
for(int component = 0; component < NumOfFieldComponents; component++){
|
|
|
if (component > 0){
|
|
|
csvFile << ",";
|
|
|
}
|
|
|
csvFile << std::setprecision(max_precision) << fDomainField[component * M_global + n].x << "," << fDomainField[component * M_global + n].y;
|
|
|
}
|
|
|
csvFile << "\n";
|
|
|
}
|
|
|
}
|
|
|
|
|
|
void FemGrp::testEnd(){
|
|
|
int ts = 0;
|
|
|
char tname[StrLenShort];
|
|
|
int totalSamples = (int)ceil((1.0 * NtimeSteps) / tsPerSampling);
|
|
|
try {
|
|
|
while(1){
|
|
|
sprintf(tname, "PROBES_aux/Probes_%s_%04i.csv", fname, ts * tsPerSampling);
|
|
|
cout << tname << endl;
|
|
|
rapidcsv::Document probe_doc(tname);
|
|
|
std::vector<double> Ex_col = probe_doc.GetColumn<double>("Ex");
|
|
|
std::vector<double> Ey_col = probe_doc.GetColumn<double>("Ey");
|
|
|
std::vector<double> Ez_col = probe_doc.GetColumn<double>("Ez");
|
|
|
std::vector<double> Hx_col = probe_doc.GetColumn<double>("Hx");
|
|
|
std::vector<double> Hy_col = probe_doc.GetColumn<double>("Hy");
|
|
|
std::vector<double> Hz_col = probe_doc.GetColumn<double>("Hz");
|
|
|
|
|
|
for(int i = 0; i < Ey_col.size(); i++){
|
|
|
// fieldProbes[i * totalSamples * NumOfFieldComponents + ts * NumOfFieldComponents + 0] = Ex_col[i];
|
|
|
// fieldProbes[i * totalSamples * NumOfUnitaryVectors * TypeO#pragma omp parallel forfFields + ts * NumOfFieldComponents + 5] = Hz_col[i];
|
|
|
// cout << ts * NumOfFieldComponents * Ey_col.size() + i * NumOfFieldComponents + 0 << endl;
|
|
|
fieldProbes[ts * NumOfFieldComponents * Ey_col.size() + i * NumOfFieldComponents + 0] = Ex_col[i];
|
|
|
fieldProbes[ts * NumOfFieldComponents * Ey_col.size() + i * NumOfFieldComponents + 1] = Ey_col[i];
|
|
|
fieldProbes[ts * NumOfFieldComponents * Ey_col.size() + i * NumOfFieldComponents + 2] = Ez_col[i];
|
|
|
fieldProbes[ts * NumOfFieldComponents * Ey_col.size() + i * NumOfFieldComponents + 3] = Hx_col[i];
|
|
|
fieldProbes[ts * NumOfFieldComponents * Ey_col.size() + i * NumOfFieldComponents + 4] = Hy_col[i];
|
|
|
fieldProbes[ts * NumOfFieldComponents * Ey_col.size() + i * NumOfFieldComponents + 5] = Hz_col[i];
|
|
|
}
|
|
|
ts++;
|
|
|
}
|
|
|
}catch(...){
|
|
|
calculatePadeCUDA(ts * tsPerSampling, false, true);
|
|
|
// for(int i = ts/8 ; i <= ts; i += ts/8){
|
|
|
// cout << i << " " << (ts+1)/8 << " " << ts << " " << (i == (ts/8)) << " " << (i == 8 * (ts/8)) << endl;
|
|
|
// cout << calculatePadeCUDA(i * tsPerSampling, i == ts/8, i == 8 * (ts/8)) << endl;
|
|
|
// }
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
/**
 * Computes the Pade coefficients of one field component on the CPU.
 *
 * A dense (N-1) x (N-1) system assembled from the samples is solved with
 * solveAx_B() for the denominator coefficients b_k[1..N-1]; the numerator
 * coefficients a_k follow from b_k and the first samples.
 *
 * @param a_k        out, N numerator coefficients.
 * @param b_k        out, N denominator coefficients, b_k[0] = 1.
 * @param field      samples of one probe; sample s of a component is read at
 *                   s * NumOfFieldComponents + component.
 * @param N          number of coefficients.
 * @param component  field component to fit.
 * @param maxField   out, largest absolute sample seen while assembling.
 */
void FemGrp::getPadeCoef(fp_t* a_k, fp_t* b_k, fp_t_ts* field, int N, int component, fp_t_ts* maxField){
    denseMat<fp_t>* G = new denseMat<fp_t>(N-1, N-1);
    ArrayFP<fp_t> d(N-1);

    a_k[0] = field[component];
    b_k[0] = 1;

    // Magnitude, like every later candidate and like the GPU implementation;
    // a signed start value could mask a large negative first sample.
    *maxField = abs(field[component]);

    timer_start("getPadeCoef " + std::to_string(1) + ": ",'m');

    for(int k = 0; k < N-1; k++){
        for(int m = 0; m < N-1; m++){
            const fp_t_ts sample = field[(N - m + k) * NumOfFieldComponents + component];
            G->setEntry(k, m, sample); // it has to be in column form
            *maxField = max(abs(sample), *maxField);
        }
        d[k] = -field[(N + k + 1) * NumOfFieldComponents + component];
    }

    timer_stop('m');

    timer_start("getPadeCoef " + std::to_string(2) + ": ",'m');
    G->SelfTranspose();
    timer_stop('m');

    timer_start("getPadeCoef " + std::to_string(3) + ": ",'m');
    solveAx_B(*G, d);
    timer_stop('m');

    timer_start("getPadeCoef " + std::to_string(4) + ": ",'m');

    for(int k = 0; k < N-1; k++){
        b_k[k + 1] = d[k];

        // Start from zero here instead of relying on the caller's buffer.
        a_k[k + 1] = 0.0;
        for(int m = 0; m < k + 1; m++){
            a_k[k + 1] += b_k[m] * field[(k + 1 - m) * NumOfFieldComponents + component];
        }
    }

    timer_stop('m');

    G->Clear();
    // The matrix object itself was never released before.
    // NOTE(review): assumes denseMat's destructor is safe after Clear().
    delete G;

    for(int i = 0; i<N; i++){
        cout << a_k[i] << " " << b_k[i] << endl;
    }
}
|
|
|
|
|
|
void FemGrp::getPadeFreq(int N, int tsPerSampling){
|
|
|
#if defined(DGTD_USE_CUDA)
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&padeFreqConstant, N * sizeof(int), cudaHostAllocMapped));
|
|
|
#endif
|
|
|
sourceFreqDomain = new Complex[N];
|
|
|
sourceTimeDomain = new fp_t[N];
|
|
|
|
|
|
#pragma omp parallel for
|
|
|
for(int i = 0; i < N; i++){
|
|
|
getSourceTimeDomain(i * tsPerSampling, &sourceTimeDomain[i], ExcitFlag);
|
|
|
if(abs(sourceTimeDomain[i]) > SourceTolerancePade){
|
|
|
tsSource = i;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
int finish = N % 2 == 0 ? N / 2 - 1 : (N - 1) / 2;
|
|
|
fftw_complex* fftOut;
|
|
|
fftOut = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
|
|
|
fftw_plan fft;
|
|
|
fft = fftw_plan_dft_r2c_1d(N, sourceTimeDomain, fftOut, FFTW_ESTIMATE);
|
|
|
fftw_execute(fft);
|
|
|
fftw_destroy_plan(fft);
|
|
|
|
|
|
#pragma omp parallel for
|
|
|
for (int i = 0; i < N; ++i) {
|
|
|
sourceFreqDomain[i] = std::complex<fp_t>(fftOut[i][0], fftOut[i][1]);
|
|
|
if (i <= finish) {
|
|
|
padeFreqConstant[i] = i;
|
|
|
} else {
|
|
|
padeFreqConstant[i] = -N + i;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
fftw_free(fftOut);
|
|
|
}
|
|
|
|
|
|
void FemGrp::getSourceTimeDomain(int timeStep, fp_t* Einc, int ExcitFlag){
|
|
|
fp_t dt = LocTimeSteps[N_class - 1];
|
|
|
fp_t omega = 2.0 * Pi * freq * MEGA;
|
|
|
fp_t to = To;
|
|
|
fp_t tau = Tau;
|
|
|
fp_t Exponent, SinModul;
|
|
|
|
|
|
switch (ExcitFlag){
|
|
|
case 0:
|
|
|
*Einc = static_cast<fp_t>(cos(omega * (timeStep + 1.0) * dt));
|
|
|
break;
|
|
|
|
|
|
case 1:
|
|
|
Exponent = (timeStep + 1.0) * dt - to;
|
|
|
SinModul = ModuleFlag ? cos(omega * Exponent) : 1.0;
|
|
|
*Einc = static_cast<fp_t>(SinModul * exp(- (Exponent * Exponent) / (tau * tau)));
|
|
|
break;
|
|
|
|
|
|
case 2:
|
|
|
Exponent = (timeStep + 1.0) * dt - to;
|
|
|
SinModul = ModuleFlag ? cos(omega * Exponent) : 1.0;
|
|
|
*Einc = static_cast<fp_t>(SinModul * exp(- (Exponent * Exponent) / (tau * tau)));
|
|
|
|
|
|
break;
|
|
|
|
|
|
default:
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
 * Evaluates a Pade approximant at the M_global frequencies in padeFreqs and
 * stores the result in H_f at index i * NumOfFieldComponents + component.
 *
 * @param a_k         N numerator coefficients.
 * @param b_k         N denominator coefficients.
 * @param M_global    number of frequency points.
 * @param N           number of coefficients.
 * @param H_f         in/out spectrum; the previous content is read before it
 *                    is overwritten when firstValue is false.
 * @param component   field component being evaluated.
 * @param probe       probe index, used only in the CSV file name.
 * @param firstValue  true: only store the spectrum, dump its magnitude to
 *                    Pade_Freq_1_<N>_<probe>_<component>.csv and return 0.
 *                    false: additionally return the correlation coefficient
 *                    between the old and new magnitude spectra.
 * @return 0.0 when firstValue is true, otherwise the correlation coefficient.
 */
fp_t FemGrp::getFreqDomainPade(fp_t* a_k, fp_t* b_k, int M_global, int N, Complex* H_f, int component, int probe, bool firstValue){
    // Ratio of the two coefficient polynomials at frequency point i.
    auto evalPade = [&](int i) -> Complex {
        Complex sumA_k = 0;
        Complex sumB_k = 0;
        for(int k = 0; k < N; k++){
            sumA_k += a_k[k] * pow(padeFreqs[i], k);
            sumB_k += b_k[k] * pow(padeFreqs[i], k);
        }
        return sumA_k / sumB_k;
    };

    if(firstValue){
        const auto max_precision {std::numeric_limits<fp_t>::digits10 + 1};

        // The dump file is opened only on this path; it used to be created
        // (and left empty) on every comparison call as well.
        char csvFileName[StrOutput];
        snprintf(csvFileName, sizeof(csvFileName), "Pade_Freq_1_%d_%d_%d.csv", N, probe, component);
        std::ofstream csvFile(csvFileName);

        for(int i = 0; i < M_global; i++){
            Complex freqVal = evalPade(i);

            csvFile << std::setprecision(max_precision) << sqrt(pow(freqVal.real(),2)+pow(freqVal.imag(),2));
            csvFile << "\n";

            H_f[i * NumOfFieldComponents + component] = freqVal;
        }

        cout << "First/Final Pade Calculation" << endl;

        csvFile.close();

        return 0.0;
    }

    fp_t sum_X = 0.0, sum_Y = 0.0, sum_XY = 0.0, sum_XX = 0.0, sum_YY = 0.0;

    for(int i = 0; i < M_global; i++){
        Complex freqVal = evalPade(i);

        const int slot = i * NumOfFieldComponents + component;

        // Magnitude of the previous estimate, read before it is replaced.
        const fp_t lastYf_abs = sqrt(H_f[slot].real() * H_f[slot].real() + H_f[slot].imag() * H_f[slot].imag());

        H_f[slot] = freqVal;

        const fp_t currentYf_abs = sqrt(pow(freqVal.real(),2)+pow(freqVal.imag(),2));

        sum_X = sum_X + currentYf_abs;
        sum_Y = sum_Y + lastYf_abs;
        sum_XY = sum_XY + currentYf_abs * lastYf_abs;
        sum_XX = sum_XX + currentYf_abs * currentYf_abs;
        sum_YY = sum_YY + lastYf_abs * lastYf_abs;
    }

    // Sample correlation coefficient between new and old magnitude spectra.
    fp_t corr = (M_global * sum_XY - sum_X * sum_Y) / sqrt((M_global * sum_XX - sum_X * sum_X) * (M_global * sum_YY - sum_Y * sum_Y));

    return corr;
}
|
|
|
|
|
|
void FemGrp::getPadeIFFTEnd(int probe, Complex* fDomainField){
|
|
|
int M_global = (int)ceil((1.0 * NtimeSteps) / tsPerSampling);
|
|
|
Complex j = Complex (0.0, 1.0);
|
|
|
|
|
|
fp_t* tDomainField = new fp_t[M_global * NumOfFieldComponents];
|
|
|
|
|
|
const auto max_precision {std::numeric_limits<fp_t>::digits10 + 1};
|
|
|
|
|
|
for(int component = 0; component < NumOfFieldComponents; component++){
|
|
|
Complex* tDomainTransferFunction = new Complex[M_global];
|
|
|
#pragma omp parallel for
|
|
|
for(int n = 0; n < M_global; n++){
|
|
|
tDomainTransferFunction[n] = 0.0;
|
|
|
for(int k = 0; k < M_global; k++){
|
|
|
tDomainTransferFunction[n] += abs(sourceFreqDomain[k]) < SourceTolerancePade ? 0.0 : fDomainField[k * NumOfFieldComponents + component] / sourceFreqDomain[k] * exp(j * 2 * Pi * n * k / M_global);
|
|
|
}
|
|
|
tDomainTransferFunction[n] /= M_global;
|
|
|
}
|
|
|
#pragma omp parallel for
|
|
|
for(int n = 0; n < M_global; n++){
|
|
|
tDomainField[n * NumOfFieldComponents + component] = 0.0;
|
|
|
for(int k = 0; k <= n; k++){
|
|
|
tDomainField[n * NumOfFieldComponents + component] += tDomainTransferFunction[n-k].real() * sourceTimeDomain[k];
|
|
|
}
|
|
|
}
|
|
|
delete [] tDomainTransferFunction;
|
|
|
}
|
|
|
|
|
|
char csvFileName[StrOutput];
|
|
|
sprintf(csvFileName, "Pade_%s_Probe_%d.csv", fname, probe);
|
|
|
std::ofstream csvFile(csvFileName);
|
|
|
csvFile << "Ex" << "," << "Ey" << "," << "Ez" << "," << "Hx" << "," << "Hy" << "," << "Hz" << "\n";
|
|
|
|
|
|
for(int n = 0; n < M_global; n++){
|
|
|
for(int component = 0; component < NumOfFieldComponents; component++){
|
|
|
if (component > 0){
|
|
|
csvFile << ",";
|
|
|
}
|
|
|
csvFile << std::setprecision(max_precision) << tDomainField[n * NumOfFieldComponents + component];
|
|
|
}
|
|
|
csvFile << "\n";
|
|
|
}
|
|
|
usleep(100);
|
|
|
csvFile.close();
|
|
|
delete [] tDomainField;
|
|
|
}
|
|
|
|
|
|
void FemGrp::getPadeIFFT(int probe, Complex* fDomainField){
|
|
|
cout << "hello" << endl;
|
|
|
int M_global = (int)ceil((1.0 * NtimeSteps) / tsPerSampling);
|
|
|
double* tDomainField = new double[M_global * NumOfFieldComponents];
|
|
|
double* tDomainFieldOutput = new double[M_global * NumOfFieldComponents];
|
|
|
const auto max_precision {std::numeric_limits<fp_t>::digits10 + 1};
|
|
|
|
|
|
for(int component = 0; component < NumOfFieldComponents; component++){
|
|
|
fftw_complex* fft;
|
|
|
fftw_plan ifft;
|
|
|
fft = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * M_global);
|
|
|
#pragma omp parallel for
|
|
|
for(int k = 0; k < M_global; k++){
|
|
|
// Complex aux = (fDomainField[k * NumOfFieldComponents + component] / sourceFreqDomain[k]) / M_global;
|
|
|
Complex aux = (fDomainField[probe * M_global * NumOfFieldComponents + component * M_global + k] / sourceFreqDomain[k]) / M_global;
|
|
|
fft[k][0] = (abs(sourceFreqDomain[k]) < SourceTolerancePade) ? 0.0 : aux.real();
|
|
|
fft[k][1] = (abs(sourceFreqDomain[k]) < SourceTolerancePade) ? 0.0 : aux.imag();
|
|
|
// fft[k] = (abs(sourceFreqDomain[k]) < SourceTolerancePade) ? (fftw_complex)0.0 : (fftw_complex)fDomainField[k * NumOfFieldComponents + component];
|
|
|
}
|
|
|
|
|
|
double* tDomainFieldVec = &tDomainField[M_global * component];
|
|
|
|
|
|
ifft = fftw_plan_dft_c2r_1d(M_global, fft, tDomainFieldVec, FFTW_ESTIMATE);
|
|
|
fftw_execute(ifft);
|
|
|
fftw_destroy_plan(ifft);
|
|
|
fftw_free(fft);
|
|
|
#pragma omp parallel for
|
|
|
for (int i = 0; i < M_global; i++) {
|
|
|
tDomainFieldOutput[component * M_global + i] = 0.0;
|
|
|
for (int j = 0; j <= min(i, tsSource); j++) {
|
|
|
tDomainFieldOutput[component * M_global + i] += tDomainField[component * M_global + i - j] * sourceTimeDomain[j]; // Main convolution operation
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
char csvFileName[StrOutput];
|
|
|
sprintf(csvFileName, "Pade_%s_Probe_%d.csv", fname, probe);
|
|
|
std::ofstream csvFile(csvFileName);
|
|
|
csvFile << "Ex" << "," << "Ey" << "," << "Ez" << "," << "Hx" << "," << "Hy" << "," << "Hz" << "\n";
|
|
|
|
|
|
for(int n = 0; n < M_global; n++){
|
|
|
for(int component = 0; component < NumOfFieldComponents; component++){
|
|
|
if (component > 0){
|
|
|
csvFile << ",";
|
|
|
}
|
|
|
csvFile << std::setprecision(max_precision) << tDomainFieldOutput[component * M_global + n];
|
|
|
}
|
|
|
csvFile << "\n";
|
|
|
}
|
|
|
usleep(100);
|
|
|
csvFile.close();
|
|
|
delete [] tDomainField;
|
|
|
delete [] tDomainFieldOutput;
|
|
|
}
|
|
|
|
|
|
/**
 * Fills a quadrature rule for a tetrahedron.
 *
 * Every point is given by four coordinates zeta[p][0..3] that sum to one; the
 * weights of each rule sum to one.
 *   - PolyOrder 1: 1 point  (centroid, weight 1).
 *   - PolyOrder 2: 4 points (weight 1/4 each).
 *   - PolyOrder 3: 5 points (centroid with weight -0.8, four points with
 *                  weight 0.45).
 * Any other PolyOrder leaves all outputs untouched.
 *
 * @param PolyOrder  rule selector.
 * @param points     out, number of points of the selected rule.
 * @param zeta       out, zeta[p][c] for point p and coordinate c; must provide
 *                   at least `points` rows of four entries.
 * @param weights    out, one weight per point.
 */
void FemGrp::GetTetQuadRule(int PolyOrder, int& points, fp_t** zeta, fp_t* weights){
    const int kCoords = 4;

    if(PolyOrder == 1){
        points = 1;

        // All four coordinates of the centroid; previously zeta[0][0] was
        // assigned three times and the remaining entries were never set.
        for(int c = 0; c < kCoords; c++){
            zeta[0][c] = 0.25;
        }
        weights[0] = 1.0;
    }else if(PolyOrder == 2){
        points = 4;

        // Point p carries the large coordinate in position p.
        for(int p = 0; p < 4; p++){
            for(int c = 0; c < kCoords; c++){
                zeta[p][c] = (c == p) ? 0.585410196624969 : 0.138196601125011;
            }
            weights[p] = 0.250000000000000;
        }
    }else if(PolyOrder == 3){
        points = 5;

        for(int c = 0; c < kCoords; c++){
            zeta[0][c] = 0.250000000000000;
        }
        weights[0] = -0.800000000000000;

        // Points 1..4 carry the coordinate 1/2 in position p - 1.
        for(int p = 1; p < 5; p++){
            for(int c = 0; c < kCoords; c++){
                zeta[p][c] = (c == p - 1) ? 0.500000000000000 : 0.166666666666667;
            }
            weights[p] = 0.450000000000000;
        }
    }
}
|
|
|
|
|
|
/**
 * Gathers the E and H coefficients of one tetrahedron from the global
 * solution vectors en_1 and hn_32.
 *
 * The element's local maps are obtained with Local_DG_mapE/H; entries mapped
 * to a negative index are returned as zero.
 *
 * @param tet        element whose coefficients are gathered.
 * @param origEn_1   out, E coefficients (reset before filling).
 * @param origHn_32  out, H coefficients (reset before filling).
 */
void FemGrp::Get_Coefficients_(tetra* tet, ArrayFP<fp_t>* origEn_1, ArrayFP<fp_t>* origHn_32){
    const int localDim = TetPolyOrderDim[tet->PolyOrderFlag];

    // Vector-owned maps: the former new[] arrays were never released, leaking
    // two buffers on every call.
    std::vector<int> tetraMAP_E(localDim);
    std::vector<int> tetraMAP_H(localDim);

    tet->Local_DG_mapE(tetraMAP_E.data(), tet->LocalOffsetE);
    tet->Local_DG_mapH(tetraMAP_H.data(), tet->LocalOffsetH);

    origEn_1->reset();
    origHn_32->reset();

    for(int i = 0 ; i < localDim; i++){
        origEn_1->setentry(i, tetraMAP_E[i] < 0 ? 0.0 : en_1->getentry(tetraMAP_E[i]));
        origHn_32->setentry(i, tetraMAP_H[i] < 0 ? 0.0 : hn_32->getentry(tetraMAP_H[i]));
    }
}
|
|
|
|
|
|
void FemGrp::numberDofs(){
|
|
|
tetra* tet = 0;
|
|
|
int LocalDim = TetPolyOrderDim[PolyFlag];
|
|
|
int *tetraEMap = 0;
|
|
|
int *tetraHMap = 0;
|
|
|
|
|
|
int EdofOffset = 0;//[E H] offset
|
|
|
int HdofOffset = DimE;
|
|
|
|
|
|
for(int i = 0; i < tetraCNT; i++){
|
|
|
tet = &(tetARRAY[i]);
|
|
|
tet->allocDofMap();
|
|
|
|
|
|
tetraEMap = tet->get_LocalEMap(); // obtained from SetupMatrixFree
|
|
|
tetraHMap = tet->get_LocalHMap();
|
|
|
|
|
|
for(int j = 0; j < LocalDim; j++){
|
|
|
//in case there is -1
|
|
|
tet->setEHGlobalMap(j,
|
|
|
(tetraEMap[j] != NOT_NUMBERED) ? (tetraEMap[j] + EdofOffset) : (tetraEMap[j]),
|
|
|
(tetraHMap[j] != NOT_NUMBERED) ? (tetraHMap[j] + HdofOffset) : (tetraHMap[j]));
|
|
|
}
|
|
|
}
|
|
|
size_t matrixDIM_com = dimE + dimH;
|
|
|
cout << " " << endl;
|
|
|
cout << "==============================================" << endl;
|
|
|
cout << " NUMBER OF DEGREES OF FREEDOM " << endl;
|
|
|
cout << "==============================================" << endl;
|
|
|
cout << " Global Number of dof is " << matrixDIM_com << endl;
|
|
|
cout << " Global Matrix dim is (w/o compress) " << tetraCNT * LocalDim * 2 << endl;
|
|
|
cout << "==============================================" << endl;
|
|
|
cout << " " << endl;
|
|
|
}
|
|
|
|
|
|
// 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 //
|
|
|
// 0000000000000000000000000000000000000 Port Meshes 00000000000000000000000000000000000000000000 //
|
|
|
// 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 //
|
|
|
|
|
|
|
|
|
/*
|
|
|
void FemGrp::makePortMeshes()
|
|
|
{
|
|
|
int i, j;
|
|
|
if(portCNT == 0)
|
|
|
return;
|
|
|
map<int,int> PortMap, PortMapRes;
|
|
|
set<int> PortSet;
|
|
|
set<int>::iterator it;
|
|
|
int DGface_bc;
|
|
|
for(int idx = 0; idx < tetraCNT; idx++){
|
|
|
for(j = 0 ; j < NumOfFaces ; j++){
|
|
|
DGface_bc = tetARRAY[idx].fc[j]->getbType();
|
|
|
if(DGface_bc >= portType && DGface_bc < pecType)
|
|
|
PortSet.insert(DGface_bc);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
LocPortCnt = (int)PortSet.size();
|
|
|
|
|
|
cout << "PortSet.size = " << (int)PortSet.size() << endl;
|
|
|
cout << "portCNT = " << portCNT << endl;
|
|
|
cout << "LocPortCnt = " << LocPortCnt << endl;
|
|
|
|
|
|
portCNT = LocPortCnt;
|
|
|
|
|
|
cout << "portCNT = " << portCNT << endl;
|
|
|
cout << "LocPortCnt = " << LocPortCnt << endl;
|
|
|
|
|
|
if(LocPortCnt == 0)
|
|
|
return;
|
|
|
|
|
|
for(it = PortSet.begin(); it != PortSet.end(); it++)
|
|
|
cout << "Port_type:" << *it << endl;
|
|
|
|
|
|
int counter = 0;
|
|
|
for(it = PortSet.begin(); it != PortSet.end(); it++){
|
|
|
PortMap[*it] = counter;
|
|
|
PortMapRes[counter] = *it;
|
|
|
counter++;
|
|
|
}
|
|
|
|
|
|
pMeshARRAY = new portMesh[LocPortCnt];
|
|
|
|
|
|
// count the port faces (portFaceNums)
|
|
|
// get pointers to port faces (portFaceLists)
|
|
|
// keep set of unique global node ids for faces (portNodeIds)
|
|
|
int* portFaceNums = new int[LocPortCnt];
|
|
|
list<face*>* portFaceLists = new list<face*>[LocPortCnt];
|
|
|
set<int>* portNodeIds = new set<int>[LocPortCnt];
|
|
|
memset(portFaceNums, 0, portCNT * sizeof(int));
|
|
|
|
|
|
for(i = 0; i < faceCNT; i++){
|
|
|
|
|
|
int bType = faceARRAY[i]->getbType();
|
|
|
|
|
|
if((bType >= portType) && (bType != pecType)){
|
|
|
int portNum = PortMap.find(bType)->second;
|
|
|
(portFaceNums[portNum])++; // increment the face count
|
|
|
portFaceLists[portNum].push_back(faceARRAY[i]); // add face pointer
|
|
|
// add unique node ids
|
|
|
for(j = 0; j < NumOfNodesPerFace; j++)
|
|
|
portNodeIds[portNum].insert(faceARRAY[i]->getNode(j)->getid());
|
|
|
}
|
|
|
}
|
|
|
|
|
|
for(i = 0; i < LocPortCnt; i++){
|
|
|
portMesh& portmesh = pMeshARRAY[i];
|
|
|
|
|
|
// set port name, magnitude and impedance
|
|
|
for(j = 0; j < bcCNT; j++){
|
|
|
if(bcARRAY[j].getbType() == PortMapRes[i]){
|
|
|
portmesh.setName(bcARRAY[j].getName());
|
|
|
cout<<"This is " << portmesh.getName() << endl;
|
|
|
portmesh.setMagE(bcARRAY[j].getMagE());
|
|
|
portmesh.setImpZ(bcARRAY[j].getCval());
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// allocate and add face pointers to array
|
|
|
int faceNum = portFaceNums[i];
|
|
|
portmesh.setFaceCnt(faceNum);
|
|
|
if(faceNum > 0){
|
|
|
face** portFaceArray = portmesh.getFaceArray();
|
|
|
list<face*>::iterator faceListIter = portFaceLists[i].begin();
|
|
|
for(j = 0; j < faceNum; j++){
|
|
|
portFaceArray[j] = *faceListIter;
|
|
|
faceListIter++;
|
|
|
}
|
|
|
|
|
|
// allocate and add node pointers to array
|
|
|
// keep local mapping
|
|
|
int nodeNum = portNodeIds[i].size();
|
|
|
portmesh.setNodeCnt(nodeNum);
|
|
|
portmesh.allocGlobToLocMap();
|
|
|
node** portNodeArray = portmesh.getNodeArray();
|
|
|
map<int, int>& globToLocMap = portmesh.getGlobToLocMap();
|
|
|
set<int>::iterator portNodeIdIter;
|
|
|
int nodeCount = 0;
|
|
|
for(portNodeIdIter = portNodeIds[i].begin(); portNodeIdIter != portNodeIds[i].end(); portNodeIdIter++){
|
|
|
portNodeArray[nodeCount] = &(ndARRAY[*portNodeIdIter]);
|
|
|
globToLocMap[ndARRAY[*portNodeIdIter].getid()] = nodeCount++;
|
|
|
}
|
|
|
|
|
|
// setup the remaining port mesh stuff
|
|
|
scalingLength = 1.0;
|
|
|
portmesh.makeCoordSystem();
|
|
|
portmesh.makeObjMap();
|
|
|
portmesh.readVline(unit);
|
|
|
portmesh.writeMesh(objProp);
|
|
|
|
|
|
cout.setf(ios::scientific);
|
|
|
cout.precision(15);
|
|
|
|
|
|
#if defined(DGTD_USE_CUDA) || defined(DGTD_USE_CUDA_OPENCL)
|
|
|
vtr PortDirection_vtr = portmesh.getPortDirection();
|
|
|
excitationProp.PortDirection[0] = PortDirection_vtr.getx();
|
|
|
excitationProp.PortDirection[1] = PortDirection_vtr.gety();
|
|
|
excitationProp.PortDirection[2] = PortDirection_vtr.getz();
|
|
|
#endif
|
|
|
}
|
|
|
}
|
|
|
|
|
|
delete [] portFaceNums;
|
|
|
delete [] portFaceLists;
|
|
|
delete [] portNodeIds;
|
|
|
}
|
|
|
*/
|
|
|
|
|
|
|
|
|
void FemGrp::makePortMeshes()
|
|
|
{
|
|
|
int i, j;
|
|
|
if (portCNT == 0) return;
|
|
|
|
|
|
LocPortCnt = portCNT;
|
|
|
pMeshARRAY = new portMesh[LocPortCnt];
|
|
|
|
|
|
// Collectors per port
|
|
|
int* portFaceNums = new int[LocPortCnt];
|
|
|
std::list<face*>* portFaceLists = new std::list<face*>[LocPortCnt];
|
|
|
std::set<int>* portNodeIds = new std::set<int>[LocPortCnt];
|
|
|
std::memset(portFaceNums, 0, LocPortCnt * sizeof(int));
|
|
|
|
|
|
// Pass 1: walk faces and collect them by portNum (via bcNumToPnum)
|
|
|
for (i = 0; i < faceCNT; ++i)
|
|
|
{
|
|
|
int bType = faceARRAY[i]->getbType();
|
|
|
if (bType != portType) continue; // only port faces
|
|
|
|
|
|
|
|
|
|
|
|
// pick the valid owning tetra (check hydra pointers BEFORE deref)
|
|
|
tetra* tet = nullptr;
|
|
|
if (faceARRAY[i]->hydra[0] != nullptr)
|
|
|
{
|
|
|
tet = faceARRAY[i]->hydra[0];
|
|
|
}
|
|
|
else if (faceARRAY[i]->hydra[1] != nullptr)
|
|
|
{
|
|
|
tet = faceARRAY[i]->hydra[1];
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
continue; // no owner; defensive
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Find bc_number for THIS face inside its tetra (match same face by pointer)
|
|
|
int bc_number = -1;
|
|
|
for (int k = 0; k < NumOfFaces; ++k)
|
|
|
{
|
|
|
if (tet->fc[k] == faceARRAY[i])
|
|
|
{
|
|
|
bc_number = tet->getbc(k);
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
if (bc_number < 0) continue;
|
|
|
|
|
|
int portNum = bcNumToPnum[bc_number]-1;
|
|
|
|
|
|
++portFaceNums[portNum];
|
|
|
portFaceLists[portNum].push_back(faceARRAY[i]);
|
|
|
|
|
|
for (j = 0; j < NumOfNodesPerFace; ++j)
|
|
|
{
|
|
|
portNodeIds[portNum].insert(faceARRAY[i]->getNode(j)->getid());
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// Optional: sanity check
|
|
|
for (int p = 1; p < LocPortCnt+1; ++p)
|
|
|
{
|
|
|
std::cout << "Port " << p
|
|
|
<< " (BCNum=" << pnumToBcNum[p] << ") has "
|
|
|
<< portNodeIds[p-1].size() << " unique nodes and "
|
|
|
<< portFaceNums[p-1] << " faces.\n";
|
|
|
}
|
|
|
|
|
|
// Pass 2: finalize each port mesh
|
|
|
for (int p = 0; p < LocPortCnt; ++p)
|
|
|
{
|
|
|
portMesh& portmesh = pMeshARRAY[p];
|
|
|
|
|
|
// Initialize from bcARRAY using BCNum directly
|
|
|
int bc_number = pnumToBcNum[p+1];
|
|
|
if (bc_number >= 0 && bc_number < bcCNT)
|
|
|
{
|
|
|
auto& rec = bcARRAY[bc_number]; // <-- no bcRec type name
|
|
|
string name = rec.getName();
|
|
|
fp_t magnitudeE = rec.getMagE();
|
|
|
cout << "bc_number = " << bc_number << " name = " << name << " | magE = " << magnitudeE << endl;
|
|
|
portmesh.setName(rec.getName());
|
|
|
portmesh.setMagE(magnitudeE);
|
|
|
portmesh.setImpZ(rec.getCval());
|
|
|
}
|
|
|
|
|
|
// Faces
|
|
|
int faceNum = portFaceNums[p];
|
|
|
portmesh.setFaceCnt(faceNum);
|
|
|
if (faceNum > 0) {
|
|
|
face** portFaceArray = portmesh.getFaceArray();
|
|
|
auto itF = portFaceLists[p].begin();
|
|
|
for (j = 0; j < faceNum; ++j, ++itF) {
|
|
|
portFaceArray[j] = *itF;
|
|
|
}
|
|
|
|
|
|
// Nodes + local map
|
|
|
int nodeNum = static_cast<int>(portNodeIds[p].size());
|
|
|
portmesh.setNodeCnt(nodeNum);
|
|
|
portmesh.allocGlobToLocMap();
|
|
|
|
|
|
node** portNodeArray = portmesh.getNodeArray();
|
|
|
std::map<int,int>& globToLocMap = portmesh.getGlobToLocMap();
|
|
|
|
|
|
int nodeCount = 0;
|
|
|
for (int gid : portNodeIds[p]) {
|
|
|
// If ids aren't dense indices into ndARRAY, replace with your id->index lookup.
|
|
|
portNodeArray[nodeCount] = &(ndARRAY[gid]);
|
|
|
globToLocMap[ ndARRAY[gid].getid() ] = nodeCount++;
|
|
|
}
|
|
|
|
|
|
// Remaining setup
|
|
|
scalingLength = 1.0;
|
|
|
portmesh.makeCoordSystem();
|
|
|
portmesh.makeObjMap();
|
|
|
portmesh.readVline(unit);
|
|
|
portmesh.writeMesh(objProp);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
delete [] portFaceNums;
|
|
|
delete [] portFaceLists;
|
|
|
delete [] portNodeIds;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
void FemGrp::solveWaveguidePorts()
|
|
|
{
|
|
|
char command[1000];
|
|
|
memset(command, 0, 1000 * sizeof(char));
|
|
|
sprintf(command, "anwg_h1 %s %e 1 \n",pMeshARRAY->portName, freq);
|
|
|
cout<<"=============Running Command:============"<<endl;
|
|
|
cout << command << endl;
|
|
|
system(command);
|
|
|
}
|
|
|
*/
|
|
|
/*
|
|
|
void FemGrp::WriteWaveguidePortFields()
|
|
|
{
|
|
|
// For each port
|
|
|
for(int i = 0; i < portCNT ; i++)
|
|
|
{
|
|
|
portMesh& portmesh = pMeshARRAY[i];
|
|
|
portmesh.writeVtk();
|
|
|
}
|
|
|
}
|
|
|
*/
|
|
|
|
|
|
|
|
|
// Using anwg to solve for the port excitation mode (1st mode)
|
|
|
void FemGrp::solveWaveguidePorts()
|
|
|
{
|
|
|
// run for each detected port
|
|
|
for (int i = 0; i < portCNT; ++i)
|
|
|
{
|
|
|
const std::string name = pMeshARRAY[i].getName(); // uses the name you set from bcARRAY
|
|
|
char command[1024];
|
|
|
// quote the name in case it has spaces; print freq with good precision
|
|
|
std::snprintf(command, sizeof(command), "anwg_h1 \"%s\" %.16e 1", name.c_str(), freq);
|
|
|
|
|
|
std::cout << "============= Running Command (port " << i << "): =============\n";
|
|
|
std::cout << command << std::endl;
|
|
|
|
|
|
int rc = std::system(command);
|
|
|
if (rc != 0)
|
|
|
{
|
|
|
std::cerr << "anwg_h1 failed for port " << i << " (rc = " << rc << ")\n";
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
void FemGrp::WriteWaveguidePortFields()
|
|
|
{
|
|
|
// For each port
|
|
|
for (int i = 0; i < portCNT; ++i)
|
|
|
{
|
|
|
portMesh& portmesh = pMeshARRAY[i];
|
|
|
std::cout << "Writing VTK for port " << i << " (" << portmesh.getName() << ")\n";
|
|
|
portmesh.writeVtk();
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void FemGrp::AssignPortFieldsInFaces()
|
|
|
{
|
|
|
for(int i = 0 ; i < portCNT ; i++)
|
|
|
{
|
|
|
pMeshARRAY[i].makeRHS_E();
|
|
|
pMeshARRAY[i].makeRHS_H();
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
void FemGrp::AssignPortFieldsInFaces_TEM()
|
|
|
{
|
|
|
for (int i = 0; i < portCNT; ++i)
|
|
|
{
|
|
|
const auto& ex = portExcitations[i];
|
|
|
portMesh& pm = pMeshARRAY[i];
|
|
|
|
|
|
pm.makeRHS_TEM(ex.freq_m * 1e6, ex.epr,
|
|
|
ex.vpath[0], ex.vpath[1], ex.vpath[2],
|
|
|
ex.PortDirection[0], ex.PortDirection[1], ex.PortDirection[2]);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
//TODO: make dynamic
|
|
|
void FemGrp::EvaluateSparametersGlobal(int timeStep, fp_t dt, bool isCompact)
|
|
|
{
|
|
|
int i, j, k, m;
|
|
|
int FaceNum;
|
|
|
int Nsample = 102;
|
|
|
int GaussPnt = Nsample - 1;
|
|
|
int IsOnFace;
|
|
|
int tetraMAP_P2[30];
|
|
|
int tetraMAP_P1[12];
|
|
|
int tetraMAP_P0[6];
|
|
|
vtr lvtr[3];
|
|
|
vtr avtr[4];
|
|
|
fp_t vol;
|
|
|
fp_t zeta0, zeta1, zeta2;
|
|
|
fp_t zetaFace[3];
|
|
|
fp_t zeta[4];
|
|
|
fp_t wgt = 1.0;
|
|
|
fp_t EvalueTotal;
|
|
|
fp_t EvalueInc;
|
|
|
fp_t h;
|
|
|
fp_t* VoltEntryInc = new fp_t[portCNT];
|
|
|
fp_t* VoltEntryTotal = new fp_t[portCNT];
|
|
|
vtr Total_E_Local;
|
|
|
vtr Inc_E_Local;
|
|
|
vtr Point;
|
|
|
vtr PortDirection;
|
|
|
vtr Normal;
|
|
|
fp_t area = 0.0;
|
|
|
tetra* tet;
|
|
|
|
|
|
ArrayFP<fp_t>* origEn_1_P2 = new ArrayFP<fp_t>(30);
|
|
|
ArrayFP<fp_t>* origEn_1_P1 = new ArrayFP<fp_t>(12);
|
|
|
ArrayFP<fp_t>* origEn_1_P0 = new ArrayFP<fp_t>(6);
|
|
|
|
|
|
for(i = 0; i < portCNT; i++){
|
|
|
VoltEntryInc[i] = 0.0;
|
|
|
VoltEntryTotal[i] = 0.0;
|
|
|
}
|
|
|
|
|
|
for(i = 0; i < portCNT; i++)
|
|
|
{
|
|
|
vtr VoltLine = pMeshARRAY[i].vline.coord[1] - pMeshARRAY[i].vline.coord[0];
|
|
|
vtr VoltLineUnit = pMeshARRAY[i].vline.coord[1] - pMeshARRAY[i].vline.coord[0];
|
|
|
VoltLineUnit.unitvtr();
|
|
|
|
|
|
h = VoltLine.magnitude() / GaussPnt;
|
|
|
for(k = 0; k < GaussPnt; k++){
|
|
|
Point = pMeshARRAY[i].vline.coord[0] + VoltLineUnit * (k + 0.5) * h;
|
|
|
//cout << "k = " << k << " FCCNT = " << pMeshARRAY[i].faceCNT << endl;
|
|
|
|
|
|
for(j = 0; j < pMeshARRAY[i].faceCNT; j++){
|
|
|
IsOnFace = pMeshARRAY[i].fcArray[j]->PointInFace(Point, zeta0, zeta1, zeta2);
|
|
|
zetaFace[0] = zeta0;
|
|
|
zetaFace[1] = zeta1;
|
|
|
zetaFace[2] = zeta2;
|
|
|
|
|
|
if(IsOnFace == 1)
|
|
|
{
|
|
|
pMeshARRAY[i].fcArray[j]->getAreaNormal(&area, &Normal);
|
|
|
PortDirection = pMeshARRAY[i].fcArray[j]->bcPtr->get_PortDirection();
|
|
|
|
|
|
if(dotP(Normal, PortDirection) < 0.0)
|
|
|
tet = pMeshARRAY[i].fcArray[j]->hydra[0];
|
|
|
else
|
|
|
tet = pMeshARRAY[i].fcArray[j]->hydra[1];
|
|
|
|
|
|
tet->geometry(lvtr, avtr, &vol);
|
|
|
for(m = 0 ; m < 4; m++){
|
|
|
if(pMeshARRAY[i].fcArray[j] == tet->getFacePtr(m))
|
|
|
FaceNum = m;
|
|
|
}
|
|
|
avtr[3].reset();
|
|
|
avtr[3] = avtr[3] - (avtr[0] + avtr[1] + avtr[2]);
|
|
|
|
|
|
// 0th order polynomial
|
|
|
if(tet->PolyOrderFlag == 0){
|
|
|
tet->Local_DG_mapE(tetraMAP_P0, tet->LocalOffsetE);
|
|
|
origEn_1_P0->reset();
|
|
|
for(int Cnt1 = 0 ; Cnt1 < 6 ; Cnt1++){
|
|
|
if(tetraMAP_P0[Cnt1] < 0)
|
|
|
origEn_1_P0->setentry(Cnt1, 0.0);
|
|
|
else
|
|
|
origEn_1_P0->setentry(Cnt1, en_1->getentry(tetraMAP_P0[Cnt1]));
|
|
|
}
|
|
|
}else if(tet->PolyOrderFlag == 1){ // 1st order polynomial
|
|
|
tet->Local_DG_mapE(tetraMAP_P1, tet->LocalOffsetE);
|
|
|
origEn_1_P1->reset();
|
|
|
for(int Cnt2 = 0 ; Cnt2 < 12 ; Cnt2++){
|
|
|
if(tetraMAP_P1[Cnt2] < 0)
|
|
|
origEn_1_P1->setentry(Cnt2, 0.0);
|
|
|
else
|
|
|
origEn_1_P1->setentry(Cnt2, en_1->getentry(tetraMAP_P1[Cnt2]));
|
|
|
}
|
|
|
}else if(tet->PolyOrderFlag == 2){ // 2nd order polynomial
|
|
|
tet->Local_DG_mapE(tetraMAP_P2, tet->LocalOffsetE);
|
|
|
origEn_1_P2->reset();
|
|
|
for(int Cnt2 = 0 ; Cnt2 < 30 ; Cnt2++){
|
|
|
if(tetraMAP_P2[Cnt2] < 0)
|
|
|
origEn_1_P2->setentry(Cnt2, 0.0);
|
|
|
else
|
|
|
origEn_1_P2->setentry(Cnt2, en_1->getentry(tetraMAP_P2[Cnt2]));
|
|
|
}
|
|
|
}
|
|
|
|
|
|
for(m = 0 ; m < 4 ; m++){
|
|
|
zeta[m] = 0.0;
|
|
|
}
|
|
|
|
|
|
zeta[faceMAP[FaceNum][0]] = zetaFace[0];
|
|
|
zeta[faceMAP[FaceNum][1]] = zetaFace[1];
|
|
|
zeta[faceMAP[FaceNum][2]] = zetaFace[2];
|
|
|
|
|
|
|
|
|
// 0th order polynomial
|
|
|
if(tet->PolyOrderFlag == 0){
|
|
|
|
|
|
Total_E_Local = CalcEfield(origEn_1_P0->getEntryPtr(), avtr, vol, zeta, tet->PolyOrderFlag);
|
|
|
pMeshARRAY[i].fcArray[j]->SparEinc((timeStep + 1.0) * dt, Point, Inc_E_Local, zetaFace);
|
|
|
|
|
|
}else if(tet->PolyOrderFlag == 1){// 1st order polynomial
|
|
|
|
|
|
Total_E_Local = CalcEfield(origEn_1_P1->getEntryPtr(), avtr, vol, zeta, tet->PolyOrderFlag);
|
|
|
pMeshARRAY[i].fcArray[j]->SparEinc((timeStep + 1.0) * dt, Point, Inc_E_Local, zetaFace);
|
|
|
|
|
|
}else if(tet->PolyOrderFlag == 2){// 2nd order polynomial
|
|
|
|
|
|
Total_E_Local = CalcEfield(origEn_1_P2->getEntryPtr(), avtr, vol, zeta, tet->PolyOrderFlag);
|
|
|
pMeshARRAY[i].fcArray[j]->SparEinc((timeStep + 1.0) * dt, Point, Inc_E_Local, zetaFace);
|
|
|
|
|
|
}
|
|
|
|
|
|
EvalueTotal = dotP(Total_E_Local, VoltLineUnit);
|
|
|
EvalueInc = dotP(Inc_E_Local, VoltLineUnit);
|
|
|
VoltEntryInc[i] += - 1.0 * h * wgt * EvalueInc;
|
|
|
VoltEntryTotal[i] += - 1.0 * h * wgt * EvalueTotal;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// Write a file with all the impendances of the ports
|
|
|
if(timeStep == 0){
|
|
|
char Impedance_Log[180];
|
|
|
sprintf(Impedance_Log, "%s.ImpZ", fname);
|
|
|
ofstream ImpedanceOutfile(Impedance_Log, ios_base::out);
|
|
|
if(!ImpedanceOutfile)
|
|
|
cout << "Error in opening file: " << Impedance_Log << " for write " << endl;
|
|
|
|
|
|
for(i = 0 ; i < portCNT ; i++)
|
|
|
ImpedanceOutfile << pMeshARRAY[i].impZ << " ";
|
|
|
|
|
|
ImpedanceOutfile.close();
|
|
|
}
|
|
|
|
|
|
// Write to file Vinc
|
|
|
if(timeStep == 0)
|
|
|
system("mkdir TimeDomainVoltages");
|
|
|
char IncVoltage_TimeLog[180];
|
|
|
ofstream IncVoltageOutfile;
|
|
|
if(isCompact){
|
|
|
sprintf(IncVoltage_TimeLog, "./TimeDomainVoltages/%s.TD_Vinc", fname);
|
|
|
IncVoltageOutfile.open(IncVoltage_TimeLog, ios_base::out | ios::app);
|
|
|
}else{
|
|
|
sprintf(IncVoltage_TimeLog, "./TimeDomainVoltages/%s_%05d.TD_Vinc", fname, timeStep);
|
|
|
IncVoltageOutfile.open(IncVoltage_TimeLog, ios_base::out);
|
|
|
}
|
|
|
|
|
|
IncVoltageOutfile.setf(ios::scientific, ios::floatfield);
|
|
|
IncVoltageOutfile.precision(15);
|
|
|
if(!IncVoltageOutfile)
|
|
|
cout << "Error in opening file: " << IncVoltage_TimeLog << " for write " << endl;
|
|
|
|
|
|
IncVoltageOutfile << (timeStep + 1.0) * dt << " ";
|
|
|
for(i = 0 ; i < portCNT ; i++)
|
|
|
IncVoltageOutfile << VoltEntryInc[i]<< " ";
|
|
|
|
|
|
IncVoltageOutfile<<endl;
|
|
|
IncVoltageOutfile.close();
|
|
|
|
|
|
// Write to file Vtotal
|
|
|
char TotVoltage_TimeLog[180];
|
|
|
ofstream TotVoltageOutfile;
|
|
|
if(isCompact){
|
|
|
sprintf(TotVoltage_TimeLog, "./TimeDomainVoltages/%s.TD_Vtot", fname);
|
|
|
TotVoltageOutfile.open(TotVoltage_TimeLog, ios_base::out | ios::app);
|
|
|
}else{
|
|
|
sprintf(TotVoltage_TimeLog, "./TimeDomainVoltages/%s_%05d.TD_Vtot", fname, timeStep);
|
|
|
TotVoltageOutfile.open(TotVoltage_TimeLog, ios_base::out);
|
|
|
}
|
|
|
TotVoltageOutfile.setf(ios::scientific, ios::floatfield);
|
|
|
TotVoltageOutfile.precision(15);
|
|
|
if(!TotVoltageOutfile)
|
|
|
cout << "Error in opening file: " << TotVoltage_TimeLog << "for write"<< endl;
|
|
|
|
|
|
TotVoltageOutfile << (timeStep + 1.0) * dt << " ";
|
|
|
for(i = 0 ; i < portCNT ; i++)
|
|
|
TotVoltageOutfile << VoltEntryTotal[i] << " ";
|
|
|
|
|
|
TotVoltageOutfile << endl;
|
|
|
TotVoltageOutfile.close();
|
|
|
|
|
|
// Write to file Vref
|
|
|
char ReflVoltage_TimeLog [180];
|
|
|
ofstream ReflVoltageOutfile;
|
|
|
if(isCompact){
|
|
|
sprintf(ReflVoltage_TimeLog, "./TimeDomainVoltages/%s.TD_Vref", fname);
|
|
|
ReflVoltageOutfile.open(ReflVoltage_TimeLog, ios_base::out | ios::app);
|
|
|
}else{
|
|
|
sprintf(ReflVoltage_TimeLog, "./TimeDomainVoltages/%s_%05d.TD_Vref", fname, timeStep);
|
|
|
ReflVoltageOutfile.open(ReflVoltage_TimeLog, ios_base::out);
|
|
|
}
|
|
|
|
|
|
ReflVoltageOutfile.setf(ios::scientific, ios::floatfield);
|
|
|
ReflVoltageOutfile.precision(15);
|
|
|
if(!ReflVoltageOutfile)
|
|
|
cout << "Error in opening file: " << ReflVoltage_TimeLog << "for write"<< endl;
|
|
|
ReflVoltageOutfile.close();
|
|
|
|
|
|
delete origEn_1_P2;
|
|
|
delete origEn_1_P1;
|
|
|
delete origEn_1_P0;
|
|
|
|
|
|
delete [] VoltEntryInc;
|
|
|
delete [] VoltEntryTotal;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 //
|
|
|
// 000000000000000000000000000000000000 GPU ROUTINES 00000000000000000000000000000000000000000000 //
|
|
|
// 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 //
|
|
|
|
|
|
// DEVICE implementations
|
|
|
#if defined (DGTD_USE_CUDA)
|
|
|
#if defined (CUDA_NON_HEAVY)
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
// OUTPUT Functions
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
// Modified by Qi Jian to write field from the PROBES
|
|
|
void FemGrp::writeFieldProbeCuBLAS(int timeStep)
|
|
|
{
|
|
|
fp_t vol;
|
|
|
fp_t zeta[4];
|
|
|
vtr lvtr[3];
|
|
|
vtr avtr[4];
|
|
|
vtr eField;
|
|
|
vtr hField;
|
|
|
|
|
|
vtr eField_all;
|
|
|
vtr hField_all;
|
|
|
|
|
|
char csvFileName[StrOutput];
|
|
|
std::ofstream csvFile;
|
|
|
|
|
|
if(padeCNT == 0 || writeWhilePade)
|
|
|
{
|
|
|
sprintf(csvFileName, "./PROBES/Probes_%s_%04d.csv", fname, timeStep);
|
|
|
csvFile.open(csvFileName);
|
|
|
csvFile << "Ex" << "," << "Ey" << "," << "Ez" << "," << "Hx" << "," << "Hy" << "," << "Hz" << "\n";
|
|
|
}
|
|
|
|
|
|
const int num_nodes = probeCNT;
|
|
|
|
|
|
// Calculate Total Fields at the points
|
|
|
for(int i = 0; i < num_nodes; i++)
|
|
|
{
|
|
|
int number_of_associated_tets = probes_bary.at(i).first;
|
|
|
|
|
|
eField.reset();
|
|
|
hField.reset();
|
|
|
|
|
|
std::vector<std::pair<int, std::array<double, 4>>> found_tets = probes_bary.at(i).second;
|
|
|
eField_all.reset();
|
|
|
hField_all.reset();
|
|
|
|
|
|
for (int t = 0; t < number_of_associated_tets; t++)
|
|
|
{
|
|
|
|
|
|
int tet_id = found_tets.at(t).first;
|
|
|
array<double,4> tri_bary_coord = found_tets.at(t).second;
|
|
|
tetra& tet = tetARRAY[tet_id];
|
|
|
|
|
|
tet.geometry(lvtr, avtr, &vol);
|
|
|
avtr[3].reset();
|
|
|
avtr[3] = avtr[3] - (avtr[0] + avtr[1] + avtr[2]);
|
|
|
|
|
|
eField.reset();
|
|
|
hField.reset();
|
|
|
zeta[0] = static_cast<fp_t>(tri_bary_coord[0]);
|
|
|
zeta[1] = static_cast<fp_t>(tri_bary_coord[1]);
|
|
|
zeta[2] = static_cast<fp_t>(tri_bary_coord[2]);
|
|
|
zeta[3] = static_cast<fp_t>(tri_bary_coord[3]);
|
|
|
|
|
|
eField = CalcEfield(&En1_h[mapIdLoc[tet.getcnt()] * TetPolyOrderDim[PolyFlag]], avtr, vol, zeta, PolyFlag);
|
|
|
hField = CalcEfield(&Hn32_h[mapIdLoc[tet.getcnt()] * TetPolyOrderDim[PolyFlag]], avtr, vol, zeta, PolyFlag);
|
|
|
|
|
|
eField_all = eField_all + eField;
|
|
|
hField_all = hField_all + hField;
|
|
|
|
|
|
}
|
|
|
|
|
|
eField_all = eField_all / ((fp_t) number_of_associated_tets);
|
|
|
hField_all = hField_all / ((fp_t) number_of_associated_tets);
|
|
|
|
|
|
if(usePade){ // && i < padeCNT
|
|
|
int row = (int)(timeStep / tsPerSampling)* NumOfFieldComponents * probeCNT ;
|
|
|
int column = i * NumOfFieldComponents;
|
|
|
fieldProbes[row + column + 0] = eField_all.getx();
|
|
|
fieldProbes[row + column + 1] = eField_all.gety();
|
|
|
fieldProbes[row + column + 2] = eField_all.getz();
|
|
|
fieldProbes[row + column + 3] = hField_all.getx();
|
|
|
fieldProbes[row + column + 4] = hField_all.gety();
|
|
|
fieldProbes[row + column + 5] = hField_all.getz();
|
|
|
}
|
|
|
|
|
|
|
|
|
if(padeCNT == 0 || writeWhilePade){
|
|
|
const auto max_precision {std::numeric_limits<fp_t>::digits10 + 1};
|
|
|
csvFile << std::setprecision(max_precision) << eField_all.getx() << "," << eField_all.gety() << "," << eField_all.getz() << "," << hField_all.getx() << "," << hField_all.gety() << "," << hField_all.getz() << "\n";
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
if(padeCNT == 0 || writeWhilePade)
|
|
|
{
|
|
|
usleep(100);
|
|
|
csvFile.close();
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------
|
|
|
// Port-face centroid probes: one CSV per port, per timestep, folders
|
|
|
// ----------------------------------------------------------------------
|
|
|
void FemGrp::writePortFieldProbeCuBLAS(int timeStep)
|
|
|
{
|
|
|
fp_t vol;
|
|
|
fp_t zeta[4];
|
|
|
vtr lvtr[3];
|
|
|
vtr avtr[4];
|
|
|
vtr eField, hField;
|
|
|
vtr eField_all, hField_all;
|
|
|
|
|
|
char csvFileName[StrOutput];
|
|
|
std::ofstream csvFile;
|
|
|
|
|
|
if (portCNT > 0 && PortFacePidx_h && PortFaceCentroid_h && !portFaceCentroid_bary.empty())
|
|
|
{
|
|
|
// Base output directory and per-port subdirs
|
|
|
mkdir("./PortProbes", 0755);
|
|
|
for (int pnum = 0; pnum < portCNT; ++pnum)
|
|
|
{
|
|
|
std::string portDir = "./PortProbes/Port" + std::to_string(pnum);
|
|
|
mkdir(portDir.c_str(), 0755);
|
|
|
|
|
|
// Open CSV for this port + timestep
|
|
|
char pCsv[512];
|
|
|
std::snprintf(pCsv, sizeof(pCsv), "%s/Port%d_%04d.csv", portDir.c_str(), pnum, timeStep);
|
|
|
std::ofstream pcsv(pCsv);
|
|
|
if (!pcsv.is_open())
|
|
|
{
|
|
|
std::cerr << "Error opening file: " << pCsv << "\n";
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
// Header: centroid only
|
|
|
pcsv << "x1,y1,z1,Ex,Ey,Ez,Hx,Hy,Hz\n";
|
|
|
const auto max_precision = std::numeric_limits<fp_t>::digits10 + 1;
|
|
|
pcsv << std::fixed << std::setprecision(max_precision);
|
|
|
|
|
|
// Iterate all flattened excitation faces, pick those of this port
|
|
|
for (int f = 0; f < excitationFaces; ++f)
|
|
|
{
|
|
|
if (PortFacePidx_h[f] != pnum) continue;
|
|
|
|
|
|
// Centroid position from buffer
|
|
|
const fp_t_ts* C = &PortFaceCentroid_h[3*f];
|
|
|
const double cx = static_cast<double>(C[0]);
|
|
|
const double cy = static_cast<double>(C[1]);
|
|
|
const double cz = static_cast<double>(C[2]);
|
|
|
|
|
|
// Bary search results for this centroid (should be present)
|
|
|
int nAssoc = (int)portFaceCentroid_bary[f].first;
|
|
|
if (nAssoc <= 0)
|
|
|
{
|
|
|
// If you prefer hard-fail, you can exit as in readPROBE()
|
|
|
// Here we just skip gracefully.
|
|
|
continue;
|
|
|
}
|
|
|
const auto& found_tets = portFaceCentroid_bary[f].second;
|
|
|
|
|
|
// Average E/H over owning tets (same pattern as node probes)
|
|
|
eField_all.reset();
|
|
|
hField_all.reset();
|
|
|
|
|
|
for (int t = 0; t < nAssoc; ++t)
|
|
|
{
|
|
|
int tet_id = found_tets[t].first;
|
|
|
const std::array<double,4>& b = found_tets[t].second;
|
|
|
|
|
|
tetra& tet = tetARRAY[tet_id];
|
|
|
tet.geometry(lvtr, avtr, &vol);
|
|
|
avtr[3].reset();
|
|
|
avtr[3] = avtr[3] - (avtr[0] + avtr[1] + avtr[2]);
|
|
|
|
|
|
zeta[0] = (fp_t)b[0];
|
|
|
zeta[1] = (fp_t)b[1];
|
|
|
zeta[2] = (fp_t)b[2];
|
|
|
zeta[3] = (fp_t)b[3];
|
|
|
|
|
|
eField = CalcEfield(&En1_h[mapIdLoc[tet.getcnt()] * TetPolyOrderDim[PolyFlag]], avtr, vol, zeta, PolyFlag);
|
|
|
hField = CalcEfield(&Hn32_h[mapIdLoc[tet.getcnt()] * TetPolyOrderDim[PolyFlag]], avtr, vol, zeta, PolyFlag);
|
|
|
|
|
|
eField_all = eField_all + eField;
|
|
|
hField_all = hField_all + hField;
|
|
|
}
|
|
|
|
|
|
eField_all = eField_all / ((fp_t)nAssoc);
|
|
|
hField_all = hField_all / ((fp_t)nAssoc);
|
|
|
|
|
|
// Write one row: centroid + averaged fields
|
|
|
pcsv << cx << "," << cy << "," << cz << ","
|
|
|
<< eField_all.getx() << "," << eField_all.gety() << "," << eField_all.getz() << ","
|
|
|
<< hField_all.getx() << "," << hField_all.gety() << "," << hField_all.getz() << "\n";
|
|
|
}
|
|
|
|
|
|
pcsv.close();
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void FemGrp::writeFieldGlobalCuBLAS(int timeStep){
|
|
|
fp_t vol;
|
|
|
fp_t zeta[4];
|
|
|
vtr lvtr[3];
|
|
|
vtr avtr[4];
|
|
|
vtr coord[4];
|
|
|
vtr eLocal[4];
|
|
|
vtr hLocal[4];
|
|
|
|
|
|
vtr* eField = new vtr[nodeCNT];
|
|
|
vtr* hField = new vtr[nodeCNT];
|
|
|
|
|
|
int* count = new int[nodeCNT];
|
|
|
memset(count, 0, nodeCNT * sizeof(int));
|
|
|
|
|
|
int* polyOrder = new int[tetraCNT];
|
|
|
|
|
|
for(int i = 0; i < tetraCNT; i++){
|
|
|
tetra& tet = tetARRAY[i];
|
|
|
polyOrder[i] = tet.PolyOrderFlag;
|
|
|
|
|
|
tet.geometry(lvtr, avtr, &vol);
|
|
|
avtr[3].reset();
|
|
|
avtr[3] = avtr[3] - (avtr[0] + avtr[1] + avtr[2]);
|
|
|
|
|
|
for(int j = 0; j < 4; j++){
|
|
|
zeta[0] = BaryCoord[j][0];
|
|
|
zeta[1] = BaryCoord[j][1];
|
|
|
zeta[2] = BaryCoord[j][2];
|
|
|
zeta[3] = BaryCoord[j][3];
|
|
|
|
|
|
eLocal[j] = CalcEfield(&En1_h[mapIdLoc[tet.getcnt()] * TetPolyOrderDim[PolyFlag]], avtr, vol, zeta, tet.PolyOrderFlag);
|
|
|
hLocal[j] = CalcEfield(&Hn32_h[mapIdLoc[tet.getcnt()] * TetPolyOrderDim[PolyFlag]], avtr, vol, zeta, tet.PolyOrderFlag);
|
|
|
|
|
|
int index = tet.nd[j]->getid();
|
|
|
eField[index] = eField[index] + eLocal[j] /*- Einc*/;
|
|
|
hField[index] = hField[index] + hLocal[j] /*- Hinc*/;
|
|
|
count[index] += 1;
|
|
|
}
|
|
|
}
|
|
|
for(int i = 0; i < nodeCNT; i++){
|
|
|
eField[i] = eField[i] / static_cast<fp_t>(count[i]);
|
|
|
hField[i] = hField[i] / static_cast<fp_t>(count[i]);
|
|
|
}
|
|
|
VtkWriter vtkWriter(1.0);
|
|
|
// VtkWriter vtkWriter(unit);
|
|
|
char vtkFilePrefix[128];
|
|
|
memset(vtkFilePrefix, 0, 128 * sizeof(char));
|
|
|
|
|
|
sprintf(vtkFilePrefix, "./VTU_LTS/%s_%04d", fname, timeStep);
|
|
|
|
|
|
vtkWriter.writeField(vtkFilePrefix, nodeCNT, ndARRAY, tetraCNT, tetARRAY, eField, hField, polyOrder, 0, 0); //TODO: why here polyorder is not 1
|
|
|
|
|
|
|
|
|
delete [] eField;
|
|
|
delete [] hField;
|
|
|
delete [] count;
|
|
|
delete [] polyOrder;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool FemGrp::checkEnergyDecay(){
|
|
|
fieldEnergy /= numberOfEnergyPoints * NumOfSampleEnergyCheck;
|
|
|
maxFieldEnergy = max(maxFieldEnergy, fieldEnergy);
|
|
|
|
|
|
return (fieldEnergy < energyDecayFactor * maxFieldEnergy);
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
// Organize GPU Memory
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
void FemGrp::PrepareGPUcuBLAS()
|
|
|
{
|
|
|
tetra* tet;
|
|
|
int cntAux;
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
// Prepare Excitation Info
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
int exciCNT = 0;
|
|
|
for(int i = 0; i < N_class; i ++)
|
|
|
{
|
|
|
exciCNT += ClassExcitationCount[i];
|
|
|
}
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&ExcitationFacesCnt_h, exciCNT * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&ExcitationFacesOffset_h, exciCNT * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&ExcitationFacesNum_h, excitationFaces * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&nd_coords_tet_h, NumOfUnitaryVectors * NumOfNodes * exciCNT * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&nd_coords_face_h, NumOfUnitaryVectors * NumOfNodesPerFace * excitationFaces * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&mapE_h, exciCNT * TetPolyOrderDim[PolyFlag] * sizeof(int8_t), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&mapH_h, exciCNT * TetPolyOrderDim[PolyFlag] * sizeof(int8_t), cudaHostAllocMapped));
|
|
|
|
|
|
// for(int i = 0; i < exciCNT * TetPolyOrderDim[PolyFlag]; i++){
|
|
|
// mapE_h[i] = 1;
|
|
|
// mapH_h[i] = 1;
|
|
|
// }
|
|
|
|
|
|
// ===============================================
|
|
|
// Allocate storage for port fields
|
|
|
// ===============================================
|
|
|
const int Q = GAUSS_POINT_NUM_h[PolyFlag]; // same as GPU kernel uses
|
|
|
|
|
|
cout << "excitationFaces = " << excitationFaces << endl;
|
|
|
cout << "exciCNT = " << exciCNT << endl;
|
|
|
|
|
|
if (portCNT > 0)
|
|
|
{
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Etan_qp_h, excitationFaces * Q * 3 * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Htan_qp_h, excitationFaces * Q * 3 * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&PortFacePidx_h, excitationFaces * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&PortFaceCentroid_h, excitationFaces * 3 * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Etan_center_h, excitationFaces * 3 * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Htan_center_h, excitationFaces * 3 * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&FaceID_excitation_h, excitationFaces * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&TetID_excitation_h, excitationFaces * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
}
|
|
|
|
|
|
// ===============================================
|
|
|
// Allocated Impedance for Planewave
|
|
|
// ===============================================
|
|
|
cout << "PlaneWaveBCFlag = " << PlaneWaveBCFlag << endl;
|
|
|
cout << "Number of Ports = " << portCNT << endl;
|
|
|
if(PlaneWaveBCFlag)
|
|
|
{
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Z_face_pw_h, excitationFaces * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
}
|
|
|
|
|
|
|
|
|
excitationFaces = 0;
|
|
|
exciCNT = 0;
|
|
|
|
|
|
for (int i = 0; i < N_class; i ++)
|
|
|
{
|
|
|
|
|
|
cout << "\nN CLASS = " << i << endl;
|
|
|
|
|
|
for(int j = 0; j < ClassExcitationCount[i]; j ++)
|
|
|
{
|
|
|
tet = &(tetARRAY[ClassTetraIndex[i][j]]);
|
|
|
|
|
|
cout << ClassTetraIndex[i][j] << " ";
|
|
|
|
|
|
for(int k = 0; k < TetPolyOrderDim[PolyFlag]; k++)
|
|
|
{
|
|
|
mapE_h[exciCNT * TetPolyOrderDim[PolyFlag] + k] = (tet->LocMapE[k] < 0 ? 0 : 1);
|
|
|
mapH_h[exciCNT * TetPolyOrderDim[PolyFlag] + k] = (tet->LocMapH[k] < 0 ? 0 : 1);
|
|
|
}
|
|
|
|
|
|
ExcitationFacesOffset_h[exciCNT] = excitationFaces;
|
|
|
for(int k = 0; k < NumOfFaces; k++)
|
|
|
{
|
|
|
for(int node = 0; node < NumOfNodes; node++)
|
|
|
{
|
|
|
nd_coords_tet_h[NumOfUnitaryVectors * (NumOfNodes * exciCNT + node) + 0] = tet->nd[node]->getCoord().getx();
|
|
|
nd_coords_tet_h[NumOfUnitaryVectors * (NumOfNodes * exciCNT + node) + 1] = tet->nd[node]->getCoord().gety();
|
|
|
nd_coords_tet_h[NumOfUnitaryVectors * (NumOfNodes * exciCNT + node) + 2] = tet->nd[node]->getCoord().getz();
|
|
|
|
|
|
//cout << "TET ID = " << tet->getcnt() << " Face ID = " << tet->fc[k]->getcnt() << " BC = " << tet->fc[k]->bcPtr->getbType() << endl;
|
|
|
//cout << tet->nd[node]->getCoord().getx() << " " << tet->nd[node]->getCoord().gety() << " " << tet->nd[node]->getCoord().getz() << endl;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int DGface_bc = tet->fc[k]->bcPtr->getbType();
|
|
|
if(DGface_bc == planeWaveType || DGface_bc == portType || DGface_bc == pmlType)
|
|
|
{
|
|
|
ExcitationFacesNum_h[excitationFaces] = k;
|
|
|
|
|
|
for(int node = 0; node < NumOfNodesPerFace; node++)
|
|
|
{
|
|
|
nd_coords_face_h[NumOfUnitaryVectors * (NumOfNodesPerFace * excitationFaces + node) + 0] = tet->fc[k]->nd[node]->getCoord().getx();
|
|
|
nd_coords_face_h[NumOfUnitaryVectors * (NumOfNodesPerFace * excitationFaces + node) + 1] = tet->fc[k]->nd[node]->getCoord().gety();
|
|
|
nd_coords_face_h[NumOfUnitaryVectors * (NumOfNodesPerFace * excitationFaces + node) + 2] = tet->fc[k]->nd[node]->getCoord().getz();
|
|
|
|
|
|
//cout << tet->fc[k]->nd[node]->getCoord().getx() << " , "
|
|
|
// << tet->fc[k]->nd[node]->getCoord().gety() << " , "
|
|
|
// << tet->fc[k]->nd[node]->getCoord().getz() << endl;
|
|
|
}
|
|
|
cout << "\n";
|
|
|
|
|
|
if(PlaneWaveBCFlag)
|
|
|
{
|
|
|
Z_face_pw_h[excitationFaces] = No * sqrt(tet->mat->mur.getEntry(0,0) / tet->mat->epsr.getEntry(0,0));
|
|
|
}
|
|
|
|
|
|
excitationFaces++;
|
|
|
}
|
|
|
}
|
|
|
ExcitationFacesCnt_h[exciCNT] = excitationFaces - ExcitationFacesOffset_h[exciCNT];
|
|
|
exciCNT++;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
cout << " exciCNT = " << exciCNT << endl;
|
|
|
|
|
|
// To save the current time step through the execution
|
|
|
LocalExciIndexE = new int[N_class];
|
|
|
LocalExciIndexH = new int[N_class];
|
|
|
for(int i = 0; i < N_class; i ++)
|
|
|
{
|
|
|
LocalExciIndexE[i] = 0;
|
|
|
LocalExciIndexH[i] = 0;
|
|
|
}
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
// Create the fields at the HOST (only the ones that we will use to calculate the fields at the probes)
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
int sizeField = TetPolyOrderDim[PolyFlag] * tetraCNT;
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&En1_h, sizeField * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Hn32_h, sizeField * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
// For regular tetrahedra
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
flag1 = true;
|
|
|
|
|
|
// ---- Helpers ----
|
|
|
|
|
|
// Overflow-checked addition of two ints.
//   a, b : operands.
//   returns a + b when the sum is representable as an int; otherwise prints
//   both operands to stderr and aborts the process.
auto safe_add = [](int a, int b) -> int
{
    // Each range test is guarded by the sign of b so that the subtraction
    // used for the test cannot itself overflow.
    const bool above_max = (b > 0) && (a > INT_MAX - b);
    const bool below_min = (b < 0) && (a < INT_MIN - b);

    if (!above_max && !below_min)
    {
        return a + b;
    }

    fprintf(stderr, "Integer overflow in addition (%d + %d)\n", a, b);
    abort();
};
|
|
|
|
|
|
// Range guard for an index.
//   idx  : value to validate.
//   lo   : smallest accepted value (inclusive).
//   hi   : largest accepted value (inclusive).
//   what : label printed in the diagnostic to identify the index.
// Returns normally when lo <= idx <= hi; otherwise reports the value and the
// expected range on stderr and aborts the process.
// The lambda reads nothing from the enclosing scope, so it is capture-less.
auto check_idx = [](int idx, int lo, int hi, const char* what) {
    const bool in_range = (idx >= lo) && (idx <= hi);
    if (in_range) {
        return;
    }
    fprintf(stderr, "Index out of range for %s: %d (expected [%d, %d])\n",
            what, idx, lo, hi);
    abort();
};
|
|
|
|
|
|
// Null-pointer guard.
//   p    : pointer to validate; only tested against null, never dereferenced,
//          hence const void* so pointers-to-const are accepted as well.
//   what : label printed in the diagnostic to identify the pointer.
// Returns normally for a non-null pointer; otherwise reports the label on
// stderr and aborts the process.
// The lambda reads nothing from the enclosing scope, so it is capture-less.
auto check_ptr = [](const void* p, const char* what) {
    if (p != nullptr) {
        return;
    }
    fprintf(stderr, "Null pointer: %s\n", what);
    abort();
};
|
|
|
|
|
|
// ---- Allocations (pinned) ----
|
|
|
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classregNeighPML_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularTetraCnt_h, (size_t)N_class * regularCNT * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classIrregularTetraOffset_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classPMLTetraOffset_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classNeighIrregular_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classNeighIrregularOffset_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classNeighPML_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classNeighPMLOffset_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classTetraOffset_loc_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classNeighOffset_loc_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classPMLTetraOffset_loc_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classNeighPMLOffset_loc_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&nonRegularTetraCnt_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&nonRegularPMLTetraCnt_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
// Always allocate these “per-class meta” arrays irrespective of regularTetraCNT,
|
|
|
// so we can safely write zeros even if there are no regulars.
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularGroupsCnt_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularPMLGroupsCnt_h, (size_t)N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
// These hold per-class pointers allocated later per class; init to nullptr
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularTetraOffset_h, (size_t)N_class * sizeof(int*), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularGroupsId_h, (size_t)N_class * sizeof(int*), cudaHostAllocMapped));
|
|
|
|
|
|
for (int i = 0; i < N_class; ++i)
|
|
|
{
|
|
|
classRegularTetraOffset_h[i] = nullptr;
|
|
|
classRegularGroupsId_h[i] = nullptr;
|
|
|
}
|
|
|
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularPMLTetraOffset_h, (size_t)N_class * sizeof(int*), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularPMLTetraFaceOffset_h, (size_t)N_class * sizeof(int*), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularPMLGroupsId_h, (size_t)N_class * sizeof(int*), cudaHostAllocMapped));
|
|
|
for (int i = 0; i < N_class; ++i)
|
|
|
{
|
|
|
classRegularPMLTetraOffset_h[i] = nullptr;
|
|
|
classRegularPMLGroupsId_h[i] = nullptr;
|
|
|
classRegularPMLTetraFaceOffset_h[i] = nullptr;
|
|
|
}
|
|
|
|
|
|
// Per group (global)
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularGroupsNeighCnt_h, (size_t)regularCNT * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
// ---- Zero-init everything deterministically ----
|
|
|
memset(classRegularTetraCnt_h, 0, (size_t)N_class * regularCNT * sizeof(int));
|
|
|
memset(classIrregularTetraOffset_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(classPMLTetraOffset_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(classNeighIrregular_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(classNeighIrregularOffset_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(classNeighPML_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(classregNeighPML_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(classNeighPMLOffset_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(classTetraOffset_loc_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(classNeighOffset_loc_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(classPMLTetraOffset_loc_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(classNeighPMLOffset_loc_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(nonRegularTetraCnt_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(nonRegularPMLTetraCnt_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(classRegularGroupsCnt_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(classRegularPMLGroupsCnt_h, 0, (size_t)N_class * sizeof(int));
|
|
|
memset(classRegularGroupsNeighCnt_h, 0, (size_t)regularCNT * sizeof(int));
|
|
|
|
|
|
// ---- Locals ----
|
|
|
std::set<int> ID_aux, ID_aux_PML;
|
|
|
|
|
|
totalRegularNeighFaceCnt = 0;
|
|
|
totalRegularPMLNeighFaceCnt = 0;
|
|
|
numRegTetras = 0;
|
|
|
numRegPMLTetras = 0;
|
|
|
|
|
|
int irregularTetras = 0;
|
|
|
int irregularNeighbours= 0;
|
|
|
int PMLTetras = 0;
|
|
|
int PMLNeighbours = 0;
|
|
|
|
|
|
// ---- Main loop ----
|
|
|
for (int i = 0; i < N_class; ++i)
|
|
|
{
|
|
|
// Safe offsets (depend on previous class)
|
|
|
if (i == 0)
|
|
|
{
|
|
|
classIrregularTetraOffset_h[i] = 0;
|
|
|
classNeighIrregularOffset_h[i] = 0;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
// read-only of previous indices is safe now
|
|
|
int prev = i - 1;
|
|
|
check_idx(prev, 0, N_class-1, "prev class index");
|
|
|
|
|
|
// Prevent overflow and guarantee non-negative
|
|
|
int pml_tetra_off = classPMLTetraOffset_h[prev];
|
|
|
int pml_tetra_cnt = ClassPMLTetraCnt[prev];
|
|
|
int pml_neigh_off = classNeighPMLOffset_h[prev];
|
|
|
int pml_neigh_cnt = classNeighPML_h[prev];
|
|
|
int reg_neigh_cnt = classregNeighPML_h[prev];
|
|
|
|
|
|
if (pml_tetra_off < 0 || pml_tetra_cnt < 0 || pml_neigh_off < 0 || pml_neigh_cnt < 0) {
|
|
|
fprintf(stderr, "Negative offsets/cnts detected for prev class %d\n", prev);
|
|
|
abort();
|
|
|
}
|
|
|
|
|
|
classIrregularTetraOffset_h[i] = pml_tetra_off + pml_tetra_cnt;
|
|
|
classNeighIrregularOffset_h[i] = pml_neigh_off + pml_neigh_cnt + reg_neigh_cnt;
|
|
|
}
|
|
|
|
|
|
classTetraOffset_loc_h[i] = irregularTetras;
|
|
|
classNeighOffset_loc_h[i] = irregularNeighbours;
|
|
|
|
|
|
int totalNeighbors = 0;
|
|
|
|
|
|
|
|
|
// ----- Non-PML tetras in class i -----
|
|
|
for (int j = 0; j < ClassTetraCnt[i]; ++j)
|
|
|
{
|
|
|
int tIdx = ClassTetraIndex[i][j];
|
|
|
tet = &(tetARRAY[tIdx]);
|
|
|
check_ptr(tet, "tet ptr");
|
|
|
|
|
|
int group_ID = tet->getRegularGroup();
|
|
|
|
|
|
// Count per class and group
|
|
|
classRegularTetraCnt_h[i * regularCNT + group_ID]++;
|
|
|
|
|
|
int neigh = tet->get_NeighNum();
|
|
|
|
|
|
if (group_ID == 0)
|
|
|
{
|
|
|
nonRegularTetraCnt_h[i]++;
|
|
|
irregularTetras++;
|
|
|
irregularNeighbours += neigh;
|
|
|
classNeighIrregular_h[i] += neigh;
|
|
|
totalNeighbors += neigh;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
ID_aux.insert(group_ID);
|
|
|
classRegularGroupsNeighCnt_h[group_ID] = neigh;
|
|
|
totalRegularNeighFaceCnt += neigh;
|
|
|
numRegTetras++;
|
|
|
totalNeighbors += neigh;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
// ----- Build per-class arrays for REGULAR groups -----
|
|
|
if (!ID_aux.empty())
|
|
|
{
|
|
|
int G = (int)ID_aux.size();
|
|
|
classRegularGroupsCnt_h[i] = G;
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularGroupsId_h[i], (size_t)G * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularTetraOffset_h[i], (size_t)G * sizeof(int), cudaHostAllocMapped));
|
|
|
check_ptr(classRegularGroupsId_h[i], "classRegularGroupsId_h[i]");
|
|
|
check_ptr(classRegularTetraOffset_h[i], "classRegularTetraOffset_h[i]");
|
|
|
|
|
|
cout << "Regular Tet group = " << endl;
|
|
|
int cntAux = 0;
|
|
|
for (int ID : ID_aux)
|
|
|
{
|
|
|
|
|
|
classRegularGroupsId_h[i][cntAux] = ID;
|
|
|
|
|
|
cout << ID << endl;
|
|
|
|
|
|
if (cntAux == 0)
|
|
|
{
|
|
|
classRegularTetraOffset_h[i][0] = 0;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
int prevID = classRegularGroupsId_h[i][cntAux - 1];
|
|
|
int prevCnt = classRegularTetraCnt_h[i * regularCNT + prevID];
|
|
|
classRegularTetraOffset_h[i][cntAux] = classRegularTetraOffset_h[i][cntAux - 1] + prevCnt;
|
|
|
}
|
|
|
cntAux++;
|
|
|
}
|
|
|
ID_aux.clear();
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
classRegularGroupsCnt_h[i] = 0;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ----- PML part -----
|
|
|
if (PML_flag)
|
|
|
{
|
|
|
classPMLTetraOffset_h[i] = classIrregularTetraOffset_h[i] + ClassTetraCnt[i];
|
|
|
classNeighPML_h[i] = 0;
|
|
|
classNeighPMLOffset_h[i] = classNeighIrregularOffset_h[i] + totalNeighbors;
|
|
|
|
|
|
classPMLTetraOffset_loc_h[i] = PMLTetras;
|
|
|
classNeighPMLOffset_loc_h[i] = PMLNeighbours;
|
|
|
|
|
|
cout << "classNeighPMLOffset_loc_h[" << i << "] =" << classNeighPMLOffset_loc_h[i] << endl;
|
|
|
//cout << "classNeighPMLOffset_loc_h[" << i << "] =" << classNeighPMLOffset_loc_h[i] << endl;
|
|
|
cout << " classPMLTetraOffset_loc_h[ " << i << "] " << classPMLTetraOffset_loc_h[i] << endl;
|
|
|
|
|
|
int pml_cnt = ClassPMLTetraCnt[i];
|
|
|
check_idx(pml_cnt, 0, INT_MAX, "ClassPMLTetraCnt[i]");
|
|
|
|
|
|
for (int j = 0; j < pml_cnt; ++j)
|
|
|
{
|
|
|
int idx = safe_add(ClassTetraCnt[i], j);
|
|
|
int tIdx = ClassTetraIndex[i][idx];
|
|
|
tet = &(tetARRAY[tIdx]);
|
|
|
check_ptr(tet, "tet ptr (PML)");
|
|
|
|
|
|
int group_ID = tet->getRegularGroup();
|
|
|
|
|
|
classRegularTetraCnt_h[i * regularCNT + group_ID]++;
|
|
|
|
|
|
int neigh = tet->get_NeighNum();
|
|
|
|
|
|
if (group_ID == 0)
|
|
|
{
|
|
|
nonRegularPMLTetraCnt_h[i]++;
|
|
|
PMLTetras = safe_add(PMLTetras, 1);
|
|
|
PMLNeighbours = safe_add(PMLNeighbours, neigh);
|
|
|
classNeighPML_h[i] = safe_add(classNeighPML_h[i], neigh);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
ID_aux_PML.insert(group_ID);
|
|
|
classRegularGroupsNeighCnt_h[group_ID] = neigh;
|
|
|
totalRegularPMLNeighFaceCnt = safe_add(totalRegularPMLNeighFaceCnt, neigh);
|
|
|
numRegPMLTetras = safe_add(numRegPMLTetras, 1);
|
|
|
classregNeighPML_h[i] += neigh;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
cout << "PMLNeighbours = " << PMLNeighbours << endl;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
// ----- Build per-class arrays for REGULAR PML groups -----
|
|
|
if (PML_flag)
|
|
|
{
|
|
|
if (!ID_aux_PML.empty())
|
|
|
{
|
|
|
int Gp = (int)ID_aux_PML.size();
|
|
|
classRegularPMLGroupsCnt_h[i] = Gp;
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularPMLGroupsId_h[i], (size_t)Gp * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularPMLTetraOffset_h[i], (size_t)Gp * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&classRegularPMLTetraFaceOffset_h[i], (size_t)Gp * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
check_ptr(classRegularPMLGroupsId_h[i], "classRegularPMLGroupsId_h[i]");
|
|
|
check_ptr(classRegularPMLTetraOffset_h[i], "classRegularPMLTetraOffset_h[i]");
|
|
|
check_ptr(classRegularPMLTetraFaceOffset_h[i], "classRegularPMLTetraFaceOffset_h[i]");
|
|
|
|
|
|
|
|
|
cout << "Regular PML Tet group = " << endl;
|
|
|
int cntAux = 0;
|
|
|
for (int ID : ID_aux_PML)
|
|
|
{
|
|
|
cout << ID << endl;
|
|
|
|
|
|
classRegularPMLGroupsId_h[i][cntAux] = ID;
|
|
|
|
|
|
if (cntAux == 0)
|
|
|
{
|
|
|
classRegularPMLTetraOffset_h[i][0] = 0;
|
|
|
classRegularPMLTetraFaceOffset_h[i][0] = 0;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
int prevID = classRegularPMLGroupsId_h[i][cntAux - 1];
|
|
|
int prevCnt = classRegularTetraCnt_h[i * regularCNT + prevID];
|
|
|
classRegularPMLTetraOffset_h[i][cntAux] = classRegularPMLTetraOffset_h[i][cntAux - 1] + prevCnt;
|
|
|
|
|
|
int neigh = classRegularGroupsNeighCnt_h[prevID];
|
|
|
int num_element = classRegularTetraCnt_h[i * regularCNT + prevID];
|
|
|
int number_neigh = neigh * num_element;
|
|
|
classRegularPMLTetraFaceOffset_h[i][cntAux] = classRegularPMLTetraFaceOffset_h[i][cntAux-1] + number_neigh;
|
|
|
}
|
|
|
cntAux++;
|
|
|
}
|
|
|
ID_aux_PML.clear();
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
classRegularPMLGroupsCnt_h[i] = 0;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// ---- Final tallies ----
|
|
|
nonregularCNT_Normal = irregularTetras;
|
|
|
nonregularCNT_PML = PMLTetras;
|
|
|
num_elements_regular_PML = numRegPMLTetras;
|
|
|
cout << "nonregularCNT_Normal = " << nonregularCNT_Normal << endl;
|
|
|
cout << "nonregularCNT_PML = " << nonregularCNT_PML << endl;
|
|
|
cout << "num_elements_regular_PML = " << num_elements_regular_PML << endl;
|
|
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
// Create the matrices for the regular groups (4 sets per regular group):
|
|
|
// - Loc1E/Loc1H: local matrices that are multiplied by the same type of field (Field1 = Loc1 * Field0 + ...)
|
|
|
// - Loc2E/Loc2H: local matrices that are multiplied by the opposite type of field (E1 = Loc2E * H1_2 + ...)
|
|
|
// - Neigh1E/Neigh1H: matrices related to the neighbors' opposite field
|
|
|
// - Neigh2E/Neigh2H: matrices related to the neighbors' same field
|
|
|
//
|
|
|
// *** NOTE: each of these matrices is Column-Major Order
|
|
|
// *** NOTE: since they are regular, we assume that the elements are conformal and with 4 neighbours
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
int localMatrixSize = TetPolyOrderDim[PolyFlag] * TetPolyOrderDim[PolyFlag];
|
|
|
int neighMatrixSize = TetPolyOrderDim[PolyFlag] * FacePolyOrderDim[PolyFlag];
|
|
|
|
|
|
cout << "--------------------------------------------------------------------------------------------------" << endl;
|
|
|
|
|
|
|
|
|
cout << "regularCNT_Normal = " << regularCNT_Normal << endl;
|
|
|
cout << "totalRegularNeighFaceCnt = " << totalRegularNeighFaceCnt << endl;
|
|
|
|
|
|
if(regularRegionFlag && regularCNT_Normal > 0)
|
|
|
{
|
|
|
cout << "========== FILLING regular ===============" << endl;
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularLoc1E_h, regularCNT_Normal * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularLoc2E_h, regularCNT_Normal * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularLoc1H_h, regularCNT_Normal * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularLoc2H_h, regularCNT_Normal * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularNeigh1E_h, totalRegularNeighFaceCnt * neighMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularNeigh2E_h, totalRegularNeighFaceCnt * neighMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularNeigh1H_h, totalRegularNeighFaceCnt * neighMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularNeigh2H_h, totalRegularNeighFaceCnt * neighMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
|
|
|
int localPosition = 0;
|
|
|
int couplingPosition = 0;
|
|
|
|
|
|
for(int i = 1; i < regularCNT_Normal+1; i++)
|
|
|
{
|
|
|
cout << "Group " << i << endl;
|
|
|
tet = &(tetARRAY[regionARRAY[i]]);
|
|
|
|
|
|
tet->prepareCuBLAS(®ularLoc1E_h[localPosition], ®ularLoc2E_h[localPosition], ®ularNeigh1E_h[couplingPosition], ®ularNeigh2E_h[couplingPosition], nullptr,
|
|
|
®ularLoc1H_h[localPosition], ®ularLoc2H_h[localPosition], ®ularNeigh1H_h[couplingPosition], ®ularNeigh2H_h[couplingPosition], nullptr);
|
|
|
|
|
|
localPosition += localMatrixSize;
|
|
|
couplingPosition += classRegularGroupsNeighCnt_h[i] * neighMatrixSize;
|
|
|
}
|
|
|
}
|
|
|
cout << "Complete regular matrices preparation" << endl;
|
|
|
|
|
|
cout << "--------------------------------------------------------------------------------------------------" << endl;
|
|
|
|
|
|
|
|
|
cout << "regularCNT_PML = " << regularCNT_PML << endl;
|
|
|
cout << "totalRegularPMLNeighFaceCnt = " << totalRegularPMLNeighFaceCnt << endl;
|
|
|
|
|
|
if(regularRegionFlag && regularCNT_PML > 0)
|
|
|
{
|
|
|
|
|
|
cout << "========== FILLING regular PML ===============" << endl;
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLLoc1E_h, regularCNT_PML * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLLoc2E_h, regularCNT_PML * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLLoc1H_h, regularCNT_PML * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLLoc2H_h, regularCNT_PML * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLNeigh1E_h, totalRegularPMLNeighFaceCnt * neighMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLNeigh2E_h, totalRegularPMLNeighFaceCnt * neighMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLNeigh1H_h, totalRegularPMLNeighFaceCnt * neighMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLNeigh2H_h, totalRegularPMLNeighFaceCnt * neighMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLAuxE_h, regularCNT_PML * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLAuxH_h, regularCNT_PML * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLLoc1M_h, regularCNT_PML * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLLoc2M_h, regularCNT_PML * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLLoc1J_h, regularCNT_PML * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)®ularPMLLoc2J_h, regularCNT_PML * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
|
|
|
int localPosition = 0;
|
|
|
int couplingPosition = 0;
|
|
|
|
|
|
for(int i = regularCNT_Normal+1; i < regularCNT_Normal+regularCNT_PML+1; i++)
|
|
|
{
|
|
|
cout << "Group " << i << endl;
|
|
|
tet = &(tetARRAY[regionARRAY[i]]);
|
|
|
cout << "------------" << endl;
|
|
|
|
|
|
tet->prepareCuBLAS_PML(®ularPMLLoc1E_h[localPosition], ®ularPMLLoc2E_h[localPosition],
|
|
|
®ularPMLNeigh1E_h[couplingPosition], ®ularPMLNeigh2E_h[couplingPosition],
|
|
|
®ularPMLLoc1H_h[localPosition], ®ularPMLLoc2H_h[localPosition],
|
|
|
®ularPMLNeigh1H_h[couplingPosition], ®ularPMLNeigh2H_h[couplingPosition],
|
|
|
®ularPMLAuxE_h[localPosition], ®ularPMLAuxH_h[localPosition],
|
|
|
®ularPMLLoc1M_h[localPosition], ®ularPMLLoc2M_h[localPosition],
|
|
|
®ularPMLLoc1J_h[localPosition],®ularPMLLoc2J_h[localPosition]);
|
|
|
|
|
|
|
|
|
localPosition += localMatrixSize;
|
|
|
couplingPosition += classRegularGroupsNeighCnt_h[i] * neighMatrixSize;
|
|
|
}
|
|
|
}
|
|
|
cout << "Complete regular PML matrices preparation" << endl;
|
|
|
|
|
|
cout << "--------------------------------------------------------------------------------------------------" << endl;
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
// Calculate the neighbors (number per position + offset) so we know the number of matrices that we are going to need
|
|
|
// Also, we generate an array that is going to map the ID and the order
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
cout << "Neighbor matrices preparation" << endl;
|
|
|
|
|
|
cout << "tetraCNT = " << tetraCNT << endl;
|
|
|
|
|
|
int neighCNT = 0;
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&mapIdLoc, tetraCNT * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Neighbours_h, tetraCNT * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&NeighboursOffset_h, tetraCNT * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
cntAux = 0;
|
|
|
for(int i = 0; i < N_class; i++)
|
|
|
{
|
|
|
for(int j = 0; j < ClassTetraCnt[i] + ClassPMLTetraCnt[i]; j++)
|
|
|
{
|
|
|
tet = &(tetARRAY[ClassTetraIndex[i][j]]);
|
|
|
|
|
|
mapIdLoc[ClassTetraIndex[i][j]] = cntAux;
|
|
|
Neighbours_h[cntAux] = tet->get_NeighNum();
|
|
|
NeighboursOffset_h[cntAux] = neighCNT;
|
|
|
|
|
|
neighCNT += tet->get_NeighNum();
|
|
|
cntAux++;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
cout << "cntAux = " << cntAux << endl;
|
|
|
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&NeighMap_h, neighCNT * FacePolyOrderDim[PolyFlag] * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&NeighClass_h, N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&NeighClassOffset_h, N_class * sizeof(int), cudaHostAllocMapped));
|
|
|
|
|
|
int maxNeighClass = 0;
|
|
|
neighCNT = 0;
|
|
|
cntAux = 0;
|
|
|
for(int i = 0; i < N_class; i++)
|
|
|
{
|
|
|
NeighClassOffset_h[i] = neighCNT;
|
|
|
//cout << "====== Class " << i << endl;
|
|
|
|
|
|
//cout << "Non-PML " << endl;
|
|
|
for(int j = 0; j < ClassTetraCnt[i]; j++)
|
|
|
{
|
|
|
tet = &(tetARRAY[ClassTetraIndex[i][j]]);
|
|
|
bool isPML = tet->get_PML_Flag();
|
|
|
//cout << "TET = " << ClassTetraIndex[i][j] << " | PML = " << isPML << endl;
|
|
|
|
|
|
for(int neigh = 0; neigh < tet->get_NeighNum(); neigh++)
|
|
|
{
|
|
|
tetra* neighbor = tet->get_NeighborTetra(neigh);
|
|
|
int neighFace = tet->getNeighFace(neighbor);
|
|
|
int offset = mapIdLoc[neighbor->getcnt()] * TetPolyOrderDim[PolyFlag];
|
|
|
|
|
|
int neighID = mapIdLoc[neighbor->getcnt()];
|
|
|
bool isPML2 = neighbor->get_PML_Flag();
|
|
|
//cout << "TET = " << neighID << " | PML = " << isPML2 << endl;
|
|
|
|
|
|
for(int k = 0; k < FacePolyOrderDim[PolyFlag]; k++)
|
|
|
{
|
|
|
NeighMap_h[cntAux++] = offset + fac2tet[neighFace][k];
|
|
|
}
|
|
|
}
|
|
|
|
|
|
neighCNT += tet->get_NeighNum();
|
|
|
}
|
|
|
|
|
|
for(int j = ClassTetraCnt[i]; j < ClassTetraCnt[i] + ClassPMLTetraCnt[i]; j++)
|
|
|
{
|
|
|
tet = &(tetARRAY[ClassTetraIndex[i][j]]);
|
|
|
|
|
|
bool isPML = tet->get_PML_Flag();
|
|
|
for(int neigh = 0; neigh < tet->get_NeighNum(); neigh++)
|
|
|
{
|
|
|
tetra* neighbor = tet->get_NeighborTetra(neigh);
|
|
|
int neighFace = tet->getNeighFace(neighbor);
|
|
|
int offset = mapIdLoc[neighbor->getcnt()] * TetPolyOrderDim[PolyFlag];
|
|
|
|
|
|
int neighID = mapIdLoc[neighbor->getcnt()];
|
|
|
bool isPML2 = neighbor->get_PML_Flag();
|
|
|
for(int k = 0; k < FacePolyOrderDim[PolyFlag]; k++)
|
|
|
{
|
|
|
NeighMap_h[cntAux++] = offset + fac2tet[neighFace][k];
|
|
|
}
|
|
|
}
|
|
|
neighCNT += tet->get_NeighNum();
|
|
|
}
|
|
|
|
|
|
NeighClass_h[i] = neighCNT - NeighClassOffset_h[i];
|
|
|
maxNeighClass = (int)std::max(maxNeighClass, NeighClass_h[i]);
|
|
|
}
|
|
|
cout << "Complete Neighbor matrices preparation" << endl;
|
|
|
cout << "neighCNT = " << neighCNT << endl;
|
|
|
cout << "--------------------------------------------------------------------------------------------------" << endl;
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
// Create the matrices (4 sets per field + inverse for excited elements):
|
|
|
// - Loc1E/Loc1H: local matrices that are multiplied by the same type of field (Field1 = Loc1 * Field0 + ...)
|
|
|
// - Loc2E/Loc2H: local matrices that are multiplied by the opposite type of field (E1 = Loc2E * H1_2 + ...)
|
|
|
// - Neigh1E/Neigh1H: matrices related to the neighbors' opposite field
|
|
|
// - Neigh2E/Neigh2H: matrices related to the neighbors' same field
|
|
|
// - InvE_h/InvH_h: inverse Mass matrices (only for excited terms)
|
|
|
//
|
|
|
// *** NOTE: each of these matrices is Column-Major Order ***
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
cout << "Excitation preparation" << endl;
|
|
|
cout << "exciCNT = " << exciCNT << endl;
|
|
|
|
|
|
|
|
|
if (nonregularCNT_Normal > 0)
|
|
|
{
|
|
|
cout << "========== FILLING Irregular ===============" << endl;
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Loc1E_h, irregularTetras * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Loc2E_h, irregularTetras * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Loc1H_h, irregularTetras * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Loc2H_h, irregularTetras * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Neigh1E_h, irregularNeighbours * neighMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Neigh2E_h, irregularNeighbours * neighMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Neigh1H_h, irregularNeighbours * neighMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&Neigh2H_h, irregularNeighbours * neighMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&InvE_h, exciCNT * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
CUDA_SAFE_CALL(cudaMallocHost((void**)&InvH_h, exciCNT * localMatrixSize * sizeof(fp_t_ts), cudaHostAllocMapped));
|
|
|
|
|
|
|
|
|
std::cout << "Begin irregular CuBLAS preparation" << std::endl;
|
|
|
std::cout << "N_class = " << N_class << std::endl;
|
|
|
|
|
|
cout << "irregularTetras = " << irregularTetras << endl;
|
|
|
cout << "nonregularCNT_Normal = " << nonregularCNT_Normal << endl;
|
|
|
|
|
|
exciCNT = 0;
|
|
|
irregularTetras = 0;
|
|
|
irregularNeighbours = 0;
|
|
|
|
|
|
//NOTE: this only works because of the order of the tetras in ClassTetraIndex (Exci0 NonExci0 Exci1 ...) where the number is the class
|
|
|
//NOTE: the inner loop is bounded by nonRegularTetraCnt_h[i], so only group 0 (irregular mesh) is taken into consideration here since the others were already done in the regular section
|
|
|
for(int i = 0; i < N_class; i++)
|
|
|
{
|
|
|
|
|
|
for(int j = 0; j < nonRegularTetraCnt_h[i]; j++)
|
|
|
{
|
|
|
|
|
|
|
|
|
tet = &(tetARRAY[ClassTetraIndex[i][j]]);
|
|
|
int localPosition = irregularTetras * localMatrixSize;
|
|
|
int couplingPosition = irregularNeighbours * neighMatrixSize;
|
|
|
|
|
|
fp_t_ts* InvEptr = j < ClassExcitationCount[i] ? &InvE_h[(exciCNT + j) * localMatrixSize] : nullptr;
|
|
|
fp_t_ts* InvHptr = j < ClassExcitationCount[i] ? &InvH_h[(exciCNT + j) * localMatrixSize] : nullptr;
|
|
|
|
|
|
tet->prepareCuBLAS(&Loc1E_h[localPosition], &Loc2E_h[localPosition], &Neigh1E_h[couplingPosition], &Neigh2E_h[couplingPosition], InvEptr,
|
|
|
&Loc1H_h[localPosition], &Loc2H_h[localPosition], &Neigh1H_h[couplingPosition], &Neigh2H_h[couplingPosition], InvHptr);
|
|
|
|
|
|
irregularTetras++;
|
|
|
irregularNeighbours += tet->get_NeighNum();
|
|
|
|
|
|
}
|
|
|
exciCNT += ClassExcitationCount[i];
|
|
|
}
|
|
|
cout << "irregularTetras = " << irregularTetras << endl;
|
|
|
cout << "exciCNT = " << exciCNT << endl;
|
|
|
|
|
|
}
|
|
|
|
|
|
cout << "--------------------------------------------------------------------------------------------------" << endl;
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
// Create the matrices (4 sets per field + inverse for excited elements):
|
|
|
// - Loc1E_PML/Loc1H_PML: local matrices that are multiplied by the same type of field (Field1 = Loc1 * Field0 + ...)
|
|
|
// - Loc2E_PML/Loc2H_PML: local matrices that are multiplied by the opposite type of field (E1 = Loc2E * H1_2 + ...)
|
|
|
// - Neigh1E_PML/Neigh1H_PML: matrices related to the neighbors' opposite field
|
|
|
// - Neigh2E_PML/Neigh2H_PML: matrices related to the neighbors' same field
|
|
|
// - InvE_h/InvH_h: inverse Mass matrices (only for excited terms)
|
|
|
//
|
|
|
// *** NOTE: each of these matrices is Column-Major Order ***
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
cout << "nonregularCNT_PML = " << nonregularCNT_PML << endl;
|
|
|
|
|
|
// Allocate the host staging buffers for the irregular PML elements and fill them by
// calling prepareCuBLAS_PML() on every irregular PML tetrahedron of every class.
if (nonregularCNT_PML > 0)
{
    cout << "========== FILLING PML ===============" << endl;

    // Byte counts are formed in size_t so that count * matrixSize cannot overflow a
    // 32-bit int on large meshes. PMLNeighbours is read here, before it is reset below.
    const size_t pmlLocalBytes = static_cast<size_t>(nonregularCNT_PML) * localMatrixSize * sizeof(fp_t_ts);
    const size_t pmlNeighBytes = static_cast<size_t>(PMLNeighbours) * neighMatrixSize * sizeof(fp_t_ts);

    CUDA_SAFE_CALL(cudaMallocHost((void**)&Loc1E_PML_h, pmlLocalBytes, cudaHostAllocMapped));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&Loc2E_PML_h, pmlLocalBytes, cudaHostAllocMapped));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&Loc1H_PML_h, pmlLocalBytes, cudaHostAllocMapped));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&Loc2H_PML_h, pmlLocalBytes, cudaHostAllocMapped));

    CUDA_SAFE_CALL(cudaMallocHost((void**)&Neigh1E_PML_h, pmlNeighBytes, cudaHostAllocMapped));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&Neigh2E_PML_h, pmlNeighBytes, cudaHostAllocMapped));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&Neigh1H_PML_h, pmlNeighBytes, cudaHostAllocMapped));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&Neigh2H_PML_h, pmlNeighBytes, cudaHostAllocMapped));

    CUDA_SAFE_CALL(cudaMallocHost((void**)&AuxE_h,  pmlLocalBytes, cudaHostAllocMapped));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&AuxH_h,  pmlLocalBytes, cudaHostAllocMapped));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&AuxM1_h, pmlLocalBytes, cudaHostAllocMapped));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&AuxJ1_h, pmlLocalBytes, cudaHostAllocMapped));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&AuxM2_h, pmlLocalBytes, cudaHostAllocMapped));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&AuxJ2_h, pmlLocalBytes, cudaHostAllocMapped));

    cout << "PMLTetras = " << PMLTetras << endl;
    cout << "PMLNeighbours = " << PMLNeighbours << endl;

    // Reset counters before starting matrix population; they become running cursors.
    PMLTetras = 0;
    PMLNeighbours = 0;

    // Loop over all LTS classes
    for (int i = 0; i < N_class; i++)
    {
        // Within a class the irregular PML tetras occupy the index range
        // [ClassTetraCnt[i], ClassTetraCnt[i] + nonRegularPMLTetraCnt_h[i]).
        const int firstPML = ClassTetraCnt[i];
        const int endPML   = ClassTetraCnt[i] + nonRegularPMLTetraCnt_h[i];
        for (int j = firstPML; j < endPML; j++)
        {
            // j-th tetrahedron of class i (an irregular PML element)
            tet = &(tetARRAY[ClassTetraIndex[i][j]]);

            // Element offsets of this PML tetra's local and neighbor matrices (size_t:
            // an int product would wrap for large element counts).
            const size_t localPos = static_cast<size_t>(PMLTetras) * localMatrixSize;
            const size_t neighPos = static_cast<size_t>(PMLNeighbours) * neighMatrixSize;

            // Fill the local, coupling and auxiliary matrices of this irregular PML tetra
            tet->prepareCuBLAS_PML(&Loc1E_PML_h[localPos], &Loc2E_PML_h[localPos],
                                   &Neigh1E_PML_h[neighPos], &Neigh2E_PML_h[neighPos],
                                   &Loc1H_PML_h[localPos], &Loc2H_PML_h[localPos],
                                   &Neigh1H_PML_h[neighPos], &Neigh2H_PML_h[neighPos],
                                   &AuxE_h[localPos], &AuxH_h[localPos],
                                   &AuxM1_h[localPos], &AuxM2_h[localPos],
                                   &AuxJ1_h[localPos], &AuxJ2_h[localPos]);

            // Advance the running totals of irregular PML tetrahedra and their neighbors
            PMLTetras++;
            PMLNeighbours += tet->get_NeighNum();
        }
    }
    cout << "PMLTetras = " << PMLTetras << endl;
}
|
|
|
|
|
|
int sizePML = PMLTetras * TetPolyOrderDim[PolyFlag];
|
|
|
|
|
|
cout << "--------------------------------------------------------------------------------------------------" << endl;
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
// Check GPU Memory
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
// One row of the memory report: a buffer label and its size in bytes.
struct MemItem {
    const char* label;
    size_t      bytes;
};

// Byte size of `elems` elements that are `sizeofT` bytes each.
auto BYTES_ = [](size_t elems, size_t sizeofT) -> size_t {
    return sizeofT * elems;
};

// Bytes expressed in decimal gigabytes (divides by 1e9).
auto GB = [](size_t bytes) -> double {
    return static_cast<double>(bytes) / 1e9;
};

// Sum of the `bytes` field over every entry of `v`.
auto sum_bytes = [](const std::vector<MemItem>& v) -> size_t {
    size_t total = 0;
    for (const MemItem& item : v) {
        total += item.bytes;
    }
    return total;
};
|
|
|
|
|
|
// ===== Memory accounting (exact, by allocation) ===================================
// Builds four lists of (label, bytes) rows - excitation, propagation, state and
// neighbor data - intended to mirror the device allocations performed further below.

// Basis sizes for the selected polynomial order.
const int TPO = TetPolyOrderDim[PolyFlag];
const int FPO = FacePolyOrderDim[PolyFlag];
const size_t localElems = static_cast<size_t>(TPO) * TPO;   // entries per local matrix
const size_t neighElems = static_cast<size_t>(TPO) * FPO;   // entries per coupling matrix

// Snapshot of the current counter values.
const int exciCNT_total             = exciCNT;
const int irregularTetras_total     = irregularTetras;
const int irregularNeighbours_total = irregularNeighbours;
const int PMLTetras_total           = PMLTetras;
const int PMLNeighbours_total       = PMLNeighbours;

const int regNormGroups     = regularCNT_Normal;
const int regPMLGroups      = regularCNT_PML;
const int regNormFacesTotal = totalRegularNeighFaceCnt;
const int regPMLFacesTotal  = totalRegularPMLNeighFaceCnt;

const size_t sizeFieldElems = sizeField; // already in elements
const size_t sizePMLElems   = sizePML;   // already in elements (if you keep a global PML state)

const size_t neighMapElems   = static_cast<size_t>(neighCNT) * FPO;
const size_t neighboursElems = tetraCNT;
const size_t auxInElems      = static_cast<size_t>(maxNeighClass) * FPO;
const size_t auxOutElems     = static_cast<size_t>(maxNeighClass) * TPO;

const size_t mapElemsPerExci = TPO;
const size_t tetNdElems  = static_cast<size_t>(NumOfUnitaryVectors) * NumOfNodes * exciCNT_total;
const size_t faceNdElems = static_cast<size_t>(NumOfUnitaryVectors) * NumOfNodesPerFace * excitationFaces;

// ============ Build accounting vectors matching your allocations ==================
std::vector<MemItem> excit, prop, state, neighs;

// ---- Excitation maps & counts ----
excit.push_back({"mapE (int8)", BYTES_(size_t(exciCNT_total) * mapElemsPerExci, sizeof(int8_t))});
excit.push_back({"mapH (int8)", BYTES_(size_t(exciCNT_total) * mapElemsPerExci, sizeof(int8_t))});
excit.push_back({"ExcitationFacesCnt (int)", BYTES_(exciCNT_total, sizeof(int))});
excit.push_back({"ExcitationFacesOffset (int)", BYTES_(exciCNT_total, sizeof(int))});
excit.push_back({"ExcitationFacesNum (int)", BYTES_(excitationFaces, sizeof(int))});
excit.push_back({"nd_coords_tet", BYTES_(tetNdElems, sizeof(fp_t_ts))});
excit.push_back({"nd_coords_face", BYTES_(faceNdElems, sizeof(fp_t_ts))});
if (PlaneWaveBCFlag && excitationFaces > 0) {
    excit.push_back({"Z_face_pw", BYTES_(excitationFaces, sizeof(fp_t_ts))});
}
// Port buffers: allocated below whenever portCNT > 0, so they belong in the estimate.
if (portCNT > 0) {
    const size_t portQpElems = static_cast<size_t>(excitationFaces) * Q * 3;
    excit.push_back({"Etan_qp", BYTES_(portQpElems, sizeof(fp_t_ts))});
    excit.push_back({"Htan_qp", BYTES_(portQpElems, sizeof(fp_t_ts))});
    excit.push_back({"PortFacePidx (int)", BYTES_(excitationFaces, sizeof(int))});
    excit.push_back({"ExcitationProps", BYTES_(portExcitations.size(), sizeof(ExcitationProp))});
}
// Inverses only for excitations
excit.push_back({"InvE", BYTES_(size_t(exciCNT_total) * localElems, sizeof(fp_t_ts))});
excit.push_back({"InvH", BYTES_(size_t(exciCNT_total) * localElems, sizeof(fp_t_ts))});

// ---- Irregular (non-PML) ----
for (const char* label : {"Loc1E (irreg)", "Loc2E (irreg)", "Loc1H (irreg)", "Loc2H (irreg)"})
    prop.push_back({label, BYTES_(size_t(irregularTetras_total) * localElems, sizeof(fp_t_ts))});
for (const char* label : {"Neigh1E (irreg)", "Neigh2E (irreg)", "Neigh1H (irreg)", "Neigh2H (irreg)"})
    prop.push_back({label, BYTES_(size_t(irregularNeighbours_total) * neighElems, sizeof(fp_t_ts))});

// ---- Regular (non-PML) ----
// Gated by regularRegionFlag as well, exactly like the corresponding allocations.
if (regularRegionFlag && regNormGroups > 0) {
    for (const char* label : {"regularLoc1E", "regularLoc2E", "regularLoc1H", "regularLoc2H"})
        prop.push_back({label, BYTES_(size_t(regNormGroups) * localElems, sizeof(fp_t_ts))});
    for (const char* label : {"regularNeigh1E", "regularNeigh2E", "regularNeigh1H", "regularNeigh2H"})
        prop.push_back({label, BYTES_(size_t(regNormFacesTotal) * neighElems, sizeof(fp_t_ts))});
}

// ---- Regular PML ----
if (regularRegionFlag && regPMLGroups > 0)
{
    for (const char* label : {"regularPMLLoc1E", "regularPMLLoc2E", "regularPMLLoc1H", "regularPMLLoc2H"})
        prop.push_back({label, BYTES_(size_t(regPMLGroups) * localElems, sizeof(fp_t_ts))});
    for (const char* label : {"regularPMLNeigh1E", "regularPMLNeigh2E", "regularPMLNeigh1H", "regularPMLNeigh2H"})
        prop.push_back({label, BYTES_(size_t(regPMLFacesTotal) * neighElems, sizeof(fp_t_ts))});
    for (const char* label : {"regularPMLAuxE", "regularPMLAuxH", "regularPMLLoc1M",
                              "regularPMLLoc2M", "regularPMLLoc1J", "regularPMLLoc2J"})
        prop.push_back({label, BYTES_(size_t(regPMLGroups) * localElems, sizeof(fp_t_ts))});

    // per-element state for regular-PML region
    for (const char* label : {"r_Mn", "r_Mn1", "r_Jn12", "r_Jn32"})
        state.push_back({label, BYTES_(size_t(numRegPMLTetras) * localElems, sizeof(fp_t_ts))});
}

// ---- Irregular PML ----
if (PMLTetras_total > 0)
{
    for (const char* label : {"Loc1E_PML", "Loc2E_PML", "Loc1H_PML", "Loc2H_PML"})
        prop.push_back({label, BYTES_(size_t(PMLTetras_total) * localElems, sizeof(fp_t_ts))});
    for (const char* label : {"Neigh1E_PML", "Neigh2E_PML", "Neigh1H_PML", "Neigh2H_PML"})
        prop.push_back({label, BYTES_(size_t(PMLNeighbours_total) * neighElems, sizeof(fp_t_ts))});
    for (const char* label : {"AuxE", "AuxH", "AuxM1", "AuxJ1", "AuxM2", "AuxJ2"})
        prop.push_back({label, BYTES_(size_t(PMLTetras_total) * localElems, sizeof(fp_t_ts))});

    // per-element PML state arrays
    // NOTE(review): accounted at localElems per element; the allocation of these four
    // buffers is not visible from here - confirm the size matches.
    for (const char* label : {"Mn", "Mn1", "Jn12", "Jn32"})
        state.push_back({label, BYTES_(size_t(PMLTetras_total) * localElems, sizeof(fp_t_ts))});
}

// ---- Global field buffers ----
for (const char* label : {"En", "En1", "Hn12", "Hn32"})
    state.push_back({label, BYTES_(sizeFieldElems, sizeof(fp_t_ts))});

// ---- Neighbor maps/structs ----
neighs.push_back({"NeighMap (int)", BYTES_(neighMapElems, sizeof(int))});
neighs.push_back({"Neighbours (int)", BYTES_(neighboursElems, sizeof(int))});
neighs.push_back({"NeighboursOffset (int)", BYTES_(neighboursElems, sizeof(int))});
neighs.push_back({"auxFieldInput", BYTES_(auxInElems, sizeof(fp_t_ts))});
neighs.push_back({"auxFieldOutput", BYTES_(auxOutElems, sizeof(fp_t_ts))});
|
|
|
|
|
|
// ============================ Totals & printing ===================================
|
|
|
// Per-category byte totals of the accounting vectors.
const size_t bytesExcit = sum_bytes(excit);
const size_t bytesProp  = sum_bytes(prop);
const size_t bytesState = sum_bytes(state);
const size_t bytesNeigh = sum_bytes(neighs);

// Category totals in GB, scaled by the configured safety factor.
const double factor = usageSecurityThresholdFactor; // e.g., 1.05
const double gExcit = GB(bytesExcit) * factor;
const double gProp  = GB(bytesProp ) * factor;
const double gState = GB(bytesState) * factor;
const double gNeigh = GB(bytesNeigh) * factor;
const double gTotal = gExcit + gProp + gState + gNeigh;

// Free / total memory as reported by the CUDA runtime.
size_t free_cudamem=0, total_cudamem=0;
CUDA_SAFE_CALL(cudaMemGetInfo(&free_cudamem, &total_cudamem));

// Prints one table row per non-empty buffer of `v`, largest first unless
// `sort_by_size` is false. `v` is taken by value so the caller's order is untouched.
auto print_rows = [](const char* category, std::vector<MemItem> v, bool sort_by_size = true)
{
    if (sort_by_size) {
        std::sort(v.begin(), v.end(),
                  [](const MemItem& a, const MemItem& b){ return a.bytes > b.bytes; });
    }
    for (const auto& it : v) {
        if (it.bytes == 0) continue;   // zero-sized buffers are not listed
        std::cout << std::left << std::setw(16) << category
                  << std::setw(36) << it.label
                  << std::right << std::setw(12) << std::fixed << std::setprecision(6)
                  << (double(it.bytes)/1e9) << '\n';
    }
};

std::cout << "============================================================================================\n";
std::cout << std::left << std::setw(16) << "Category"
          << std::setw(36) << "Buffer"
          << std::right << std::setw(12) << "Size [GB]" << '\n';
std::cout << "--------------------------------------------------------------------------------------------\n";

print_rows("Excitation", excit);
print_rows("Propagation", prop);
print_rows("Fields/State",state);
print_rows("Neighbors", neighs);

std::cout << "--------------------------------------------------------------------------------------------\n";
{
    // One summary row: two left-aligned text columns and a right-aligned GB value.
    auto print_total = [](const char* first, const char* second, double gigabytes) {
        std::cout << std::left << std::setw(16) << first
                  << std::setw(36) << second
                  << std::right << std::setw(12) << std::fixed << std::setprecision(6) << gigabytes << '\n';
    };
    print_total("TOTALS", "Excitation", gExcit);
    print_total("TOTALS", "Propagation", gProp);
    print_total("TOTALS", "Fields/State", gState);
    print_total("TOTALS", "Neighbors", gNeigh);
    print_total("TOTAL (est.)", "", gTotal);
}

std::cout << "--------------------------------------------------------------------------------------------\n";
std::cout << "GPU Memory Free / Total [GB]: "
          << std::fixed << std::setprecision(2)
          << double(free_cudamem)/1e9 << " / " << double(total_cudamem)/1e9 << '\n';
std::cout << "============================================================================================\n";

// Actually perform the check this section announces: warn (without aborting) when
// the estimate, safety factor included, does not fit into the free device memory.
if (gTotal > double(free_cudamem) / 1e9) {
    std::cerr << "[WARNING] Estimated GPU memory requirement (" << gTotal
              << " GB, incl. safety factor) exceeds free device memory ("
              << double(free_cudamem) / 1e9
              << " GB); subsequent device allocations may fail." << std::endl;
}
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
// Copy to GPU Memory
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
// ---- Excitation maps & counts -------------------------------------------------
// Device allocations for the excitation bookkeeping arrays.
CUDA_SAFE_MALLOC(mapE_d, BYTES(int8_t, exciCNT_total * mapElemsPerExci));
CUDA_SAFE_MALLOC(mapH_d, BYTES(int8_t, exciCNT_total * mapElemsPerExci));
CUDA_SAFE_MALLOC(ExcitationFacesCnt_d, BYTES(int, exciCNT_total));
CUDA_SAFE_MALLOC(ExcitationFacesOffset_d, BYTES(int, exciCNT_total));
CUDA_SAFE_MALLOC(ExcitationFacesNum_d, BYTES(int, excitationFaces));
CUDA_SAFE_MALLOC(nd_coords_tet_d, BYTES(fp_t_ts, tetNdElems));
CUDA_SAFE_MALLOC(nd_coords_face_d, BYTES(fp_t_ts, faceNdElems));
if (PlaneWaveBCFlag)
{
    CUDA_SAFE_MALLOC(Z_face_pw_d, BYTES(fp_t_ts, excitationFaces));
}

// --- Allocate precomputed tangential fields (only port faces) ---
if (portCNT > 0)
{
    // Element count in size_t: excitationFaces * Q * 3 would otherwise be evaluated
    // in int before being widened by sizeof().
    const size_t portQpElems = static_cast<size_t>(excitationFaces) * Q * 3;
    CUDA_SAFE_CALL(cudaMalloc((void**)&Etan_qp_d, portQpElems * sizeof(fp_t_ts)));
    CUDA_SAFE_CALL(cudaMalloc((void**)&Htan_qp_d, portQpElems * sizeof(fp_t_ts)));

    CUDA_SAFE_CALL(cudaMalloc((void**)&PortFacePidx_d, static_cast<size_t>(excitationFaces) * sizeof(int)));
    CUDA_SAFE_CALL(cudaMalloc((void**)&ExcitationProps_d, portExcitations.size() * sizeof(ExcitationProp)));
}

// Upload of the excitation bookkeeping arrays.
// NOTE(review): CUDA_SAFE_COPY is defined elsewhere; presumably (dst_device, src_host, bytes) - confirm.
CUDA_SAFE_COPY(mapE_d, mapE_h, BYTES(int8_t, exciCNT_total * mapElemsPerExci));
CUDA_SAFE_COPY(mapH_d, mapH_h, BYTES(int8_t, exciCNT_total * mapElemsPerExci));
CUDA_SAFE_COPY(ExcitationFacesCnt_d, ExcitationFacesCnt_h, BYTES(int, exciCNT_total));
CUDA_SAFE_COPY(ExcitationFacesOffset_d, ExcitationFacesOffset_h, BYTES(int, exciCNT_total));
CUDA_SAFE_COPY(ExcitationFacesNum_d, ExcitationFacesNum_h, BYTES(int, excitationFaces));
CUDA_SAFE_COPY(nd_coords_tet_d, nd_coords_tet_h, BYTES(fp_t_ts, tetNdElems));
CUDA_SAFE_COPY(nd_coords_face_d, nd_coords_face_h, BYTES(fp_t_ts, faceNdElems));

if (PlaneWaveBCFlag)
{
    CUDA_SAFE_COPY(Z_face_pw_d, Z_face_pw_h, BYTES(fp_t_ts, excitationFaces));
}

// --- copy precomputed tangential fields (only port faces) ---
if (portCNT > 0)
{
    cout << "Export Etan and Htan" << endl;

    const size_t portQpElems = static_cast<size_t>(excitationFaces) * Q * 3;

    // cudaMemset takes an int byte value, so pass the integer 0 (not the double 0.0).
    // NOTE(review): the clear looks redundant if the copies below overwrite the whole
    // buffer; it is kept so the sequence of CUDA calls is unchanged.
    CUDA_SAFE_CALL(cudaMemset(Etan_qp_d, 0, BYTES(fp_t_ts, portQpElems)));
    CUDA_SAFE_CALL(cudaMemset(Htan_qp_d, 0, BYTES(fp_t_ts, portQpElems)));
    CUDA_SAFE_COPY(Etan_qp_d, Etan_qp_h, BYTES(fp_t_ts, portQpElems));
    CUDA_SAFE_COPY(Htan_qp_d, Htan_qp_h, BYTES(fp_t_ts, portQpElems));
    CUDA_SAFE_COPY(PortFacePidx_d, PortFacePidx_h, BYTES(int, excitationFaces));
    CUDA_SAFE_COPY(ExcitationProps_d, portExcitations.data(), portExcitations.size() * sizeof(ExcitationProp));
}
|
|
|
|
|
|
|
|
|
// ---- Irregular (non-PML) -----------------------------------------------------
// Allocates the device copies of the irregular local/coupling matrices and of the
// inverse matrices (excitations only), then fills them from the *_h buffers.
{
    // The four local sets share one byte count, the four coupling sets another and
    // the two inverse sets a third.
    const auto irregLocalBytes = BYTES(fp_t_ts, irregularTetras_total * localElems);
    const auto irregNeighBytes = BYTES(fp_t_ts, irregularNeighbours_total * neighElems);
    const auto exciLocalBytes  = BYTES(fp_t_ts, exciCNT_total * localElems);

    // Allocations
    CUDA_SAFE_MALLOC(Loc1E_d, irregLocalBytes);
    CUDA_SAFE_MALLOC(Loc2E_d, irregLocalBytes);
    CUDA_SAFE_MALLOC(Loc1H_d, irregLocalBytes);
    CUDA_SAFE_MALLOC(Loc2H_d, irregLocalBytes);
    CUDA_SAFE_MALLOC(Neigh1E_d, irregNeighBytes);
    CUDA_SAFE_MALLOC(Neigh2E_d, irregNeighBytes);
    CUDA_SAFE_MALLOC(Neigh1H_d, irregNeighBytes);
    CUDA_SAFE_MALLOC(Neigh2H_d, irregNeighBytes);

    // Inverses only for excitations
    CUDA_SAFE_MALLOC(InvE_d, exciLocalBytes);
    CUDA_SAFE_MALLOC(InvH_d, exciLocalBytes);

    // Copies, in the same order as the allocations
    CUDA_SAFE_COPY(Loc1E_d, Loc1E_h, irregLocalBytes);
    CUDA_SAFE_COPY(Loc2E_d, Loc2E_h, irregLocalBytes);
    CUDA_SAFE_COPY(Loc1H_d, Loc1H_h, irregLocalBytes);
    CUDA_SAFE_COPY(Loc2H_d, Loc2H_h, irregLocalBytes);
    CUDA_SAFE_COPY(Neigh1E_d, Neigh1E_h, irregNeighBytes);
    CUDA_SAFE_COPY(Neigh2E_d, Neigh2E_h, irregNeighBytes);
    CUDA_SAFE_COPY(Neigh1H_d, Neigh1H_h, irregNeighBytes);
    CUDA_SAFE_COPY(Neigh2H_d, Neigh2H_h, irregNeighBytes);
    CUDA_SAFE_COPY(InvE_d, InvE_h, exciLocalBytes);
    CUDA_SAFE_COPY(InvH_d, InvH_h, exciLocalBytes);
}
|
|
|
|
|
|
|
|
|
|
|
|
// ---- Regular (prototype per group) -------------------------------------------
// Use exact counts — NOT (regularCNT - 1) or "*4"
// Device allocation and upload of the regular-region matrices: one set per regular
// group for the non-PML part, and the same plus auxiliary sets and zero-initialized
// per-element state for the PML part.
if (regularRegionFlag)
{
    if (regNormGroups > 0)
    {
        const auto regLocalBytes = BYTES(fp_t_ts, static_cast<size_t>(regNormGroups) * localElems);
        const auto regNeighBytes = BYTES(fp_t_ts, static_cast<size_t>(regNormFacesTotal) * neighElems);

        CUDA_SAFE_MALLOC(regularLoc1E_d, regLocalBytes);
        CUDA_SAFE_MALLOC(regularLoc2E_d, regLocalBytes);
        CUDA_SAFE_MALLOC(regularLoc1H_d, regLocalBytes);
        CUDA_SAFE_MALLOC(regularLoc2H_d, regLocalBytes);

        CUDA_SAFE_MALLOC(regularNeigh1E_d, regNeighBytes);
        CUDA_SAFE_MALLOC(regularNeigh2E_d, regNeighBytes);
        CUDA_SAFE_MALLOC(regularNeigh1H_d, regNeighBytes);
        CUDA_SAFE_MALLOC(regularNeigh2H_d, regNeighBytes);

        CUDA_SAFE_COPY(regularLoc1E_d, regularLoc1E_h, regLocalBytes);
        CUDA_SAFE_COPY(regularLoc2E_d, regularLoc2E_h, regLocalBytes);
        CUDA_SAFE_COPY(regularLoc1H_d, regularLoc1H_h, regLocalBytes);
        CUDA_SAFE_COPY(regularLoc2H_d, regularLoc2H_h, regLocalBytes);
        CUDA_SAFE_COPY(regularNeigh1E_d, regularNeigh1E_h, regNeighBytes);
        CUDA_SAFE_COPY(regularNeigh2E_d, regularNeigh2E_h, regNeighBytes);
        CUDA_SAFE_COPY(regularNeigh1H_d, regularNeigh1H_h, regNeighBytes);
        CUDA_SAFE_COPY(regularNeigh2H_d, regularNeigh2H_h, regNeighBytes);
    }

    if (regPMLGroups > 0)
    {
        const auto pmlGroupBytes = BYTES(fp_t_ts, static_cast<size_t>(regPMLGroups) * localElems);
        const auto pmlFaceBytes  = BYTES(fp_t_ts, static_cast<size_t>(regPMLFacesTotal) * neighElems);
        const auto pmlStateBytes = BYTES(fp_t_ts, static_cast<size_t>(numRegPMLTetras) * localElems);

        // PML-regular: allocations
        CUDA_SAFE_MALLOC(regularPMLLoc1E_d, pmlGroupBytes);
        CUDA_SAFE_MALLOC(regularPMLLoc2E_d, pmlGroupBytes);
        CUDA_SAFE_MALLOC(regularPMLLoc1H_d, pmlGroupBytes);
        CUDA_SAFE_MALLOC(regularPMLLoc2H_d, pmlGroupBytes);

        CUDA_SAFE_MALLOC(regularPMLNeigh1E_d, pmlFaceBytes);
        CUDA_SAFE_MALLOC(regularPMLNeigh2E_d, pmlFaceBytes);
        CUDA_SAFE_MALLOC(regularPMLNeigh1H_d, pmlFaceBytes);
        CUDA_SAFE_MALLOC(regularPMLNeigh2H_d, pmlFaceBytes);

        // PML auxiliaries for regular-PML prototypes (if used)
        CUDA_SAFE_MALLOC(regularPMLAuxE_d, pmlGroupBytes);
        CUDA_SAFE_MALLOC(regularPMLAuxH_d, pmlGroupBytes);
        CUDA_SAFE_MALLOC(regularPMLLoc1M_d, pmlGroupBytes);
        CUDA_SAFE_MALLOC(regularPMLLoc2M_d, pmlGroupBytes);
        CUDA_SAFE_MALLOC(regularPMLLoc1J_d, pmlGroupBytes);
        CUDA_SAFE_MALLOC(regularPMLLoc2J_d, pmlGroupBytes);

        // PML-regular: copies
        CUDA_SAFE_COPY(regularPMLLoc1E_d, regularPMLLoc1E_h, pmlGroupBytes);
        CUDA_SAFE_COPY(regularPMLLoc2E_d, regularPMLLoc2E_h, pmlGroupBytes);
        CUDA_SAFE_COPY(regularPMLLoc1H_d, regularPMLLoc1H_h, pmlGroupBytes);
        CUDA_SAFE_COPY(regularPMLLoc2H_d, regularPMLLoc2H_h, pmlGroupBytes);
        CUDA_SAFE_COPY(regularPMLNeigh1E_d, regularPMLNeigh1E_h, pmlFaceBytes);
        CUDA_SAFE_COPY(regularPMLNeigh2E_d, regularPMLNeigh2E_h, pmlFaceBytes);
        CUDA_SAFE_COPY(regularPMLNeigh1H_d, regularPMLNeigh1H_h, pmlFaceBytes);
        CUDA_SAFE_COPY(regularPMLNeigh2H_d, regularPMLNeigh2H_h, pmlFaceBytes);

        CUDA_SAFE_COPY(regularPMLAuxE_d, regularPMLAuxE_h, pmlGroupBytes);
        CUDA_SAFE_COPY(regularPMLAuxH_d, regularPMLAuxH_h, pmlGroupBytes);
        CUDA_SAFE_COPY(regularPMLLoc1M_d, regularPMLLoc1M_h, pmlGroupBytes);
        CUDA_SAFE_COPY(regularPMLLoc2M_d, regularPMLLoc2M_h, pmlGroupBytes);
        CUDA_SAFE_COPY(regularPMLLoc1J_d, regularPMLLoc1J_h, pmlGroupBytes);
        CUDA_SAFE_COPY(regularPMLLoc2J_d, regularPMLLoc2J_h, pmlGroupBytes);

        // Per-element state of the regular-PML region: allocated, then zeroed.
        CUDA_SAFE_MALLOC(r_Mn_d, pmlStateBytes);
        CUDA_SAFE_MALLOC(r_Mn1_d, pmlStateBytes);
        CUDA_SAFE_MALLOC(r_Jn12_d, pmlStateBytes);
        CUDA_SAFE_MALLOC(r_Jn32_d, pmlStateBytes);

        CUDA_SAFE_ZERO(r_Mn_d, pmlStateBytes);
        CUDA_SAFE_ZERO(r_Mn1_d, pmlStateBytes);
        CUDA_SAFE_ZERO(r_Jn12_d, pmlStateBytes);
        CUDA_SAFE_ZERO(r_Jn32_d, pmlStateBytes);
    }
}
|
|
|
|
|
|
// ---- Irregular PML (per element) ---------------------------------------------
|
|
|
cout << "Non regular PMLTetras_total = " << PMLTetras_total << endl;
|
|
|
if (PMLTetras_total > 0)
|
|
|
{
|
|
|
CUDA_SAFE_MALLOC(Loc1E_PML_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_MALLOC(Loc2E_PML_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_MALLOC(Loc1H_PML_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_MALLOC(Loc2H_PML_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
|
|
|
CUDA_SAFE_MALLOC(Neigh1E_PML_d, BYTES(fp_t_ts, static_cast<size_t>(PMLNeighbours_total) * neighElems));
|
|
|
CUDA_SAFE_MALLOC(Neigh2E_PML_d, BYTES(fp_t_ts, static_cast<size_t>(PMLNeighbours_total) * neighElems));
|
|
|
CUDA_SAFE_MALLOC(Neigh1H_PML_d, BYTES(fp_t_ts, static_cast<size_t>(PMLNeighbours_total) * neighElems));
|
|
|
CUDA_SAFE_MALLOC(Neigh2H_PML_d, BYTES(fp_t_ts, static_cast<size_t>(PMLNeighbours_total) * neighElems));
|
|
|
|
|
|
CUDA_SAFE_MALLOC(AuxE_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_MALLOC(AuxH_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_MALLOC(AuxM1_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_MALLOC(AuxJ1_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_MALLOC(AuxM2_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_MALLOC(AuxJ2_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
|
|
|
|
|
|
CUDA_SAFE_COPY(Loc1E_PML_d, Loc1E_PML_h, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_COPY(Loc2E_PML_d, Loc2E_PML_h, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_COPY(Loc1H_PML_d, Loc1H_PML_h, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_COPY(Loc2H_PML_d, Loc2H_PML_h, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
|
|
|
CUDA_SAFE_COPY(Neigh1E_PML_d, Neigh1E_PML_h, BYTES(fp_t_ts, static_cast<size_t>(PMLNeighbours_total) * neighElems));
|
|
|
CUDA_SAFE_COPY(Neigh2E_PML_d, Neigh2E_PML_h, BYTES(fp_t_ts, static_cast<size_t>(PMLNeighbours_total) * neighElems));
|
|
|
CUDA_SAFE_COPY(Neigh1H_PML_d, Neigh1H_PML_h, BYTES(fp_t_ts, static_cast<size_t>(PMLNeighbours_total) * neighElems));
|
|
|
CUDA_SAFE_COPY(Neigh2H_PML_d, Neigh2H_PML_h, BYTES(fp_t_ts, static_cast<size_t>(PMLNeighbours_total) * neighElems));
|
|
|
|
|
|
CUDA_SAFE_COPY(AuxE_d, AuxE_h, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_COPY(AuxH_d, AuxH_h, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_COPY(AuxM1_d, AuxM1_h, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_COPY(AuxJ1_d, AuxJ1_h, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_COPY(AuxM2_d, AuxM2_h, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_COPY(AuxJ2_d, AuxJ2_h, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
|
|
|
CUDA_SAFE_MALLOC(Mn_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_MALLOC(Mn1_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_MALLOC(Jn12_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_MALLOC(Jn32_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
|
|
|
CUDA_SAFE_ZERO(Mn_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_ZERO(Mn1_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_ZERO(Jn12_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
CUDA_SAFE_ZERO(Jn32_d, BYTES(fp_t_ts, static_cast<size_t>(PMLTetras_total) * localElems));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
// ---- Global field buffers -----------------------------------------------------
|
|
|
CUDA_SAFE_MALLOC(En_d, BYTES(fp_t_ts, sizeFieldElems));
|
|
|
CUDA_SAFE_MALLOC(En1_d, BYTES(fp_t_ts, sizeFieldElems));
|
|
|
CUDA_SAFE_MALLOC(Hn12_d, BYTES(fp_t_ts, sizeFieldElems));
|
|
|
CUDA_SAFE_MALLOC(Hn32_d, BYTES(fp_t_ts, sizeFieldElems));
|
|
|
|
|
|
// Fields zero init
|
|
|
CUDA_SAFE_ZERO(En_d, BYTES(fp_t_ts, sizeFieldElems));
|
|
|
CUDA_SAFE_ZERO(En1_d, BYTES(fp_t_ts, sizeFieldElems));
|
|
|
CUDA_SAFE_ZERO(Hn12_d, BYTES(fp_t_ts, sizeFieldElems));
|
|
|
CUDA_SAFE_ZERO(Hn32_d, BYTES(fp_t_ts, sizeFieldElems));
|
|
|
|
|
|
|
|
|
// ---- Neighbor maps ------------------------------------------------------------
|
|
|
CUDA_SAFE_MALLOC(NeighMap_d, BYTES(int, neighMapElems));
|
|
|
CUDA_SAFE_MALLOC(Neighbours_d, BYTES(int, neighboursElems));
|
|
|
CUDA_SAFE_MALLOC(NeighboursOffset_d, BYTES(int, neighboursElems));
|
|
|
CUDA_SAFE_MALLOC(auxFieldInput, BYTES(fp_t_ts, auxInElems));
|
|
|
CUDA_SAFE_MALLOC(auxFieldOutput, BYTES(fp_t_ts, auxOutElems));
|
|
|
|
|
|
// Neighbor structures
|
|
|
CUDA_SAFE_COPY(NeighMap_d, NeighMap_h, BYTES(int, neighMapElems));
|
|
|
CUDA_SAFE_COPY(Neighbours_d, Neighbours_h, BYTES(int, neighboursElems));
|
|
|
CUDA_SAFE_COPY(NeighboursOffset_d, NeighboursOffset_h, BYTES(int, neighboursElems));
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Drives the cuBLAS-based local-time-stepping (LTS) simulation.
 *
 * Steps performed:
 *  1. Derives the sampling interval (tsPerSampling) from the Nyquist step of
 *     `freq` and the time step of the last class, LocTimeSteps[N_class - 1].
 *  2. Derives the number of steps from FinalTime; when FinalTime <= 0 the run
 *     is extended in windows of NumOfSampleEnergyCheck samples until
 *     checkEnergyDecay() reports that the field energy has decayed.
 *  3. Optionally allocates the Pade buffers and frequency table (usePade).
 *  4. Prints the run summary, then iterates ComputeE_cuBLAS / ComputeH_cuBLAS,
 *     copying En1_d / Hn32_d back to the host every tsPerSampling steps and
 *     writing the requested probe / field / surface / port outputs.
 *
 * Takes no parameters and returns nothing; all state lives in FemGrp members.
 */
void FemGrp::TimeSteppingCuBLAS()
{
    fp_t InitTime = 0.0;

    fp_t Frequency = freq;
    fp_t dt_nyquist = 1.0 / (2.0 * Frequency * MEGA);
    // NOTE(review): if SamplingRate has an integer type, 1 / SamplingRate truncates - confirm its declaration.
    fp_t dt_sample = (1 / SamplingRate) * dt_nyquist;

    // Round the sampling interval up to a whole number of steps of the last class.
    tsPerSampling = (int)ceil(dt_sample / LocTimeSteps[N_class - 1]);
    dt_sample = tsPerSampling * LocTimeSteps[N_class - 1];

    if(FinalTime > 0)
        NtimeSteps = (int)ceil((FinalTime - InitTime) / LocTimeSteps[N_class - 1]); // number of time steps for the biggest time step size
    else
        NtimeSteps = 0;

    // Number of sampling instants covered by the current NtimeSteps.
    auto sampleCount = [&]() -> int {
        return (int)ceil((1.0 * NtimeSteps) / tsPerSampling);
    };

    if(usePade){
        /* The factor 7.5 (10 is used here for safety) is empirical; the paper
           "Early Time Behavior in Reverberation Chambers and Its Effect on the
           Relationships Between Coherence Bandwidth, Chamber Decay Time, RMS Delay
           Spread, and the Chamber Buildup Time" by Christopher L. Holloway et al.
           suggests the value 3/2 in its equation 30. */
        fp_t earlyTime = 10 * Length(maxPoint - minPoint) / Vo;

        tsPerPade = (int)ceil(earlyTime / LocTimeSteps[N_class - 1]);
        // Round tsPerPade up to the next multiple of tsPerSampling.
        tsPerPade = tsPerPade + tsPerSampling - tsPerPade % tsPerSampling;

        fieldProbes = new fp_t_ts[probeCNT * sampleCount() * NumOfFieldComponents];

        CUDA_SAFE_CALL(cudaMallocHost((void**)&tranferencePadeFunctionFD_h, padeCNT * sampleCount() * NumOfFieldComponents * sizeof(cuDoubleComplex), cudaHostAllocMapped));

        getPadeFreq(sampleCount(), tsPerSampling);
    }

    Write_TD_Data(tsPerSampling, NtimeSteps);

    //Output precision set to 15 digits
    cout.precision(15);

    //Print out data used in the computation
    cout << endl;
    cout << "=============================================" << endl;
    cout << "== Running CUDA Implementation (Non-Heavy) ==" << endl;
    cout << "=============================================" << endl;
    cout << endl;
    cout << "==========================================" << endl;
    cout << "         PERFORMING INFORMATION           " << endl;
    cout << "==========================================" << endl;

    if(FinalTime > 0)
        cout << "  Final Time(sec)       = " << FinalTime << endl;
    else
        cout << "  Final Time            = " << "TBD" << endl;

    cout << "  Time Step, dt(sec)    = " << LocTimeSteps[N_class - 1] << endl;
    cout << "  Number of Tetrahedra  = " << tetraCNT << endl;
    cout << "  Number of Classes     = " << N_class << endl;

    if(FinalTime > 0)
        cout << "  Number of Time Steps  = " << NtimeSteps << endl;

    for(int i = 0; i < N_class; i++){
        cout << "  LocTimeSteps[" << i << "] = " << LocTimeSteps[i] << endl;
    }

    cout << endl;
    cout << "  dt_nyquist            = " << dt_nyquist << endl;
    cout << "  dt_sample             = " << dt_sample << endl;
    cout << "  tsPerSampling         = " << tsPerSampling << endl;

    if(FinalTime > 0)
        cout << "  Number of samplings   = " << sampleCount() << endl;

    if(usePade){
        cout << "  Time Steps / Pade Calc = " << tsPerPade << endl;
    }
    cout << "==========================================" << endl;
    cout << endl;

    //Memory status
    SYSTEM_MEM_USAGE();
    cout << endl;

    cout << " " << endl;
    cout << "===================================================" << endl;
    cout << "              Local Time-Stepping Loop             " << endl;
    cout << "===================================================" << endl;

    // Variables for time tracking
    size_t total_time = 0;
    fp_t current_time = 0;
    bool exitBool = false;
    // Start one sample behind so that the first sampling instant is reported as t = 0.
    current_time -= (double)dt_sample * 1e9;

    if(FinalTime <= 0){
        // Open-ended run: simulate one energy-check window at a time.
        NtimeSteps = NumOfSampleEnergyCheck * tsPerSampling + 1;
        fieldEnergy = 0;
        maxFieldEnergy = 0;
        if(numberOfEnergyPoints == 0){
            numberOfEnergyPoints = probeCNT;
        }
    }

    cublasHandle_t handle;
    const cublasStatus_t createStatus = cublasCreate(&handle);
    if(createStatus != CUBLAS_STATUS_SUCCESS){
        // Without a valid handle every cuBLAS call in the loop would fail.
        fprintf(stderr, "[TimeSteppingCuBLAS] cublasCreate failed (status=%d)\n", (int)createStatus);
        return;
    }

    timer_start("Time Stepping", ' ');
    timer_start("Start Time Stepping", 'm');
    // Tracks whether an 'm' timer is currently open, so that it is closed exactly once after the loop.
    bool sampleTimerRunning = true;

    for(int n = 0; n < NtimeSteps; n++){
        ComputeE_cuBLAS(handle, N_class - 1);
        ComputeH_cuBLAS(handle, N_class - 1);

        if(n % tsPerSampling == 0)
        {
            CUDA_SAFE_CALL(cudaMemcpy(En1_h, En1_d, tetraCNT * TetPolyOrderDim[PolyFlag] * sizeof(fp_t_ts), cudaMemcpyDeviceToHost));
            CUDA_SAFE_CALL(cudaMemcpy(Hn32_h, Hn32_d, tetraCNT * TetPolyOrderDim[PolyFlag] * sizeof(fp_t_ts), cudaMemcpyDeviceToHost));

            CUDA_SAFE_CALL(cudaDeviceSynchronize());

            total_time += timer_stop('m');
            sampleTimerRunning = false;

            if(write_probes && probeCNT > 0)
            {
                writeFieldProbeCuBLAS(n);
                if(write_AnalyticalIncidentProbes){
                    writeAnalyticalIncidentPWProbes(n);
                }

                if(n != 0 && usePade && n % tsPerPade == 0)
                {
                    if(padeTime < 0.0){
                        exitBool = calculatePadeCUDA(n, n / tsPerPade == 1, false);
                    }else if(n * LocTimeSteps[N_class - 1] > padeTime * 1e-9){
                        exitBool = true;
                    }
                }
            }

            if(write_fields){
                writeFieldGlobalCuBLAS(n);
            }

            // Modified by Qi Jian to write surface currents
            if(WriteSurfFlag)
            {
                writeCurrentsOutputSurfMesh_CuBLAS(n);
            }

            // Writing the fields on the port surfaces
            if (PortBCFlag)
            {
                writePortFieldProbeCuBLAS(n);
            }

            // Sum of squares of the E coefficients just copied back to the host.
            fp_t_ts magAux = 0;
            for(int i = 0; i < tetraCNT * TetPolyOrderDim[PolyFlag]; i++){
                magAux += En1_h[i] * En1_h[i];
            }
            cout << "E field norm^2 " << magAux << endl;

            current_time += (double)dt_sample * 1e9;
            DEBUG_INFO("   Current Time : " + to_string(current_time) + "ns");
            DEBUG_INFO("   Average iteration time : "+ to_string(((double)total_time / (double)(n + 1.0))) + " msec");

            if(exitBool){
                calculatePadeCUDA(n, false, true);
                break;
            }

            // Open-ended run: at the end of each window either extend the run or stop.
            // Uses the same FinalTime <= 0 condition that set the window up.
            if(FinalTime <= 0 && n == NtimeSteps - 1){
                if(!checkEnergyDecay()){
                    NtimeSteps += NumOfSampleEnergyCheck * tsPerSampling;
                    cout << "Max Energy: " << maxFieldEnergy << " - Current Energy: " << fieldEnergy << " - Relation: " << fieldEnergy * 100 / maxFieldEnergy << "%" << endl;
                    fieldEnergy = 0.0;
                }else{
                    Write_TD_Data(tsPerSampling, NtimeSteps);
                    break;
                }
            }
            cout << "---------------------------------------------------" << endl;

            timer_start(to_string(tsPerSampling)+" steps", 'm');
            sampleTimerRunning = true;
        }
    }

    if(!exitBool && padeCNT > 0 && !writeWhilePade){
        writeFieldProbeAfterPade(tsPerSampling);
    }

    // Close the per-sample timer only if one is still open. Both break paths
    // above leave the loop with the timer already stopped.
    if(sampleTimerRunning){
        timer_stop('m');
    }

    const cublasStatus_t destroyStatus = cublasDestroy(handle);
    if(destroyStatus != CUBLAS_STATUS_SUCCESS){
        fprintf(stderr, "[TimeSteppingCuBLAS] cublasDestroy failed (status=%d)\n", (int)destroyStatus);
    }

    DEBUG_INFO("   Total iteration time: "+ to_string((double)total_time) + " msec");
    timer_stop(' ');
}
|
|
|
|
|
|
//The recursivity in ComputeE and ComputeH is due to the LTS process
|
|
|
/**********************************************************************
|
|
|
Local Time-Stepping for CUDA Recursive
|
|
|
|
|
|
Explained in "Dissipative terms and local time-stepping improvements
|
|
|
in a spatial high order Discontinuous Galerkin scheme
|
|
|
for the time-domain Maxwell’s equations" by E. Montseny
|
|
|
**********************************************************************/
|
|
|
|
|
|
/**
 * Recursive local-time-stepping driver for the E update.
 *
 * Updates E on class `class_i` through LE_CuBLAS and, for every class above 0,
 * follows it with an E / H / E sequence on the next lower class.
 *
 * @param handle  cuBLAS handle forwarded to every update.
 * @param class_i LTS class to advance; class 0 ends the recursion.
 */
void FemGrp::ComputeE_cuBLAS(cublasHandle_t handle, int class_i)
{
    // Every level starts by advancing E on its own class.
    LE_CuBLAS(handle, class_i);

    // Class 0 has no lower class to sub-step.
    if(class_i == 0)
        return;

    const int lowerClass = class_i - 1;
    ComputeE_cuBLAS(handle, lowerClass);
    ComputeH_cuBLAS(handle, lowerClass);
    ComputeE_cuBLAS(handle, lowerClass);
}
|
|
|
|
|
|
/**
 * Recursive local-time-stepping driver for the H update.
 *
 * Updates H on class `class_i` through LH_CuBLAS and, for every class above 0,
 * follows it with an H / E / H sequence on the next lower class.
 *
 * @param handle  cuBLAS handle forwarded to every update.
 * @param class_i LTS class to advance; class 0 ends the recursion.
 */
void FemGrp::ComputeH_cuBLAS(cublasHandle_t handle, int class_i)
{
    // Every level starts by advancing H on its own class.
    LH_CuBLAS(handle, class_i);

    // Class 0 has no lower class to sub-step.
    if(class_i == 0)
        return;

    const int lowerClass = class_i - 1;
    ComputeH_cuBLAS(handle, lowerClass);
    ComputeE_cuBLAS(handle, lowerClass);
    ComputeH_cuBLAS(handle, lowerClass);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Computes the E update of LTS class `class_i` and commits it.
 *
 * The class is processed in four sections, each writing into En1_d:
 *   1. irregular tetrahedra      - one operator per element (strided batched GEMM),
 *                                  plus the excitation kernel when the class has
 *                                  excitation elements;
 *   2. regular groups            - one shared operator per group (plain GEMM);
 *   3. irregular PML tetrahedra  - as (1) plus the auxiliary Jn12_d / Jn32_d update;
 *   4. regular PML groups        - as (2) plus the auxiliary r_Jn12_d / r_Jn32_d update.
 * Every section adds the face-coupling contribution: neighbour values of Hn12_d
 * and En_d are gathered into auxFieldInput, multiplied by the coupling operators
 * into auxFieldOutput, and scattered into En1_d.
 *
 * Afterwards En1_d is copied over En_d (whole mesh) and the new auxiliary J
 * values are copied over the old ones.
 *
 * All work is issued in order on the stream used by `handle` and by the kernel
 * launches; the device is synchronised once per section.
 *
 * @param handle  cuBLAS handle used for every GEMM.
 * @param class_i LTS class to update.
 */
void FemGrp::LE_CuBLAS(cublasHandle_t handle, int class_i)
{
    const int tetDim    = TetPolyOrderDim[PolyFlag];   // field coefficients per tetrahedron (rows of every operator)
    const int faceDim   = FacePolyOrderDim[PolyFlag];  // coefficients per face (columns of the coupling operators)
    const int blockSize = 256;                         // threads per block for the 1D kernels

    const long long int locStride   = static_cast<long long int>(tetDim) * tetDim;   // elements in one local operator
    const long long int neighStride = static_cast<long long int>(tetDim) * faceDim;  // elements in one coupling operator

    // 2D block used by the scatter kernels: tetDim x (elements per block).
    const int tetsPerBlock = (blockSize + tetDim - 1) / tetDim;
    const dim3 scatterBlock(tetDim, tetsPerBlock, 1);

    // Reports a failing cuBLAS call; the original code ignored these statuses.
    auto checkCublas = [](cublasStatus_t status, const char* what) {
        if (status != CUBLAS_STATUS_SUCCESS) {
            fprintf(stderr, "[LE_CuBLAS] cuBLAS call failed in %s (status=%d)\n", what, (int)status);
        }
    };

    // C_b = A_b * B_b + beta * C_b for `batches` column-major problems.
    // A_b is tetDim x inner, B_b is inner x cols, C_b is tetDim x cols.
    auto gemmBatched = [&](int cols, int inner,
                           const fp_t_ts* A, long long int strideA,
                           const fp_t_ts* B, long long int strideB,
                           float beta,
                           fp_t_ts* C, long long int strideC,
                           int batches, const char* what) {
        const float one = 1.0f;
        checkCublas(cublasSgemmStridedBatched(handle,
                                              CUBLAS_OP_N, CUBLAS_OP_N,
                                              tetDim, cols, inner,
                                              &one,
                                              A, tetDim, strideA,
                                              B, inner, strideB,
                                              &beta,
                                              C, tetDim, strideC,
                                              batches),
                    what);
    };

    // C = A * B + beta * C with one tetDim x tetDim operator applied to `cols` elements.
    auto gemmShared = [&](int cols, const fp_t_ts* A, const fp_t_ts* B, float beta,
                          fp_t_ts* C, const char* what) {
        const float one = 1.0f;
        checkCublas(cublasSgemm(handle,
                                CUBLAS_OP_N, CUBLAS_OP_N,
                                tetDim, cols, tetDim,
                                &one,
                                A, tetDim,
                                B, tetDim,
                                &beta,
                                C, tetDim),
                    what);
    };

    // Gathers `count` neighbour values of `field` into auxFieldInput (per-face map).
    auto gatherNeigh = [&](long long int mapStart, fp_t_ts* field, int count) {
        if (count <= 0) return;  // a zero-block launch is an invalid configuration
        const int blocks = (count + blockSize - 1) / blockSize;
        makeNeighField<<<blocks, blockSize>>>(&NeighMap_d[mapStart], field, auxFieldInput, count); //Implement 3D if tetras over blocksize * (2^(31) - 1)
        CUDA_SAFE_CALL(cudaGetLastError());
    };

    // Same gather for a regular group of `tets` elements.
    auto gatherNeighRegular = [&](long long int mapStart, fp_t_ts* field, int tets) {
        const int blocks = (tets * faceDim * NumOfFaces + blockSize - 1) / blockSize;
        makeNeighFieldRegular<<<blocks, blockSize>>>(&NeighMap_d[mapStart], field, auxFieldInput, tets, PolyFlag);
        CUDA_SAFE_CALL(cudaGetLastError());
    };

    // Coupling for per-element operators:
    // auxFieldOutput = neigh1 * gather(Hn12_d) + neigh2 * gather(En_d), then scatter into En1_d.
    auto coupleIrregular = [&](long long int mapStart,
                               const fp_t_ts* neigh1, const fp_t_ts* neigh2, int faces,
                               long long int fieldOffset, int tets) {
        const int gatherCount = faces * faceDim;

        gatherNeigh(mapStart, Hn12_d, gatherCount);
        gemmBatched(1, faceDim, neigh1, neighStride, auxFieldInput, faceDim,
                    0.0f, auxFieldOutput, tetDim, faces, "coupling (Hn12)");

        gatherNeigh(mapStart, En_d, gatherCount);
        gemmBatched(1, faceDim, neigh2, neighStride, auxFieldInput, faceDim,
                    1.0f, auxFieldOutput, tetDim, faces, "coupling (En)");

        const int blocks = (tets + tetsPerBlock - 1) / tetsPerBlock;
        addCouplingResults<<<blocks, scatterBlock>>>(&En1_d[fieldOffset * tetDim], auxFieldOutput,
                                                     &Neighbours_d[fieldOffset], &NeighboursOffset_d[fieldOffset],
                                                     tets); //Implement 3D if tetras over blocksize * (2^(31) - 1)
        CUDA_SAFE_CALL(cudaGetLastError());
    };

    // Coupling for a regular group: one operator per face (NumOfFaces batches),
    // each applied to all `tets` elements of the group.
    auto coupleRegular = [&](long long int mapStart,
                             const fp_t_ts* neigh1, const fp_t_ts* neigh2,
                             long long int fieldOffset, int tets) {
        const long long int inStride  = static_cast<long long int>(faceDim) * tets;
        const long long int outStride = static_cast<long long int>(tetDim) * tets;

        gatherNeighRegular(mapStart, Hn12_d, tets);
        gemmBatched(tets, faceDim, neigh1, neighStride, auxFieldInput, inStride,
                    0.0f, auxFieldOutput, outStride, NumOfFaces, "regular coupling (Hn12)");

        gatherNeighRegular(mapStart, En_d, tets);
        gemmBatched(tets, faceDim, neigh2, neighStride, auxFieldInput, inStride,
                    1.0f, auxFieldOutput, outStride, NumOfFaces, "regular coupling (En)");

        const int blocks = (tets + tetsPerBlock - 1) / tetsPerBlock;
        addCouplingResultsRegular<<<blocks, scatterBlock>>>(&En1_d[fieldOffset * tetDim], auxFieldOutput, tets);
        CUDA_SAFE_CALL(cudaGetLastError());
    };

    // ---------------------------------------------------------------------
    // 1. Irregular (non-PML) tetrahedra
    // ---------------------------------------------------------------------
    const int irregularTetras = nonRegularTetraCnt_h[class_i];
    const long long int classOffset = ClassTetraOffset[class_i];
    const long long int neighOffset = NeighClassOffset_h[class_i];

    if (irregularTetras > 0)
    {
        const long long int locOffset = classTetraOffset_loc_h[class_i];
        fp_t_ts* const eNew = &En1_d[classOffset * tetDim];

        // En1 = Loc1E * En + Loc2E * Hn12
        gemmBatched(1, tetDim, &Loc1E_d[locOffset * locStride], locStride,
                    &En_d[classOffset * tetDim], tetDim,
                    0.0f, eNew, tetDim, irregularTetras, "Loc1E * En");
        gemmBatched(1, tetDim, &Loc2E_d[locOffset * locStride], locStride,
                    &Hn12_d[classOffset * tetDim], tetDim,
                    1.0f, eNew, tetDim, irregularTetras, "Loc2E * Hn12");

        const int exciCount = ClassExcitationCount[class_i];
        if (exciCount > 0)
        {
            const long long int exciOffset = ClassExcitationOffset[class_i];
            const int exciBlocks = (exciCount + blockSize - 1) / blockSize;

            // Excitation is evaluated at the middle of this class's current local step.
            fp_t_ts dt = LocTimeSteps[class_i];
            fp_t_ts t = (LocalExciIndexE[class_i] + 0.5) * dt;
            LocalExciIndexE[class_i]++;

            if (PWorPort == 0)
            {
                if (interior_excitation_flag)
                {
                    addExcitationE_PML<<<exciBlocks, blockSize>>>(&ExcitationFacesCnt_d[exciOffset],
                                                                  &ExcitationFacesOffset_d[exciOffset],
                                                                  ExcitationFacesNum_d,
                                                                  exciCount,
                                                                  ClassExcitation_sc_CNT[class_i],
                                                                  &mapE_d[exciOffset * tetDim],
                                                                  excitationProp,
                                                                  PolyFlag,
                                                                  dt / Eo, t,
                                                                  &nd_coords_tet_d[exciOffset * NumOfNodes * NumOfUnitaryVectors],
                                                                  nd_coords_face_d,
                                                                  Z_face_pw_d,
                                                                  &InvE_d[exciOffset * locStride],
                                                                  eNew);
                }
                else
                {
                    addExcitationE<<<exciBlocks, blockSize>>>(&ExcitationFacesCnt_d[exciOffset],
                                                              &ExcitationFacesOffset_d[exciOffset],
                                                              ExcitationFacesNum_d,
                                                              exciCount,
                                                              &mapE_d[exciOffset * tetDim],
                                                              excitationProp,
                                                              PolyFlag,
                                                              dt / Eo, t,
                                                              &nd_coords_tet_d[exciOffset * NumOfNodes * NumOfUnitaryVectors],
                                                              nd_coords_face_d,
                                                              Z_face_pw_d,
                                                              &InvE_d[exciOffset * locStride],
                                                              eNew);
                }
            }
            else
            {
                addExcitationE_port<<<exciBlocks, blockSize>>>(&ExcitationFacesCnt_d[exciOffset],
                                                               &ExcitationFacesOffset_d[exciOffset],
                                                               ExcitationFacesNum_d,
                                                               exciCount,
                                                               &mapE_d[exciOffset * tetDim],
                                                               ExcitationProps_d,
                                                               PortFacePidx_d,
                                                               PolyFlag,
                                                               dt / Eo, t,
                                                               &nd_coords_tet_d[exciOffset * NumOfNodes * NumOfUnitaryVectors],
                                                               nd_coords_face_d,
                                                               &InvE_d[exciOffset * locStride],
                                                               eNew);
            }

            CUDA_SAFE_CALL(cudaGetLastError());
            CUDA_SAFE_CALL(cudaDeviceSynchronize());
        }

        const long long int neighLocOffset = classNeighOffset_loc_h[class_i];
        coupleIrregular(neighOffset * faceDim,
                        &Neigh1E_d[neighLocOffset * neighStride],
                        &Neigh2E_d[neighLocOffset * neighStride],
                        classNeighIrregular_h[class_i],
                        classOffset, irregularTetras);
    }

    CUDA_SAFE_CALL(cudaDeviceSynchronize());

    // ---------------------------------------------------------------------
    // 2. Regular (non-PML) groups
    // ---------------------------------------------------------------------
    if (regularRegionFlag && classRegularGroupsCnt_h[class_i] > 0)
    {
        for (int i = 0; i < classRegularGroupsCnt_h[class_i]; i++)
        {
            const int groupID = classRegularGroupsId_h[class_i][i];
            const int groupElements = classRegularTetraCnt_h[class_i * regularCNT + groupID];
            if (groupElements <= 0)
                continue;  // nothing to compute, and a zero-block launch would be invalid

            const long long int groupStart  = classRegularTetraOffset_h[class_i][i];
            const long long int groupOffset = classOffset + nonRegularTetraCnt_h[class_i] + groupStart;
            const long long int opIndex     = groupID - 1;  // operators are stored with groupID 1 first
            fp_t_ts* const eNew = &En1_d[groupOffset * tetDim];

            // En1 = regularLoc1E * En + regularLoc2E * Hn12
            gemmShared(groupElements, &regularLoc1E_d[opIndex * locStride],
                       &En_d[groupOffset * tetDim], 0.0f, eNew, "regularLoc1E * En");
            gemmShared(groupElements, &regularLoc2E_d[opIndex * locStride],
                       &Hn12_d[groupOffset * tetDim], 1.0f, eNew, "regularLoc2E * Hn12");

            const long long int regularNeighOffset =
                neighOffset + classNeighIrregular_h[class_i] + groupStart * NumOfFaces;
            coupleRegular(regularNeighOffset * faceDim,
                          &regularNeigh1E_d[opIndex * NumOfFaces * neighStride],
                          &regularNeigh2E_d[opIndex * NumOfFaces * neighStride],
                          groupOffset, groupElements);
        }
    }

    CUDA_SAFE_CALL(cudaDeviceSynchronize());

    // ---------------------------------------------------------------------
    // 3. Irregular PML tetrahedra
    // ---------------------------------------------------------------------
    const int pmlTetras = nonRegularPMLTetraCnt_h[class_i];
    const long long int pmlOffset      = classPMLTetraOffset_h[class_i];
    const long long int pmlNeighOffset = classNeighPMLOffset_h[class_i];

    if (pmlTetras > 0)
    {
        const long long int pmlLocOffset = classPMLTetraOffset_loc_h[class_i];
        fp_t_ts* const jOld = &Jn12_d[pmlLocOffset * tetDim];
        fp_t_ts* const jNew = &Jn32_d[pmlLocOffset * tetDim];
        fp_t_ts* const eOld = &En_d[pmlOffset * tetDim];
        fp_t_ts* const eNew = &En1_d[pmlOffset * tetDim];

        // Auxiliary J: Jn32 = AuxJ1 * Jn12 + AuxJ2 * En
        gemmBatched(1, tetDim, &AuxJ1_d[pmlLocOffset * locStride], locStride, jOld, tetDim,
                    0.0f, jNew, tetDim, pmlTetras, "AuxJ1 * Jn12");
        gemmBatched(1, tetDim, &AuxJ2_d[pmlLocOffset * locStride], locStride, eOld, tetDim,
                    1.0f, jNew, tetDim, pmlTetras, "AuxJ2 * En");

        // En1 = Loc1E_PML * En + Loc2E_PML * Hn12 + AuxE * Jn32
        gemmBatched(1, tetDim, &Loc1E_PML_d[pmlLocOffset * locStride], locStride, eOld, tetDim,
                    0.0f, eNew, tetDim, pmlTetras, "Loc1E_PML * En");
        gemmBatched(1, tetDim, &Loc2E_PML_d[pmlLocOffset * locStride], locStride,
                    &Hn12_d[pmlOffset * tetDim], tetDim,
                    1.0f, eNew, tetDim, pmlTetras, "Loc2E_PML * Hn12");
        gemmBatched(1, tetDim, &AuxE_d[pmlLocOffset * locStride], locStride, jNew, tetDim,
                    1.0f, eNew, tetDim, pmlTetras, "AuxE * Jn32");

        const long long int pmlNeighLocOffset = classNeighPMLOffset_loc_h[class_i];
        coupleIrregular(pmlNeighOffset * faceDim,
                        &Neigh1E_PML_d[pmlNeighLocOffset * neighStride],
                        &Neigh2E_PML_d[pmlNeighLocOffset * neighStride],
                        classNeighPML_h[class_i],
                        pmlOffset, pmlTetras);

        CUDA_SAFE_CALL(cudaDeviceSynchronize());
    }

    // ---------------------------------------------------------------------
    // 4. Regular PML groups
    // ---------------------------------------------------------------------
    const bool hasRegularPML = regularRegionFlag && classRegularPMLGroupsCnt_h[class_i] > 0;

    if (hasRegularPML)
    {
        for (int i = 0; i < classRegularPMLGroupsCnt_h[class_i]; i++)
        {
            const int groupID = classRegularPMLGroupsId_h[class_i][i];
            const int groupElements = classRegularTetraCnt_h[class_i * regularCNT + groupID];
            if (groupElements <= 0)
                continue;  // nothing to compute, and a zero-block launch would be invalid

            const long long int groupStart  = classRegularPMLTetraOffset_h[class_i][i];  // offset into the r_J* arrays
            const long long int groupOffset = pmlOffset + nonRegularPMLTetraCnt_h[class_i] + groupStart;
            const long long int opIndex     = groupID - 1 - regularCNT_Normal;  // PML operators follow the normal groups

            fp_t_ts* const jOld = &r_Jn12_d[groupStart * tetDim];
            fp_t_ts* const jNew = &r_Jn32_d[groupStart * tetDim];
            fp_t_ts* const eOld = &En_d[groupOffset * tetDim];
            fp_t_ts* const eNew = &En1_d[groupOffset * tetDim];

            // Auxiliary J: r_Jn32 = regularPMLLoc1J * r_Jn12 + regularPMLLoc2J * En.
            // En is read at the group's field offset; the previous code indexed
            // En_d with the auxiliary-array offset and so read other elements.
            gemmShared(groupElements, &regularPMLLoc1J_d[opIndex * locStride], jOld,
                       0.0f, jNew, "regularPMLLoc1J * r_Jn12");
            gemmShared(groupElements, &regularPMLLoc2J_d[opIndex * locStride], eOld,
                       1.0f, jNew, "regularPMLLoc2J * En");

            // En1 = regularPMLLoc1E * En + regularPMLLoc2E * Hn12 + regularPMLAuxE * r_Jn32
            gemmShared(groupElements, &regularPMLLoc1E_d[opIndex * locStride], eOld,
                       0.0f, eNew, "regularPMLLoc1E * En");
            gemmShared(groupElements, &regularPMLLoc2E_d[opIndex * locStride],
                       &Hn12_d[groupOffset * tetDim],
                       1.0f, eNew, "regularPMLLoc2E * Hn12");
            gemmShared(groupElements, &regularPMLAuxE_d[opIndex * locStride], jNew,
                       1.0f, eNew, "regularPMLAuxE * r_Jn32");

            const long long int regularNeighOffset =
                pmlNeighOffset + classNeighPML_h[class_i] + groupStart * NumOfFaces;
            coupleRegular(regularNeighOffset * faceDim,
                          &regularPMLNeigh1E_d[opIndex * NumOfFaces * neighStride],
                          &regularPMLNeigh2E_d[opIndex * NumOfFaces * neighStride],
                          groupOffset, groupElements);
        }
    }

    CUDA_SAFE_CALL(cudaDeviceSynchronize());

    // ---------------------------------------------------------------------
    // Commit: new values become the current ones
    // ---------------------------------------------------------------------
    // The whole field array is copied, not only this class's range.
    CUDA_SAFE_CALL(cudaMemcpy(&En_d[0], &En1_d[0],
                              static_cast<size_t>(tetraCNT) * tetDim * sizeof(fp_t_ts),
                              cudaMemcpyDeviceToDevice));
    CUDA_SAFE_CALL(cudaDeviceSynchronize());

    if (pmlTetras > 0)
    {
        const long long int pmlLocOffset = classPMLTetraOffset_loc_h[class_i];
        CUDA_SAFE_CALL(cudaMemcpy(&Jn12_d[pmlLocOffset * tetDim], &Jn32_d[pmlLocOffset * tetDim],
                                  static_cast<size_t>(pmlTetras) * tetDim * sizeof(fp_t_ts),
                                  cudaMemcpyDeviceToDevice));
        CUDA_SAFE_CALL(cudaDeviceSynchronize());
    }

    if (hasRegularPML)
    {
        // Regular PML groups keep their auxiliary J in r_Jn12_d / r_Jn32_d.
        // The previous code copied the irregular Jn32_d -> Jn12_d here, so
        // r_Jn12_d was never advanced.
        CUDA_SAFE_CALL(cudaMemcpy(&r_Jn12_d[0], &r_Jn32_d[0],
                                  static_cast<size_t>(numRegPMLTetras) * tetDim * sizeof(fp_t_ts),
                                  cudaMemcpyDeviceToDevice));
        CUDA_SAFE_CALL(cudaDeviceSynchronize());
    }
}
|
|
|
|
|
|
// Advances the H-field coefficients of time-stepping class `class_i` by one step
// using cuBLAS GEMMs plus the project's gather / accumulate kernels.
//
// For every element group of the class the routine evaluates
//     Hn32 = Loc1H * Hn12 + Loc2H * En  (+ AuxH * Mn1 in PML)  (+ excitation)
// and then hands the face-coupling products (Neigh1H * gathered En,
// Neigh2H * gathered Hn12, staged in auxFieldInput/auxFieldOutput) to
// addCouplingResults / addCouplingResultsRegular, which presumably accumulate
// them into Hn32 (kernel bodies are not visible here -- TODO confirm).
// Four element families are processed in order:
//   1. irregular (per-element matrices) non-PML tetrahedra,
//   2. regular non-PML groups (one shared matrix set per group),
//   3. irregular PML tetrahedra (with the auxiliary variable Mn -> Mn1),
//   4. regular PML groups (auxiliary variable r_Mn -> r_Mn1).
// Finally Hn32 is copied into Hn12 and the auxiliary PML arrays are rolled over.
//
// Parameters:
//   handle  - cuBLAS handle used for every GEMM.
//   class_i - index of the class whose offsets/counts are read from the *_h tables.
//
// Side effects: increments LocalExciIndexH[class_i] when the class has excitation
// faces; terminates the process if a cuBLAS call reports a failure.
void FemGrp::LH_CuBLAS(cublasHandle_t handle, int class_i){

    const int   blockSize = 256;                          // threads per block for the 1-D kernels
    const int   tetDim    = TetPolyOrderDim[PolyFlag];    // basis functions per tetrahedron
    const int   faceDim   = FacePolyOrderDim[PolyFlag];   // basis functions per face
    const float one       = 1.0f;                         // alpha is 1 for every GEMM below
    int numBlocks;

    // cuBLAS reports failures only through its return value; do not let a failed
    // GEMM silently corrupt the field arrays.
    auto checkCublas = [](cublasStatus_t status, const char* what)
    {
        if (status != CUBLAS_STATUS_SUCCESS)
        {
            fprintf(stderr, "[LH_CuBLAS] %s failed with cuBLAS status %d\n", what, static_cast<int>(status));
            std::exit(EXIT_FAILURE);
        }
    };

    // C(rows x cols) = A(rows x inner) * B(inner x cols) + beta * C, column-major,
    // with lda = rows, ldb = inner, ldc = rows (the layout used by every call here).
    auto gemm = [&](int rows, int cols, int inner,
                    const float* A, const float* B, float beta, float* C)
    {
        checkCublas(cublasSgemm(handle,
                                CUBLAS_OP_N, CUBLAS_OP_N,
                                rows, cols, inner,
                                &one,
                                A, rows,
                                B, inner,
                                &beta,
                                C, rows),
                    "cublasSgemm");
    };

    // Strided-batched variant of the GEMM above (same leading dimensions).
    auto gemmBatched = [&](int rows, int cols, int inner,
                           const float* A, long long int strideA,
                           const float* B, long long int strideB,
                           float beta,
                           float* C, long long int strideC,
                           int batchCount)
    {
        checkCublas(cublasSgemmStridedBatched(handle,
                                              CUBLAS_OP_N, CUBLAS_OP_N,
                                              rows, cols, inner,
                                              &one,
                                              A, rows, strideA,
                                              B, inner, strideB,
                                              &beta,
                                              C, rows, strideC,
                                              batchCount),
                    "cublasSgemmStridedBatched");
    };

    const int irregularTetras = nonRegularTetraCnt_h[class_i];

    int classOffset = ClassTetraOffset[class_i];
    int neighOffset = NeighClassOffset_h[class_i];

    // ----------------------------------------------------------------------------------
    // 1. Irregular (non-regular) non-PML tetrahedra: one matrix per element
    // ----------------------------------------------------------------------------------
    if (irregularTetras > 0)
    {
        // Local matrices
        int nMatrices    = irregularTetras;
        int matrixOffset = classTetraOffset_loc_h[class_i];

        int m = tetDim; // rows of A
        int n = tetDim; // rows of B and cols of A

        long long int strideA = m * n;
        long long int strideB = n;
        long long int strideC = m;

        // Hn32 = Loc1H * Hn12
        gemmBatched(m, 1, n,
                    &Loc1H_d[matrixOffset * strideA], strideA,
                    &Hn12_d[classOffset * strideB], strideB,
                    0.0f,
                    &Hn32_d[classOffset * strideC], strideC,
                    nMatrices);

        // Hn32 += Loc2H * En
        gemmBatched(m, 1, n,
                    &Loc2H_d[matrixOffset * strideA], strideA,
                    &En_d[classOffset * strideB], strideB,
                    1.0f,
                    &Hn32_d[classOffset * strideC], strideC,
                    nMatrices);

        // Excitation contribution (strideA / strideC still hold the tet-tet sizes here)
        if (ClassExcitationCount[class_i] > 0)
        {
            const int exciCount  = ClassExcitationCount[class_i];
            const int exciOffset = ClassExcitationOffset[class_i];

            numBlocks = (exciCount + blockSize - 1) / blockSize;

            fp_t_ts dt = LocTimeSteps[class_i];
            fp_t_ts t  = (LocalExciIndexH[class_i] + 1.0) * dt;
            LocalExciIndexH[class_i]++;

            if (PWorPort == 0)
            {
                if (interior_excitation_flag)
                {
                    addExcitationH_PML<<<numBlocks, blockSize>>>(&ExcitationFacesCnt_d[exciOffset],
                                                                 &ExcitationFacesOffset_d[exciOffset],
                                                                 ExcitationFacesNum_d,
                                                                 exciCount,
                                                                 ClassExcitation_sc_CNT[class_i],
                                                                 &mapH_d[exciOffset * strideC],
                                                                 excitationProp,
                                                                 PolyFlag,
                                                                 dt / Uo, t,
                                                                 &nd_coords_tet_d[4 * 3 * exciOffset],
                                                                 nd_coords_face_d,
                                                                 Z_face_pw_d,
                                                                 &InvH_d[strideA * exciOffset],
                                                                 &Hn32_d[classOffset * strideC]);
                }
                else
                {
                    addExcitationH<<<numBlocks, blockSize>>>(&ExcitationFacesCnt_d[exciOffset],
                                                             &ExcitationFacesOffset_d[exciOffset],
                                                             ExcitationFacesNum_d,
                                                             exciCount,
                                                             &mapH_d[exciOffset * strideC],
                                                             excitationProp,
                                                             PolyFlag,
                                                             dt / Uo, t,
                                                             &nd_coords_tet_d[4 * 3 * exciOffset],
                                                             nd_coords_face_d,
                                                             Z_face_pw_d,
                                                             &InvH_d[strideA * exciOffset],
                                                             &Hn32_d[classOffset * strideC]);
                }
            }
            else
            {
                addExcitationH_port<<<numBlocks, blockSize>>>(&ExcitationFacesCnt_d[exciOffset],
                                                              &ExcitationFacesOffset_d[exciOffset],
                                                              ExcitationFacesNum_d,
                                                              exciCount,
                                                              &mapH_d[exciOffset * strideC],
                                                              ExcitationProps_d,
                                                              PortFacePidx_d,
                                                              PolyFlag,
                                                              dt / Uo, t,
                                                              &nd_coords_tet_d[4 * 3 * exciOffset],
                                                              nd_coords_face_d,
                                                              &InvH_d[strideA * exciOffset],
                                                              &Hn32_d[classOffset * strideC]);
            }
        }

        // Coupling matrices
        nMatrices    = classNeighIrregular_h[class_i];
        matrixOffset = classNeighOffset_loc_h[class_i];

        m = tetDim;  // rows of A
        n = faceDim; // rows of B and cols of A

        strideA = m * n;
        strideB = n;
        strideC = m;

        numBlocks = (nMatrices * n + blockSize - 1) / blockSize;

        // Implement 3D if tetras over blocksize * (2^(31) - 1)
        makeNeighField<<<numBlocks, blockSize>>>(&NeighMap_d[neighOffset * n], En_d, auxFieldInput, nMatrices * n);

        gemmBatched(m, 1, n,
                    &Neigh1H_d[matrixOffset * strideA], strideA,
                    auxFieldInput, strideB,
                    0.0f,
                    auxFieldOutput, strideC,
                    nMatrices);

        // Implement 3D if tetras over blocksize * (2^(31) - 1)
        makeNeighField<<<numBlocks, blockSize>>>(&NeighMap_d[neighOffset * n], Hn12_d, auxFieldInput, nMatrices * n);

        gemmBatched(m, 1, n,
                    &Neigh2H_d[matrixOffset * strideA], strideA,
                    auxFieldInput, strideB,
                    1.0f,
                    auxFieldOutput, strideC,
                    nMatrices);

        const int blockY = (blockSize + tetDim - 1) / tetDim;
        const dim3 couplingBlock(tetDim, blockY, 1);
        numBlocks = (irregularTetras + blockY - 1) / blockY;

        // Implement 3D if tetras over blocksize * (2^(31) - 1)
        addCouplingResults<<<numBlocks, couplingBlock>>>(&Hn32_d[classOffset * tetDim], auxFieldOutput,
                                                         &Neighbours_d[classOffset], &NeighboursOffset_d[classOffset],
                                                         irregularTetras);
    }

    CUDA_SAFE_CALL(cudaDeviceSynchronize());

    // ----------------------------------------------------------------------------------
    // 2. Regular non-PML groups: one shared matrix set per group
    // ----------------------------------------------------------------------------------
    if (regularRegionFlag && classRegularGroupsCnt_h[class_i] > 0)
    {
        for (int i = 0; i < classRegularGroupsCnt_h[class_i]; i++)
        {
            const int groupID       = classRegularGroupsId_h[class_i][i];
            const int groupElements = classRegularTetraCnt_h[class_i * regularCNT + groupID];
            const int groupOffset   = classOffset + nonRegularTetraCnt_h[class_i] + classRegularTetraOffset_h[class_i][i];

            // Local matrices
            int m = tetDim; // rows of A
            int n = tetDim; // rows of B and cols of A

            // Hn32 = regularLoc1H * Hn12
            gemm(m, groupElements, n,
                 &regularLoc1H_d[(groupID - 1) * m * n],
                 &Hn12_d[groupOffset * n],
                 0.0f,
                 &Hn32_d[groupOffset * m]);

            // Hn32 += regularLoc2H * En
            gemm(m, groupElements, n,
                 &regularLoc2H_d[(groupID - 1) * m * n],
                 &En_d[groupOffset * n],
                 1.0f,
                 &Hn32_d[groupOffset * m]);

            // Coupling matrices
            const int regularNeighOffset = neighOffset + classNeighIrregular_h[class_i] + classRegularTetraOffset_h[class_i][i] * NumOfFaces;

            m = tetDim;  // rows of A
            n = faceDim; // rows of B and cols of A

            numBlocks = (groupElements * n * NumOfFaces + blockSize - 1) / blockSize;

            makeNeighFieldRegular<<<numBlocks, blockSize>>>(&NeighMap_d[regularNeighOffset * n], En_d, auxFieldInput, groupElements, PolyFlag);

            // One batch entry per face; each entry spans all elements of the group.
            long long int strideA = m * n;
            long long int strideB = n * groupElements;
            long long int strideC = m * groupElements;

            gemmBatched(m, groupElements, n,
                        &regularNeigh1H_d[(groupID - 1) * NumOfFaces * strideA], strideA,
                        auxFieldInput, strideB,
                        0.0f,
                        auxFieldOutput, strideC,
                        NumOfFaces);

            makeNeighFieldRegular<<<numBlocks, blockSize>>>(&NeighMap_d[regularNeighOffset * n], Hn12_d, auxFieldInput, groupElements, PolyFlag);

            gemmBatched(m, groupElements, n,
                        &regularNeigh2H_d[(groupID - 1) * NumOfFaces * strideA], strideA,
                        auxFieldInput, strideB,
                        1.0f,
                        auxFieldOutput, strideC,
                        NumOfFaces);

            const int blockY = (blockSize + tetDim - 1) / tetDim;
            const dim3 couplingBlock(tetDim, blockY, 1);
            numBlocks = (groupElements + blockY - 1) / blockY;

            addCouplingResultsRegular<<<numBlocks, couplingBlock>>>(&Hn32_d[groupOffset * tetDim], auxFieldOutput, groupElements);

            // auxFieldInput/auxFieldOutput are reused by the next group: make sure
            // prior kernels/GEMMs finished.
            CUDA_SAFE_CALL(cudaDeviceSynchronize());
        }
    }

    // ----------------------------------------------------------------------------------
    // 3. Irregular PML tetrahedra
    // ----------------------------------------------------------------------------------
    const int PMLTetras = nonRegularPMLTetraCnt_h[class_i];

    classOffset = classPMLTetraOffset_h[class_i];
    neighOffset = classNeighPMLOffset_h[class_i];

    if (PMLTetras > 0)
    {
        // Local matrices
        int nMatrices    = PMLTetras;
        int matrixOffset = classPMLTetraOffset_loc_h[class_i];

        int m = tetDim; // rows of A
        int n = tetDim; // rows of B and cols of A

        long long int strideA = m * n;
        long long int strideB = n;
        long long int strideC = m;

        // Auxiliary M: Mn1 = AuxM1 * Mn
        gemmBatched(m, 1, n,
                    &AuxM1_d[matrixOffset * strideA], strideA,
                    &Mn_d[matrixOffset * strideB], strideB,
                    0.0f,
                    &Mn1_d[matrixOffset * strideC], strideC,
                    nMatrices);

        CUDA_SAFE_CALL(cudaDeviceSynchronize());

        // Mn1 += AuxM2 * Hn12
        gemmBatched(m, 1, n,
                    &AuxM2_d[matrixOffset * strideA], strideA,
                    &Hn12_d[classOffset * strideB], strideB,
                    1.0f,
                    &Mn1_d[matrixOffset * strideC], strideC,
                    nMatrices);

        // Hn32 = Loc1H_PML * Hn12
        gemmBatched(m, 1, n,
                    &Loc1H_PML_d[matrixOffset * strideA], strideA,
                    &Hn12_d[classOffset * strideB], strideB,
                    0.0f,
                    &Hn32_d[classOffset * strideC], strideC,
                    nMatrices);

        // Hn32 += Loc2H_PML * En
        gemmBatched(m, 1, n,
                    &Loc2H_PML_d[matrixOffset * strideA], strideA,
                    &En_d[classOffset * strideB], strideB,
                    1.0f,
                    &Hn32_d[classOffset * strideC], strideC,
                    nMatrices);

        // Add auxiliary term: Hn32 += AuxH * Mn1
        gemmBatched(m, 1, n,
                    &AuxH_d[matrixOffset * strideA], strideA,
                    &Mn1_d[matrixOffset * strideB], strideB,
                    1.0f,
                    &Hn32_d[classOffset * strideC], strideC,
                    nMatrices);

        // Coupling matrices
        nMatrices    = classNeighPML_h[class_i];
        matrixOffset = classNeighPMLOffset_loc_h[class_i];

        m = tetDim;  // rows of A
        n = faceDim; // rows of B and cols of A

        strideA = m * n;
        strideB = n;
        strideC = m;

        numBlocks = (nMatrices * n + blockSize - 1) / blockSize;

        // Implement 3D if tetras over blocksize * (2^(31) - 1)
        makeNeighField<<<numBlocks, blockSize>>>(&NeighMap_d[neighOffset * n], En_d, auxFieldInput, nMatrices * n);

        gemmBatched(m, 1, n,
                    &Neigh1H_PML_d[matrixOffset * strideA], strideA,
                    auxFieldInput, strideB,
                    0.0f,
                    auxFieldOutput, strideC,
                    nMatrices);

        // Implement 3D if tetras over blocksize * (2^(31) - 1)
        makeNeighField<<<numBlocks, blockSize>>>(&NeighMap_d[neighOffset * n], Hn12_d, auxFieldInput, nMatrices * n);

        gemmBatched(m, 1, n,
                    &Neigh2H_PML_d[matrixOffset * strideA], strideA,
                    auxFieldInput, strideB,
                    1.0f,
                    auxFieldOutput, strideC,
                    nMatrices);

        const int blockY = (blockSize + tetDim - 1) / tetDim;
        const dim3 couplingBlock(tetDim, blockY, 1);
        numBlocks = (PMLTetras + blockY - 1) / blockY;

        // Implement 3D if tetras over blocksize * (2^(31) - 1)
        addCouplingResults<<<numBlocks, couplingBlock>>>(&Hn32_d[classPMLTetraOffset_h[class_i] * tetDim], auxFieldOutput,
                                                         &Neighbours_d[classOffset], &NeighboursOffset_d[classOffset],
                                                         PMLTetras);

        CUDA_SAFE_CALL(cudaDeviceSynchronize());
    }

    // ----------------------------------------------------------------------------------
    // 4. Regular PML groups
    // ----------------------------------------------------------------------------------
    if (regularRegionFlag && classRegularPMLGroupsCnt_h[class_i] > 0)
    {
        for (int i = 0; i < classRegularPMLGroupsCnt_h[class_i]; i++)
        {
            const int groupID       = classRegularPMLGroupsId_h[class_i][i];
            const int groupElements = classRegularTetraCnt_h[class_i * regularCNT + groupID];
            const int groupOffset   = classOffset + nonRegularPMLTetraCnt_h[class_i] + classRegularPMLTetraOffset_h[class_i][i];

            CUDA_SAFE_CALL(cudaDeviceSynchronize());

            const int local_index = groupID - 1 - regularCNT_Normal;              // index into the regularPML* matrix arrays
            const int aux_offset  = classRegularPMLTetraOffset_h[class_i][i];     // index into r_Mn_d / r_Mn1_d

            // Local matrices
            int m = tetDim; // rows of A
            int n = tetDim; // rows of B and cols of A

            // Auxiliary M: r_Mn1 = regularPMLLoc1M * r_Mn
            gemm(m, groupElements, n,
                 &regularPMLLoc1M_d[(local_index) * m * n],
                 &r_Mn_d[aux_offset * n],
                 0.0f,
                 &r_Mn1_d[aux_offset * m]);

            CUDA_SAFE_CALL(cudaDeviceSynchronize());

            // r_Mn1 += regularPMLLoc2M * Hn12
            // NOTE(review): the original read Hn12_d at aux_offset here. Hn12_d is
            // addressed by the global element offset everywhere else in this routine
            // (groupOffset a few lines below, classOffset in the irregular PML branch),
            // so groupOffset is used to pick up the field of the same elements.
            gemm(m, groupElements, n,
                 &regularPMLLoc2M_d[(local_index) * m * n],
                 &Hn12_d[groupOffset * n],
                 1.0f,
                 &r_Mn1_d[aux_offset * m]);

            CUDA_SAFE_CALL(cudaDeviceSynchronize());

            // Hn32 = regularPMLLoc1H * Hn12
            gemm(m, groupElements, n,
                 &regularPMLLoc1H_d[(local_index) * m * n],
                 &Hn12_d[groupOffset * n],
                 0.0f,
                 &Hn32_d[groupOffset * m]);

            CUDA_SAFE_CALL(cudaDeviceSynchronize());

            // Hn32 += regularPMLLoc2H * En
            gemm(m, groupElements, n,
                 &regularPMLLoc2H_d[(local_index) * m * n],
                 &En_d[groupOffset * n],
                 1.0f,
                 &Hn32_d[groupOffset * m]);

            CUDA_SAFE_CALL(cudaDeviceSynchronize());

            // Hn32 += regularPMLAuxH * r_Mn1
            gemm(m, groupElements, n,
                 &regularPMLAuxH_d[(local_index) * m * n],
                 &r_Mn1_d[aux_offset * n],
                 1.0f,
                 &Hn32_d[groupOffset * m]);

            CUDA_SAFE_CALL(cudaDeviceSynchronize());

            // Coupling matrices
            const int regularNeighOffset = neighOffset + classNeighPML_h[class_i] + classRegularPMLTetraOffset_h[class_i][i] * NumOfFaces;

            m = tetDim;  // rows of A
            n = faceDim; // rows of B and cols of A

            numBlocks = (groupElements * n * NumOfFaces + blockSize - 1) / blockSize;

            makeNeighFieldRegular<<<numBlocks, blockSize>>>(&NeighMap_d[regularNeighOffset * n], En_d, auxFieldInput, groupElements, PolyFlag);

            long long int strideA = m * n;
            long long int strideB = n * groupElements;
            long long int strideC = m * groupElements;

            CUDA_SAFE_CALL(cudaDeviceSynchronize());

            gemmBatched(m, groupElements, n,
                        &regularPMLNeigh1H_d[(local_index) * NumOfFaces * strideA], strideA,
                        auxFieldInput, strideB,
                        0.0f,
                        auxFieldOutput, strideC,
                        NumOfFaces);

            makeNeighFieldRegular<<<numBlocks, blockSize>>>(&NeighMap_d[regularNeighOffset * n], Hn12_d, auxFieldInput, groupElements, PolyFlag);

            CUDA_SAFE_CALL(cudaDeviceSynchronize());

            gemmBatched(m, groupElements, n,
                        &regularPMLNeigh2H_d[(local_index) * NumOfFaces * strideA], strideA,
                        auxFieldInput, strideB,
                        1.0f,
                        auxFieldOutput, strideC,
                        NumOfFaces);

            const int blockY = (blockSize + tetDim - 1) / tetDim;
            const dim3 couplingBlock(tetDim, blockY, 1);
            numBlocks = (groupElements + blockY - 1) / blockY;

            addCouplingResultsRegular<<<numBlocks, couplingBlock>>>(&Hn32_d[groupOffset * tetDim], auxFieldOutput, groupElements);
        }
    }

    CUDA_SAFE_CALL(cudaDeviceSynchronize());

    // ----------------------------------------------------------------------------------
    // Roll the time levels over: Hn12 <- Hn32 for the whole mesh, then the PML
    // auxiliary variables of this class.
    // Byte counts are formed in size_t so large meshes cannot overflow int.
    // ----------------------------------------------------------------------------------
    CUDA_SAFE_CALL(cudaMemcpy(&Hn12_d[0], &Hn32_d[0],
                              static_cast<size_t>(tetraCNT) * tetDim * sizeof(fp_t_ts),
                              cudaMemcpyDeviceToDevice));
    CUDA_SAFE_CALL(cudaDeviceSynchronize());

    if (nonRegularPMLTetraCnt_h[class_i] > 0)
    {
        const int num_PML_tets = nonRegularPMLTetraCnt_h[class_i];
        const int matrixOffset = classPMLTetraOffset_loc_h[class_i];

        CUDA_SAFE_CALL(cudaMemcpy(&Mn_d[matrixOffset * tetDim], &Mn1_d[matrixOffset * tetDim],
                                  static_cast<size_t>(num_PML_tets) * tetDim * sizeof(fp_t_ts),
                                  cudaMemcpyDeviceToDevice));
        CUDA_SAFE_CALL(cudaDeviceSynchronize());
    }

    if (classRegularPMLGroupsCnt_h[class_i] > 0)
    {
        const int num_PML_tets = numRegPMLTetras;

        CUDA_SAFE_CALL(cudaMemcpy(&r_Mn_d[0], &r_Mn1_d[0],
                                  static_cast<size_t>(num_PML_tets) * tetDim * sizeof(fp_t_ts),
                                  cudaMemcpyDeviceToDevice));
        CUDA_SAFE_CALL(cudaDeviceSynchronize());
    }
}
|
|
|
|
|
|
// Releases the device buffers listed below with cudaFree.
//
// Each pointer is reset to nullptr after it is freed, so calling FreeGPU() a
// second time (cudaFree(nullptr) is a no-op) cannot double-free device memory
// and stale device addresses are not left behind in the object.
// Z_face_pw_d is only released when PlaneWaveBCFlag is set, and the regular*
// matrices only when regularRegionFlag is set, mirroring the original logic.
void FemGrp::FreeGPU(){

    // Free one device allocation and clear the handle that referred to it.
    auto release = [](auto*& devPtr)
    {
        CUDA_SAFE_CALL(cudaFree(devPtr));
        devPtr = nullptr;
    };

    // Excitation maps and face bookkeeping
    release(mapE_d);
    release(mapH_d);

    release(ExcitationFacesCnt_d);
    release(ExcitationFacesOffset_d);
    release(ExcitationFacesNum_d);

    // Geometry
    release(nd_coords_tet_d);
    release(nd_coords_face_d);

    if (PlaneWaveBCFlag)
    {
        release(Z_face_pw_d);
    }

    // Per-element matrices
    release(InvE_d);
    release(InvH_d);

    release(Loc1E_d);
    release(Loc2E_d);
    release(Loc1H_d);
    release(Loc2H_d);

    release(Neigh1E_d);
    release(Neigh2E_d);
    release(Neigh1H_d);
    release(Neigh2H_d);

    // Shared matrices of the regular groups
    if (regularRegionFlag)
    {
        release(regularLoc1E_d);
        release(regularLoc2E_d);
        release(regularLoc1H_d);
        release(regularLoc2H_d);

        release(regularNeigh1E_d);
        release(regularNeigh2E_d);
        release(regularNeigh1H_d);
        release(regularNeigh2H_d);
    }

    // Field coefficient arrays
    release(En_d);
    release(En1_d);
    release(Hn12_d);
    release(Hn32_d);

    // Connectivity
    release(NeighMap_d);
    release(Neighbours_d);

    // Scratch buffers used by the coupling GEMMs
    release(auxFieldInput);
    release(auxFieldOutput);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Refactored by Qi Jian to build an octree of the tetrahedra
|
|
|
void FemGrp::initializeOctree(std::string prjName, bool non_Conformal_flag)
|
|
|
{
|
|
|
|
|
|
cout << "========================================================== \n";
|
|
|
|
|
|
// Initialize octree object
|
|
|
octree_object = Octree();
|
|
|
|
|
|
// Compute AABB for each tetrahedron
|
|
|
/*
|
|
|
std::cout << "Compute AABB for tetrahedral" << std::endl;
|
|
|
octree_object.tetra_boxes.resize(tetraCNT);
|
|
|
#pragma omp parallel for
|
|
|
for (int tet_id = 0; tet_id < tetraCNT; ++tet_id)
|
|
|
{
|
|
|
const tetra& tet = tetARRAY[tet_id];
|
|
|
|
|
|
double x[4], y[4], z[4];
|
|
|
for (int i = 0; i < 4; ++i)
|
|
|
{
|
|
|
x[i] = tet.nd[i]->getCoord().getx();
|
|
|
y[i] = tet.nd[i]->getCoord().gety();
|
|
|
z[i] = tet.nd[i]->getCoord().getz();
|
|
|
}
|
|
|
|
|
|
AABB box;
|
|
|
box.xmin = std::min({x[0], x[1], x[2], x[3]});
|
|
|
box.xmax = std::max({x[0], x[1], x[2], x[3]});
|
|
|
box.ymin = std::min({y[0], y[1], y[2], y[3]});
|
|
|
box.ymax = std::max({y[0], y[1], y[2], y[3]});
|
|
|
box.zmin = std::min({z[0], z[1], z[2], z[3]});
|
|
|
box.zmax = std::max({z[0], z[1], z[2], z[3]});
|
|
|
|
|
|
octree_object.tetra_boxes[tet_id] = box;
|
|
|
}
|
|
|
*/
|
|
|
|
|
|
// Compute AABB for each tetrahedron
|
|
|
std::cout << "Compute AABB for tetrahedral (with buffer)" << std::endl;
|
|
|
octree_object.tetra_boxes.resize(tetraCNT);
|
|
|
|
|
|
// Buffer multiplier (e.g., 5% enlargement)
|
|
|
const double buffer_factor = 2.0;
|
|
|
|
|
|
#pragma omp parallel for
|
|
|
for (int tet_id = 0; tet_id < tetraCNT; ++tet_id)
|
|
|
{
|
|
|
const tetra& tet = tetARRAY[tet_id];
|
|
|
|
|
|
double x[4], y[4], z[4];
|
|
|
for (int i = 0; i < 4; ++i)
|
|
|
{
|
|
|
x[i] = tet.nd[i]->getCoord().getx();
|
|
|
y[i] = tet.nd[i]->getCoord().gety();
|
|
|
z[i] = tet.nd[i]->getCoord().getz();
|
|
|
}
|
|
|
|
|
|
AABB box;
|
|
|
double xmin = std::min({x[0], x[1], x[2], x[3]});
|
|
|
double xmax = std::max({x[0], x[1], x[2], x[3]});
|
|
|
double ymin = std::min({y[0], y[1], y[2], y[3]});
|
|
|
double ymax = std::max({y[0], y[1], y[2], y[3]});
|
|
|
double zmin = std::min({z[0], z[1], z[2], z[3]});
|
|
|
double zmax = std::max({z[0], z[1], z[2], z[3]});
|
|
|
|
|
|
// Compute center and half-sizes
|
|
|
double cx = 0.5 * (xmin + xmax);
|
|
|
double cy = 0.5 * (ymin + ymax);
|
|
|
double cz = 0.5 * (zmin + zmax);
|
|
|
double hx = 0.5 * (xmax - xmin);
|
|
|
double hy = 0.5 * (ymax - ymin);
|
|
|
double hz = 0.5 * (zmax - zmin);
|
|
|
|
|
|
// Apply buffer multiplier
|
|
|
hx *= buffer_factor;
|
|
|
hy *= buffer_factor;
|
|
|
hz *= buffer_factor;
|
|
|
|
|
|
// Store expanded box
|
|
|
box.xmin = cx - hx; box.xmax = cx + hx;
|
|
|
box.ymin = cy - hy; box.ymax = cy + hy;
|
|
|
box.zmin = cz - hz; box.zmax = cz + hz;
|
|
|
|
|
|
octree_object.tetra_boxes[tet_id] = box;
|
|
|
}
|
|
|
|
|
|
|
|
|
std::cout << "Compute global bounding box" << std::endl;
|
|
|
|
|
|
// All the tetrahedra IDs
|
|
|
std::vector<int> all_tet_ids(tetraCNT);
|
|
|
std::iota(all_tet_ids.begin(), all_tet_ids.end(), 0);
|
|
|
|
|
|
// All the non-conformal tetrahedra IDs
|
|
|
std::vector<int> all_NC_tet_ids(nonConformalCNT);
|
|
|
if (non_Conformal_flag)
|
|
|
{
|
|
|
std::cout << "Store non-conformal tetrahedra IDs" << std::endl;
|
|
|
all_NC_tet_ids.assign(ncARRAY, ncARRAY + nonConformalCNT);
|
|
|
}
|
|
|
|
|
|
|
|
|
AABB global_box {
|
|
|
.xmin = std::numeric_limits<float>::max(),
|
|
|
.xmax = -std::numeric_limits<float>::max(),
|
|
|
.ymin = std::numeric_limits<float>::max(),
|
|
|
.ymax = -std::numeric_limits<float>::max(),
|
|
|
.zmin = std::numeric_limits<float>::max(),
|
|
|
.zmax = -std::numeric_limits<float>::max()
|
|
|
};
|
|
|
|
|
|
for (const auto& box : octree_object.tetra_boxes)
|
|
|
{
|
|
|
global_box.xmin = std::min(global_box.xmin, box.xmin);
|
|
|
global_box.xmax = std::max(global_box.xmax, box.xmax);
|
|
|
global_box.ymin = std::min(global_box.ymin, box.ymin);
|
|
|
global_box.ymax = std::max(global_box.ymax, box.ymax);
|
|
|
global_box.zmin = std::min(global_box.zmin, box.zmin);
|
|
|
global_box.zmax = std::max(global_box.zmax, box.zmax);
|
|
|
}
|
|
|
|
|
|
std::cout << "Global Bounding Box:" << std::endl;
|
|
|
std::cout << " xmin = " << global_box.xmin << ", xmax = " << global_box.xmax << std::endl;
|
|
|
std::cout << " ymin = " << global_box.ymin << ", ymax = " << global_box.ymax << std::endl;
|
|
|
std::cout << " zmin = " << global_box.zmin << ", zmax = " << global_box.zmax << std::endl;
|
|
|
|
|
|
fp_t x_range = (global_box.xmax - global_box.xmin);
|
|
|
fp_t y_range = (global_box.ymax - global_box.ymin);
|
|
|
fp_t z_range = (global_box.zmax - global_box.zmin);
|
|
|
fp_t max_range = std::max({x_range, y_range, z_range});
|
|
|
fp_t wavelength = 3e8 / (freq * 1e6);
|
|
|
|
|
|
double box_size = 100.0 * wavelength; // or any desired multiple of λ
|
|
|
int min_depth = 1; // or 2, etc.
|
|
|
int octree_depth = std::max(min_depth, static_cast<int>(std::ceil(std::log2(max_range / box_size))));
|
|
|
|
|
|
double buffer_distance = wavelength / 2.0;
|
|
|
|
|
|
//int octree_depth = static_cast<int>(std::ceil(std::log2((4.0 * max_range) / wavelength))) - 1;
|
|
|
|
|
|
std::cout << "Max Range = " << max_range << " | Wavelength = " << wavelength << std::endl;
|
|
|
std::cout << "Compute octree with octree depth = " << octree_depth << std::endl;
|
|
|
|
|
|
|
|
|
if (non_Conformal_flag)
|
|
|
{
|
|
|
octree_object.buildOctree_withNCFLAGS(all_tet_ids, all_NC_tet_ids, global_box, buffer_distance, 0, octree_depth);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
octree_object.buildOctree(all_tet_ids, global_box, buffer_distance, 0, octree_depth);
|
|
|
}
|
|
|
|
|
|
// Link tetrahedron memory
|
|
|
octree_object.tet_ptr = tetARRAY;
|
|
|
octree_object.tet_count = tetraCNT;
|
|
|
std::cout << "Octree build completed" << std::endl;
|
|
|
|
|
|
cout << "========================================================== \n";
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
// Find the Barycentric coordinates of the probes
|
|
|
void FemGrp::computeBarycentricEmbedding()
|
|
|
{
|
|
|
std::cout << "Compute the Barycentric center of the nodes" << std::endl;
|
|
|
const int num_nodes = outputMesh.num_nodes;
|
|
|
const double tol = 1e-8;
|
|
|
|
|
|
//#pragma omp parallel for schedule(dynamic)
|
|
|
for (int node_id = 0; node_id < num_nodes; ++node_id)
|
|
|
{
|
|
|
std::vector<float> node_xyz = outputMesh.getNode(node_id);
|
|
|
double probe_xyz[3] = {node_xyz[0], node_xyz[1], node_xyz[2]};
|
|
|
|
|
|
std::vector<std::pair<int, std::array<double, 4>>> found_tets;
|
|
|
bool success = octree_object.findTetraInOctree(probe_xyz, found_tets, tol);
|
|
|
|
|
|
if (success)
|
|
|
{
|
|
|
tri_nodes_bary[node_id].first = static_cast<int>(found_tets.size());
|
|
|
tri_nodes_bary[node_id].second = found_tets;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
tri_nodes_bary[node_id].first = -1;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// Report and verify
|
|
|
bool error_flag = false;
|
|
|
for (int i = 0; i < num_nodes; ++i)
|
|
|
{
|
|
|
if (tri_nodes_bary[i].first < 0)
|
|
|
{
|
|
|
std::cerr << "Node " << i << " not found in simulation domain" << std::endl;
|
|
|
std::vector<float> node_xyz = outputMesh.getNode(i);
|
|
|
double probe_xyz[3] = {node_xyz[0], node_xyz[1], node_xyz[2]};
|
|
|
std::cerr << probe_xyz[0] << " " << probe_xyz[1] << " " << probe_xyz[2] << std::endl;
|
|
|
error_flag = true;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
if (error_flag)
|
|
|
{
|
|
|
std::cerr << "Error: Some nodes were not found in the simulation domain. Exiting." << std::endl;
|
|
|
std::exit(EXIT_FAILURE);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Refactored by Qi Jian to initialize the output surface mesh
|
|
|
// Note that the octree has to be built before calling this function
|
|
|
void FemGrp::makeOutputSurfMesh(std::string prjName)
|
|
|
{
|
|
|
// Load surface mesh
|
|
|
char triName[256];
|
|
|
sprintf(triName, "./%s_out.tri", prjName.c_str());
|
|
|
std::cout << "--------------------" << std::endl;
|
|
|
std::cout << "Reading Tri surface mesh " << triName << std::endl;
|
|
|
outputMesh.readFromFile(triName);
|
|
|
|
|
|
std::cout << "--------------------" << std::endl;
|
|
|
std::cout << "Compute Normals " << std::endl;
|
|
|
outputMesh.computeTriangleNormals();
|
|
|
|
|
|
std::cout << "--------------------" << std::endl;
|
|
|
outputMesh.printSummary();
|
|
|
std::cout << "--------------------" << std::endl;
|
|
|
|
|
|
tri_nodes_bary.resize(outputMesh.num_nodes);
|
|
|
|
|
|
// Fill barycentric coordinate map
|
|
|
computeBarycentricEmbedding();
|
|
|
|
|
|
std::cout << "Completed" << std::endl;
|
|
|
std::cout << "--------------------" << std::endl;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Added by Qi Jian
|
|
|
// Utility to write fields of probes
|
|
|
void FemGrp::writeProbeFieldsCSV(
|
|
|
const std::string& outputDir, // e.g. "./PROBES1"
|
|
|
const std::string& fname, // simulation/project name
|
|
|
int timeStep, // timestep number
|
|
|
const std::vector<int>& node_ids, // node IDs to write
|
|
|
const std::vector<vtr>& Efield, // electric field vectors
|
|
|
const std::vector<vtr>& Hfield // magnetic field vectors
|
|
|
)
|
|
|
{
|
|
|
|
|
|
char csvFileName[512];
|
|
|
sprintf(csvFileName, "%s/Probes_%s_%04d.csv", outputDir.c_str(), fname.c_str(), timeStep);
|
|
|
|
|
|
std::ofstream csvFile(csvFileName);
|
|
|
if (!csvFile.is_open()) {
|
|
|
std::cerr << "Error opening file: " << csvFileName << std::endl;
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
// Write header
|
|
|
csvFile << "Ex,Ey,Ez,Hx,Hy,Hz\n";
|
|
|
|
|
|
// Lambda to write one node's fields
|
|
|
auto write_fields = [&](int node_id)
|
|
|
{
|
|
|
const vtr& E = Efield[node_id];
|
|
|
const vtr& H = Hfield[node_id];
|
|
|
csvFile << std::fixed << std::setprecision(6)
|
|
|
<< E.getx() << "," << E.gety() << "," << E.getz() << ","
|
|
|
<< H.getx() << "," << H.gety() << "," << H.getz() << "\n";
|
|
|
};
|
|
|
|
|
|
for (int i = 0; i < node_ids.size(); ++i)
|
|
|
{
|
|
|
int node_id = node_ids[i];
|
|
|
write_fields(node_id);
|
|
|
}
|
|
|
|
|
|
csvFile.close();
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void FemGrp::writeCurrentsOutputSurfMesh_CuBLAS(int timeStep)
|
|
|
{
|
|
|
|
|
|
const int num_nodes = outputMesh.num_nodes;
|
|
|
const int num_tri = outputMesh.num_triangles;
|
|
|
|
|
|
// ----------------------------------------------
|
|
|
// Step 1: Compute fields at all nodes (scattered field)
|
|
|
// ----------------------------------------------
|
|
|
|
|
|
// Incident Field at points
|
|
|
std::vector<vtr> E_field(num_nodes);
|
|
|
std::vector<vtr> H_field(num_nodes);
|
|
|
std::vector<vtr> Einc_field(num_nodes);
|
|
|
std::vector<vtr> Hinc_field(num_nodes);
|
|
|
|
|
|
|
|
|
int i, j;
|
|
|
fp_t vol;
|
|
|
fp_t zeta[4];
|
|
|
vtr lvtr[3];
|
|
|
vtr avtr[4];
|
|
|
|
|
|
int tetraMAP_aux[TetPolyOrderDim[getPolyFlag()]];
|
|
|
fp_t_ts E_coeff[TetPolyOrderDim[getPolyFlag()]];
|
|
|
fp_t_ts H_coeff[TetPolyOrderDim[getPolyFlag()]];
|
|
|
|
|
|
vtr Einc;
|
|
|
vtr Hinc;
|
|
|
vtr r;
|
|
|
vtr eField;
|
|
|
vtr hField;
|
|
|
|
|
|
|
|
|
// DEBUG purpose: Store all the node ids as probes
|
|
|
vector<int> node_ids(num_nodes);
|
|
|
for(i = 0; i < num_nodes; i++)
|
|
|
{
|
|
|
node_ids[i] = i;
|
|
|
}
|
|
|
|
|
|
|
|
|
// Compute the Incident Fields
|
|
|
for(i = 0; i < num_nodes; i++)
|
|
|
{
|
|
|
int number_of_associated_tets = tri_nodes_bary.at(i).first;
|
|
|
|
|
|
Einc.reset();
|
|
|
Hinc.reset();
|
|
|
|
|
|
std::vector<std::pair<int, std::array<double, 4>>> found_tets = tri_nodes_bary.at(i).second;
|
|
|
Einc_field[i].reset();
|
|
|
Hinc_field[i].reset();
|
|
|
|
|
|
for (int t = 0; t < number_of_associated_tets; t++)
|
|
|
{
|
|
|
int tet_id = found_tets.at(t).first;
|
|
|
array<double,4> tri_bary_coord = found_tets.at(t).second;
|
|
|
tetra& tet = tetARRAY[tet_id];
|
|
|
|
|
|
zeta[0] = static_cast<fp_t>(tri_bary_coord[0]);
|
|
|
zeta[1] = static_cast<fp_t>(tri_bary_coord[1]);
|
|
|
zeta[2] = static_cast<fp_t>(tri_bary_coord[2]);
|
|
|
zeta[3] = static_cast<fp_t>(tri_bary_coord[3]);
|
|
|
|
|
|
SimplexToCartesian(tet, r, zeta);
|
|
|
getAnalyticalPWField(tet, r, Einc, Hinc, timeStep, LocTimeSteps[N_class -1]);
|
|
|
|
|
|
Einc_field[i] = Einc_field[i] + Einc;
|
|
|
Hinc_field[i] = Hinc_field[i] + Hinc;
|
|
|
|
|
|
}
|
|
|
|
|
|
Einc_field[i] = Einc_field[i] / ((fp_t) number_of_associated_tets);
|
|
|
Hinc_field[i] = Hinc_field[i] / ((fp_t) number_of_associated_tets);
|
|
|
|
|
|
}
|
|
|
|
|
|
//writeProbeFieldsCSV( "./PROBES_inc", fname, timeStep, node_ids, Einc_field, Hinc_field);
|
|
|
|
|
|
make_dir_if_not_exist("./CURRENT_INC");
|
|
|
char regFileName[StrOutput];
|
|
|
// Prepare output file name
|
|
|
regFileName[StrOutput] = {0};
|
|
|
sprintf(regFileName, "./CURRENT_INC/Einc_field_%s_%05d.dat", fname, timeStep);
|
|
|
|
|
|
// Open output file
|
|
|
FILE* fout = fopen(regFileName, "w");
|
|
|
if (!fout)
|
|
|
{
|
|
|
std::cerr << "❌ Failed to open output file: " << regFileName << std::endl;
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
std::vector<int> tri_nodes = outputMesh.getTriangle(1);
|
|
|
int nodeIdx = tri_nodes[0]; // Pick only the first node
|
|
|
const vtr& E = Einc_field[nodeIdx]; // Get E-field vector at that node
|
|
|
|
|
|
// Write full vector (Ex, Ey, Ez) to file
|
|
|
fprintf(fout, "%.10e %.10e %.10e\n", E.getx(), E.gety(), E.getz());
|
|
|
fclose(fout); // Done!
|
|
|
|
|
|
|
|
|
|
|
|
// Calculate Total Fields at the points
|
|
|
for(i = 0; i < num_nodes; i++)
|
|
|
{
|
|
|
int number_of_associated_tets = tri_nodes_bary.at(i).first;
|
|
|
|
|
|
eField.reset();
|
|
|
hField.reset();
|
|
|
|
|
|
std::vector<std::pair<int, std::array<double, 4>>> found_tets = tri_nodes_bary.at(i).second;
|
|
|
E_field[i].reset();
|
|
|
H_field[i].reset();
|
|
|
|
|
|
for (int t = 0; t < number_of_associated_tets; t++)
|
|
|
{
|
|
|
|
|
|
int tet_id = found_tets.at(t).first;
|
|
|
array<double,4> tri_bary_coord = found_tets.at(t).second;
|
|
|
tetra& tet = tetARRAY[tet_id];
|
|
|
|
|
|
tet.geometry(lvtr, avtr, &vol);
|
|
|
avtr[3].reset();
|
|
|
avtr[3] = avtr[3] - (avtr[0] + avtr[1] + avtr[2]);
|
|
|
|
|
|
eField.reset();
|
|
|
hField.reset();
|
|
|
zeta[0] = static_cast<fp_t>(tri_bary_coord[0]);
|
|
|
zeta[1] = static_cast<fp_t>(tri_bary_coord[1]);
|
|
|
zeta[2] = static_cast<fp_t>(tri_bary_coord[2]);
|
|
|
zeta[3] = static_cast<fp_t>(tri_bary_coord[3]);
|
|
|
|
|
|
eField = CalcEfield(&En1_h[mapIdLoc[tet.getcnt()] * TetPolyOrderDim[PolyFlag]], avtr, vol, zeta, PolyFlag);
|
|
|
hField = CalcEfield(&Hn32_h[mapIdLoc[tet.getcnt()] * TetPolyOrderDim[PolyFlag]], avtr, vol, zeta, PolyFlag);
|
|
|
|
|
|
E_field[i] = E_field[i] + eField;
|
|
|
H_field[i] = H_field[i] + hField;
|
|
|
|
|
|
}
|
|
|
|
|
|
E_field[i] = E_field[i] / ((fp_t) number_of_associated_tets);
|
|
|
H_field[i] = H_field[i] / ((fp_t) number_of_associated_tets);
|
|
|
|
|
|
}
|
|
|
|
|
|
//writeProbeFieldsCSV( "./PROBES_total", fname, timeStep, node_ids, E_field, H_field);
|
|
|
|
|
|
|
|
|
regMface = new Register[outputMesh.num_triangles];
|
|
|
regJface = new Register[outputMesh.num_triangles];
|
|
|
|
|
|
make_dir_if_not_exist("./CURRENT_Total");
|
|
|
|
|
|
|
|
|
for(int i = 0; i < outputMesh.num_triangles; i++)
|
|
|
{
|
|
|
|
|
|
std::vector<int> tri_nodes = outputMesh.getTriangle(i);
|
|
|
|
|
|
std::vector<float> normal_d = outputMesh.getNormal(i);
|
|
|
vtr NormalVtr(normal_d[0], normal_d[1], normal_d[2]);
|
|
|
regMface[i].initial(3);
|
|
|
regJface[i].initial(3);
|
|
|
|
|
|
for(j = 0; j < 3; j++)
|
|
|
{
|
|
|
int nodeIdx = tri_nodes[j];
|
|
|
vtr eLocalFace = E_field[nodeIdx];
|
|
|
vtr hLocalFace = H_field[nodeIdx];
|
|
|
|
|
|
// No averaging
|
|
|
regMface[i].setField(j, NormalVtr * eLocalFace * (-1.0));
|
|
|
regJface[i].setField(j, NormalVtr * hLocalFace * (1.0));
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// Register
|
|
|
memset(regFileName, 0, StrOutput * sizeof(char));
|
|
|
sprintf(regFileName, "./CURRENT_Total/Currents_%s_%05d", fname, timeStep);
|
|
|
printRegister(regMface, regJface, outputMesh.num_triangles, regFileName,1);
|
|
|
|
|
|
delete[] regMface;
|
|
|
delete[] regJface;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Calculate Scattered Fields at the points
|
|
|
for(int i = 0; i < num_nodes; i++)
|
|
|
{
|
|
|
E_field[i] = E_field[i] - Einc_field[i];
|
|
|
H_field[i] = H_field[i] - Hinc_field[i];
|
|
|
}
|
|
|
|
|
|
//writeProbeFieldsCSV( "./PROBES_sc", fname, timeStep, node_ids, E_field, H_field);
|
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------------
|
|
|
// Write the Scattered Fields
|
|
|
|
|
|
regMface = new Register[outputMesh.num_triangles];
|
|
|
regJface = new Register[outputMesh.num_triangles];
|
|
|
|
|
|
make_dir_if_not_exist("./CURRENT_SC");
|
|
|
|
|
|
|
|
|
for(int i = 0; i < outputMesh.num_triangles; i++)
|
|
|
{
|
|
|
|
|
|
std::vector<int> tri_nodes = outputMesh.getTriangle(i);
|
|
|
|
|
|
std::vector<float> normal_d = outputMesh.getNormal(i);
|
|
|
vtr NormalVtr(normal_d[0], normal_d[1], normal_d[2]);
|
|
|
regMface[i].initial(3);
|
|
|
regJface[i].initial(3);
|
|
|
|
|
|
for(j = 0; j < 3; j++)
|
|
|
{
|
|
|
int nodeIdx = tri_nodes[j];
|
|
|
vtr eLocalFace = E_field[nodeIdx];
|
|
|
vtr hLocalFace = H_field[nodeIdx];
|
|
|
|
|
|
// No averaging
|
|
|
regMface[i].setField(j, NormalVtr * eLocalFace * (-1.0));
|
|
|
regJface[i].setField(j, NormalVtr * hLocalFace * (1.0));
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// Register
|
|
|
memset(regFileName, 0, StrOutput * sizeof(char));
|
|
|
sprintf(regFileName, "./CURRENT_SC/Currents_%s_%05d", fname, timeStep);
|
|
|
printRegister(regMface, regJface, outputMesh.num_triangles, regFileName,1);
|
|
|
|
|
|
delete[] regMface;
|
|
|
delete[] regJface;
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
#endif
|
|
|
|