You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
766 lines
26 KiB
766 lines
26 KiB
// FEMGRP.H definition of class FemGrp
|
|
#ifndef FEMGRP_H
|
|
#define FEMGRP_H
|
|
|
|
#include "../dgtd-performance.hpp"
|
|
#include "MeshPartition_METIS5.h"
|
|
#include "array.h"
|
|
#include "bc.h"
|
|
#include "clipper.h"
|
|
#include "coord.h"
|
|
#include "counter.h"
|
|
#include "cxxtimer.hpp"
|
|
#include "densemat.h"
|
|
#include "edge.h"
|
|
#include "face.h"
|
|
#include "gpc.h"
|
|
#include "material.h"
|
|
#include "node.h"
|
|
#include "partition.h"
|
|
#include "path.h"
|
|
#include "plane_wave_mesh.h"
|
|
#include "portmesh.h"
|
|
#include "register.h"
|
|
#include "sparmat.h"
|
|
#include "tetra.h"
|
|
#include "tree.h"
|
|
#include "fftw3.h"
|
|
#include <iostream>
|
|
#include <list>
|
|
#include <set>
|
|
#include <type_traits>
|
|
|
|
#include "OutputSurfMesh.h" // By Qi Jian
|
|
#include "Octree.h" // By Qi Jian
|
|
|
|
#if defined(DGTD_USE_CUDA)
|
|
#include <cuComplex.h>
|
|
#include <cusolverDn.h>
|
|
#include <cublas_v2.h>
|
|
#endif
|
|
|
|
#ifdef WIN32
|
|
#include <windows.h>
|
|
#else
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#if defined(DGTD_USE_OPENCL)
|
|
#include <CL/opencl.h>
|
|
#define CPU "11th Gen Intel(R) Core(TM) i9-11980HK @ 2.60GHz"
|
|
#define INTEL_GPU "Intel(R) OpenCL HD Graphics "
|
|
#define NVIDIA_GPU "NVIDIA CUDA"
|
|
#define HYBRID_GPU "Intel(R) OpenCL HD Graphics + NVIDIA CUDA"
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Material getMaterialProp(Material*, tetra*, fp_t);
|
|
|
|
|
|
vtr CalcEfield(double*, vtr*, fp_t, fp_t*, int);
|
|
|
|
vtr CalcEfield(float*, vtr*, fp_t, fp_t*, int);
|
|
|
|
|
|
void testVector(fp_t*, vtr, vtr*, fp_t, fp_t*, int);
|
|
int bcTypeConvert(char*);
|
|
void MapPrism(int*, int*, int, int*, int, int, int, int, int*);
|
|
|
|
typedef struct coupInfo {
|
|
int nei_subd_id;
|
|
int my_dof_id;
|
|
int nei_dof_id;
|
|
} coupInfo;
|
|
|
|
class FemGrp {
|
|
|
|
friend class Octree; // Added by Qi Jian
|
|
|
|
friend class FemGrpBalancing; // added Feb 3 2012
|
|
friend Material getMaterialProp(Material*, tetra*, fp_t);
|
|
|
|
friend vtr CalcEfield(float*, vtr*, fp_t, fp_t*, int);
|
|
|
|
friend vtr CalcEfield(double*, vtr*, fp_t, fp_t*, int);
|
|
|
|
friend void testVector(fp_t*, vtr, vtr*, fp_t, fp_t*, int);
|
|
|
|
protected:
|
|
int nodeCNT; // numeber of nodes
|
|
int edgeCNT; // number of edges (without repeted edges)
|
|
int faceCNT; // number of faces
|
|
int tetraCNT; // number of tetras
|
|
int probeCNT; // number of points to print the fields
|
|
int bcCNT; // number of boundary conditions
|
|
|
|
bool regularRegionFlag;
|
|
int regularCNT; // number of diferent types of tetra in regular areas
|
|
int regularTetraCNT; // number of tetras that are regular
|
|
|
|
int nonConformalCNT; //number of non-Conformal tetras
|
|
int totalObjNum; // number of material objects, at least one, default is freespace
|
|
int* bcMap; // array with the correlation between the order of the boundary conditions and the bcmap numbers
|
|
char* fname; // File name
|
|
fp_t unit;
|
|
fp_t freq; // Freq: Central Modulation Frequency (MHz)
|
|
bool nonConformalCase; // nonConformal
|
|
int neighCNT; //Number of neighbours
|
|
int noMatrixTetras; // Number of tetras that uses the matrices of a reference tetra
|
|
|
|
bool usePade; //True if the calculation is going to be shortened by using Pade Acceleration
|
|
int padeCNT; //Number of points in the probe file (from the beginning) that are going to be use in the pade aproximation
|
|
fp_t padeTime; // Time when to perform Padé (it will be performed at the end of the Pade period that contains that time)
|
|
vtr maxPoint; // (Max value of x, Max value of y, Max value of z)
|
|
vtr minPoint; // (min value of x, min value of y, min value of z)
|
|
fp_t_ts* fieldProbes; // fieldProbes[nProbes][nSamplings][components] it stores the fields in time domain calculated by DGTD
|
|
Complex* tranferencePadeFunctionFD; // tranferencePadeFunctionFD[nProbes][nSamplings][components][fields] it stores the transference function in freq domain stimated by pade H(f) = Y(f)/X(f)
|
|
|
|
fp_t_ts fieldEnergy; // Current field energy accumulated through NumOfSampleEnergyCheck
|
|
fp_t_ts maxFieldEnergy; // Maximum Energy average from all the probes and NumOfSampleEnergyCheck
|
|
fp_t_ts energyDecayFactor; // Current Avg Energy / Max Avg Energy to finish
|
|
int numberOfEnergyPoints; // Number of points that are used to calculate the energy convergence (starting from the top)
|
|
|
|
int tsPerSampling; //time steps per sampling
|
|
int tsPerPade; //time steps per pade approx
|
|
int tsSource; //length of the signal
|
|
Complex* padeFreqs; //exp(-j * 2pi * freq fft * dt_pade)
|
|
int* padeFreqConstant; // X[m \Delta f] (the values of m) [(0 -> M/2) (-M/2 -> -1)]
|
|
Complex* sourceFreqDomain;
|
|
fp_t* sourceTimeDomain;
|
|
|
|
tetra* tetARRAY; // array with all the tetras
|
|
node* ndARRAY; // array with all the nodes //ndARRAY = new node[nodeCNT];
|
|
bc* bcARRAY; // array with all the boundary conditions //bcARRAY = new bc[bcCNT]
|
|
int* ncARRAY; // array with the index of the tetras that at least one of their faces are non-conformal
|
|
int* regularReferenceARRAY; // array with the cnt value of the tetra that is going to be the reference for each tetra
|
|
int* regionARRAY; // array with the cnt of the reference of each regular group (location 0 will be -1) *** NOTE: it won't exist if there isn't regular file ***
|
|
|
|
edge** edgeARRAY; // array with all the edges ordered by getGlobalCnt
|
|
face** faceARRAY; // array with all the faces ordered by cnt
|
|
|
|
Material* objProp; // array of the materials in the structure
|
|
Coordinate Coord;
|
|
RegisterArray regE;
|
|
RegisterArray regH;
|
|
Register* regMface;
|
|
Register* regJface;
|
|
Register* regEface;
|
|
Register* regHface;
|
|
|
|
PlaneWaveMesh* planeWaveMesh;
|
|
PlaneWaveMesh* InterSurfMesh;
|
|
PlaneWaveMesh* SurfMesh; // Surface planewave mesh
|
|
|
|
set<edge>* edgeSetPtr; // set with all the edges which will be deleted after mapping all the faces
|
|
set<face>* faceSetPtr; // set with all the faces which will be deleted after mapping all the faces
|
|
|
|
// added for DG
|
|
int N_class; //Number of LTS classes
|
|
int NtimeSteps;
|
|
int Nstart;
|
|
int* LocalExciIndexE; //Time step each class is at an execution moment when the value is cheacked (E field)
|
|
int* LocalExciIndexH; //Time step each class is at an execution moment when the value is cheacked (H field)
|
|
fp_t TimeStep_dt;
|
|
int ClassMul; //m in LTS formula (number of timesteps difference between classes (2m+1)^k)
|
|
double* LocTimeSteps; // dt per class
|
|
int* ClassTetraCnt; //Number of tetras in the LTS class
|
|
int* ClassTetraOffset; //Number of tetras in smaller dt classes
|
|
int** ClassTetraIndex; //Array of the array of indexes of the tetra per Class
|
|
int** ClassTetraOrderedCNT; // Number of Conformal - NonConformal - Reg0 - Reg1 - ... Per Class
|
|
int** ClassTetraOrderedOffset;
|
|
|
|
double dt_min;
|
|
double dt_max;
|
|
|
|
size_t dimE; // addition of all the LocalEDOF
|
|
size_t dimH; // addition of all the LocalHDOF
|
|
size_t DimE; // DimE = dimE
|
|
size_t DimH; // DimH = dimH
|
|
|
|
size_t MemSizeE; // DimE * sizeof(fp_t or fp_t_ts); depends on CPU or GPU
|
|
size_t MemSizeH; // DimH * sizeof(fp_t or fp_t_ts); depends on CPU or GPU
|
|
|
|
|
|
ArrayFP<fp_t>* en; // ArrayFP<fp_t>(DimE);
|
|
ArrayFP<fp_t>* hn_12; // ArrayFP<fp_t>(DimH)
|
|
ArrayFP<fp_t>* en_1; // ArrayFP<fp_t>(DimE);
|
|
ArrayFP<fp_t>* hn_32; // ArrayFP<fp_t>(DimH)
|
|
|
|
fp_t factor_Flux; // Gamma(penalty) -> 0 central, other number = flux factor
|
|
|
|
bool write_fields; // Output for fields of the full structure
|
|
bool write_probes; // Output for probes(field in a point)
|
|
bool WriteSurfFlag; // Flag to Write Fields / Currents on Surface (<base_name>_out.tri)
|
|
bool write_AnalyticalIncidentProbes; // Output the wave at any point if the structure is freespace
|
|
bool writeWhilePade; // Output for probes(field in a point) while padé (usually false)
|
|
bool writePadeTD; //Output TD for Padé points (usually false)
|
|
|
|
bool scalbSty; // 1 if LTS off - 0 if LTS on
|
|
fp_t To; // Tdelay: Delay of excitation pulse (sec)
|
|
fp_t Tau; // Tau: Parameter for pulse width (sec)
|
|
int TimeDistFlag; // Port Waveform (0)TH (1)Gauss
|
|
int ExcitFlag; // Scattering Planewave Waveform: (0)Time Harmonic (1)Gauss (2)Neumann (3) Modulated Gauss
|
|
fp_t FinalTime; // FinalTime: Termination Time (sec)
|
|
int PolyFlag; // 12. Poly Order: (1) First order (2) Second order
|
|
fp_t SamplingRate; // samplingFreq / NyquistFreq
|
|
|
|
int* TetExcitIndexArray; // Array with the ids of the tetras that are ABC/Port
|
|
int TetExcitIndexArraySize; //Number of ports and PWs
|
|
int* ClassExcitationCount; //Size = N_class, content = number of element in that class that are PW or port
|
|
int* ClassExcitationOffset; // Offset of the number of tetras
|
|
int excitationFaces; // Number of excited faces
|
|
|
|
int** ClassExcitationArray; //Array with the arrays of ids of exitation tetras of each class
|
|
|
|
int* MapingArrayE;
|
|
int* MapingArrayH;
|
|
|
|
bool UseQuadratureMatrices;
|
|
|
|
// device pointers
|
|
#if defined(DGTD_USE_CUDA)
|
|
|
|
// Host (CPU memory)
|
|
fp_t_ts* Loc1E_h;
|
|
fp_t_ts* Loc2E_h;
|
|
fp_t_ts* Neigh1E_h;
|
|
fp_t_ts* Neigh2E_h;
|
|
|
|
fp_t_ts* Loc1H_h;
|
|
fp_t_ts* Loc2H_h;
|
|
fp_t_ts* Neigh1H_h;
|
|
fp_t_ts* Neigh2H_h;
|
|
|
|
fp_t_ts* InvE_h;
|
|
fp_t_ts* InvH_h;
|
|
|
|
fp_t_ts* En1_h;
|
|
fp_t_ts* Hn32_h;
|
|
|
|
int8_t* mapE_h;
|
|
int8_t* mapH_h;
|
|
|
|
int* NeighMap_h;
|
|
int* NeighClass_h; //number of basis functions of neighbors used (already contains TetPolyOrder)
|
|
int* NeighClassOffset_h;
|
|
int* Neighbours_h;
|
|
int* NeighboursOffset_h;
|
|
|
|
int* ExcitationFacesCnt_h;
|
|
int* ExcitationFacesOffset_h;
|
|
int* ExcitationFacesNum_h;
|
|
|
|
fp_t_ts* Z_face_pw_h;
|
|
|
|
fp_t_ts* nd_coords_tet_h;
|
|
fp_t_ts* nd_coords_face_h;
|
|
|
|
//Regular
|
|
int* classRegularTetraCnt_h; // Number of elements of each regular group per class (Reminder: 0 is irregular)
|
|
int* classIrregularTetraOffset_h; // Number of irregular tetras in the smaller classes
|
|
|
|
int* classNeighIrregular_h; // Number of neighbors of the irregular elements in each class
|
|
int* classNeighIrregularOffset_h; // Number of neighbors of the irregular elements in the smaller classes
|
|
|
|
int* classRegularGroupsCnt_h; // Number of regular groups per class
|
|
int** classRegularGroupsId_h; // The regular groups numbers for each class
|
|
int** classRegularTetraOffset_h; // Number of tetras in a class before each group (to obtain the global: add the value to the class offset)
|
|
|
|
int* classRegularPMLGroupsCnt_h; // Number of regular groups per class
|
|
int** classRegularPMLGroupsId_h; // The regular groups numbers for each class
|
|
int** classRegularPMLTetraOffset_h; // Number of tetras in a class before each group (to obtain the global: add the value to the class offset)
|
|
int** classRegularPMLTetraFaceOffset_h;
|
|
|
|
int* classRegularGroupsNeighCnt_h;
|
|
|
|
int totalRegularNeighFaceCnt;
|
|
int totalRegularPMLNeighFaceCnt;
|
|
|
|
int* nonRegularTetraCnt_h;
|
|
int* nonRegularPMLTetraCnt_h;
|
|
|
|
fp_t_ts* regularLoc1E_h;
|
|
fp_t_ts* regularLoc2E_h;
|
|
fp_t_ts* regularNeigh1E_h;
|
|
fp_t_ts* regularNeigh2E_h;
|
|
|
|
fp_t_ts* regularLoc1H_h;
|
|
fp_t_ts* regularLoc2H_h;
|
|
fp_t_ts* regularNeigh1H_h;
|
|
fp_t_ts* regularNeigh2H_h;
|
|
|
|
|
|
bool flag1;
|
|
|
|
// Device (GPU memory)
|
|
fp_t_ts* En_d;
|
|
fp_t_ts* Hn12_d;
|
|
fp_t_ts* En1_d;
|
|
fp_t_ts* Hn32_d;
|
|
|
|
fp_t_ts* Loc1E_d;
|
|
fp_t_ts* Loc2E_d;
|
|
fp_t_ts* Neigh1E_d;
|
|
fp_t_ts* Neigh2E_d;
|
|
|
|
fp_t_ts* Loc1H_d;
|
|
fp_t_ts* Loc2H_d;
|
|
fp_t_ts* Neigh1H_d;
|
|
fp_t_ts* Neigh2H_d;
|
|
|
|
int* NeighMap_d;
|
|
int* Neighbours_d;
|
|
int* NeighboursOffset_d;
|
|
|
|
fp_t_ts* InvE_d;
|
|
fp_t_ts* InvH_d;
|
|
|
|
int8_t* mapE_d;
|
|
int8_t* mapH_d;
|
|
|
|
int* ExcitationFacesCnt_d;
|
|
int* ExcitationFacesOffset_d;
|
|
int* ExcitationFacesNum_d;
|
|
|
|
fp_t_ts* nd_coords_tet_d;
|
|
fp_t_ts* nd_coords_face_d;
|
|
|
|
fp_t_ts* Z_face_pw_d;
|
|
|
|
fp_t_ts* auxFieldInput;
|
|
fp_t_ts* auxFieldOutput;
|
|
|
|
int* mapIdLoc; // it maps the real location and the id (mapIdLoc[ID] = real position)
|
|
|
|
// For Padé
|
|
int* padeFreqConstant_d;
|
|
cuDoubleComplex* tranferencePadeFunctionFD_h;
|
|
|
|
//Regular
|
|
fp_t_ts* regularLoc1E_d;
|
|
fp_t_ts* regularLoc2E_d;
|
|
fp_t_ts* regularNeigh1E_d;
|
|
fp_t_ts* regularNeigh2E_d;
|
|
|
|
fp_t_ts* regularLoc1H_d;
|
|
fp_t_ts* regularLoc2H_d;
|
|
fp_t_ts* regularNeigh1H_d;
|
|
fp_t_ts* regularNeigh2H_d;
|
|
|
|
|
|
// Regular PML
|
|
fp_t_ts* regularPMLLoc1E_h;
|
|
fp_t_ts* regularPMLLoc2E_h;
|
|
fp_t_ts* regularPMLNeigh1E_h;
|
|
fp_t_ts* regularPMLNeigh2E_h;
|
|
|
|
fp_t_ts* regularPMLLoc1H_h;
|
|
fp_t_ts* regularPMLLoc2H_h;
|
|
fp_t_ts* regularPMLNeigh1H_h;
|
|
fp_t_ts* regularPMLNeigh2H_h;
|
|
|
|
fp_t_ts* regularPMLAuxE_h;
|
|
fp_t_ts* regularPMLAuxH_h;
|
|
fp_t_ts* regularPMLLoc1M_h;
|
|
fp_t_ts* regularPMLLoc2M_h;
|
|
fp_t_ts* regularPMLLoc1J_h;
|
|
fp_t_ts* regularPMLLoc2J_h;
|
|
|
|
|
|
|
|
// PML Variables
|
|
bool PML_flag;
|
|
bool interior_excitation_flag;
|
|
fp_t PML_conductivity_order;
|
|
fp_t PML_thickness;
|
|
fp_t Ellipse_Rx, Ellipse_Ry, Ellipse_Rz;
|
|
fp_t planewave_xmin, planewave_xmax, planewave_ymin, planewave_ymax, planewave_zmin, planewave_zmax;
|
|
|
|
int* ClassPMLTetraCnt;
|
|
int* ClassExcitation_sc_CNT;
|
|
int* ClassPMLTetraOffset;
|
|
int* classPMLTetraOffset_h;
|
|
|
|
int* classRegularTetraCnt_all_h;
|
|
int* classTetraOffset_loc_h;
|
|
int* classPMLTetraOffset_loc_h;
|
|
|
|
int* classNeighOffset_loc_h;
|
|
int* classNeighPMLOffset_loc_h;
|
|
int* classNeighIrregularCnt_h;
|
|
int* classNeighPMLCnt_h;
|
|
int* classNeighOffset_h;
|
|
int* classNeighPMLOffset_h;
|
|
int* classNeighPML_h;
|
|
|
|
int* classregNeighPML_h;
|
|
int num_elements_regular_PML;
|
|
|
|
fp_t_ts* Loc1E_PML_h;
|
|
fp_t_ts* Loc2E_PML_h;
|
|
fp_t_ts* Loc1H_PML_h;
|
|
fp_t_ts* Loc2H_PML_h;
|
|
fp_t_ts* Neigh1E_PML_h;
|
|
fp_t_ts* Neigh2E_PML_h;
|
|
fp_t_ts* Neigh1H_PML_h;
|
|
fp_t_ts* Neigh2H_PML_h;
|
|
|
|
fp_t_ts* AuxE_h;
|
|
fp_t_ts* AuxH_h;
|
|
fp_t_ts* AuxM1_h;
|
|
fp_t_ts* AuxM2_h;
|
|
fp_t_ts* AuxJ1_h;
|
|
fp_t_ts* AuxJ2_h;
|
|
|
|
|
|
fp_t_ts* Loc1E_PML_d;
|
|
fp_t_ts* Loc2E_PML_d;
|
|
fp_t_ts* Loc1H_PML_d;
|
|
fp_t_ts* Loc2H_PML_d;
|
|
fp_t_ts* Neigh1E_PML_d;
|
|
fp_t_ts* Neigh2E_PML_d;
|
|
fp_t_ts* Neigh1H_PML_d;
|
|
fp_t_ts* Neigh2H_PML_d;
|
|
|
|
fp_t_ts* AuxE_d;
|
|
fp_t_ts* AuxH_d;
|
|
fp_t_ts* AuxM1_d;
|
|
fp_t_ts* AuxJ1_d;
|
|
fp_t_ts* AuxM2_d;
|
|
fp_t_ts* AuxJ2_d;
|
|
|
|
fp_t_ts* Mn_d;
|
|
fp_t_ts* Mn1_d;
|
|
fp_t_ts* Jn12_d;
|
|
fp_t_ts* Jn32_d;
|
|
|
|
fp_t_ts* r_Mn_d;
|
|
fp_t_ts* r_Mn1_d;
|
|
fp_t_ts* r_Jn12_d;
|
|
fp_t_ts* r_Jn32_d;
|
|
|
|
int PWorPort;
|
|
|
|
|
|
int numberPML;
|
|
int regularCNT_Normal;
|
|
int regularCNT_PML;
|
|
|
|
int nonregularCNT_Normal;
|
|
int nonregularCNT_PML;
|
|
|
|
int numRegPMLTetras;
|
|
int numRegTetras;
|
|
|
|
|
|
|
|
fp_t_ts* regularPMLLoc1E_d;
|
|
fp_t_ts* regularPMLLoc2E_d;
|
|
fp_t_ts* regularPMLLoc1H_d;
|
|
fp_t_ts* regularPMLLoc2H_d;
|
|
fp_t_ts* regularPMLNeigh1E_d;
|
|
fp_t_ts* regularPMLNeigh2E_d;
|
|
fp_t_ts* regularPMLNeigh1H_d;
|
|
fp_t_ts* regularPMLNeigh2H_d;
|
|
|
|
fp_t_ts* regularPMLAuxE_d;
|
|
fp_t_ts* regularPMLAuxH_d;
|
|
fp_t_ts* regularPMLLoc1M_d;
|
|
fp_t_ts* regularPMLLoc2M_d;
|
|
fp_t_ts* regularPMLLoc1J_d;
|
|
fp_t_ts* regularPMLLoc2J_d;
|
|
|
|
|
|
fp_t_ts* Etan_qp_h;
|
|
fp_t_ts* Htan_qp_h;
|
|
fp_t_ts* Etan_qp_d;
|
|
fp_t_ts* Htan_qp_d;
|
|
|
|
|
|
int* ClassPortOffset_h; // Offset of the tetras to seperate the Ports
|
|
int* ClassPortCnt_h; // Number of tetras associated to the Ports
|
|
int* ClassPortNum_h; // Port number at each offset
|
|
std::unordered_map<int,int> bcNumToPnum; // key: BCNum, value: pNum
|
|
std::unordered_map<int,int> pnumToBcNum; // key: pNum, value: BCNum
|
|
std::vector<int> portPnums; // List of port numbers in the structure
|
|
|
|
int* PortFacePidx_h; // dense port index per excitation face (-1 if not a port)
|
|
fp_t_ts* PortProbes_h; // (cx,cy,cz) per face
|
|
fp_t_ts* Etan_center_h; // (Ex,Ey,Ez) per face
|
|
fp_t_ts* Htan_center_h; // (Hx,Hy,Hz) per face
|
|
int* FaceID_excitation_h; // Face ID per excitation face
|
|
int* TetID_excitation_h; // Tet ID per excitation face
|
|
std::vector<std::pair<int, std::vector<std::pair<int, std::array<double,4>>>>> portFaceCentroid_bary;
|
|
int* PortFacePidx_d; // dense port index per excitation face (-1 if not a port)
|
|
|
|
std::vector<std::pair<int, std::vector<std::pair<int, std::array<double,4>>>>> port_bary;
|
|
int* PortProbeOffset_h;
|
|
int* PortProbeCount_h;
|
|
|
|
|
|
#endif
|
|
|
|
public:
|
|
const int TetPolyOrderDim[4] = {6, 12, 30, 45};
|
|
const int FacePolyOrderDim[4] = {3, 6, 12, 18};
|
|
|
|
const int DOFS_PER_EDGE[3] = {1, 2, 3};
|
|
const int DOFS_PER_FACE_1[3] = {0, 0, 2};
|
|
const int DOFS_PER_FACE_2[3] = {0, 0, 1};
|
|
|
|
// mapping of all the edges of a tetra
|
|
int edgeMAP[6][2] = {{0, 1}, // node 0 - node 1
|
|
{0, 2}, // node 0 - node 2
|
|
{0, 3}, // node 0 - node 3
|
|
{1, 2}, // node 1 - node 2
|
|
{1, 3}, // node 1 - node 3
|
|
{2, 3}}; // node 2 - node 3
|
|
|
|
// mapping of all the faces of a tetra
|
|
int faceMAP[4][3] = {{1, 2, 3}, // node 1 - node 2 - node 3
|
|
{0, 2, 3}, // node 0 - node 2 - node 3
|
|
{0, 1, 3}, // node 0 - node 1 - node 3
|
|
{0, 1, 2}}; // node 0 - node 1 - node 2
|
|
|
|
bool PlaneWaveBCFlag; // 1 -> There's a PW
|
|
bool InterSurfFlag;
|
|
int portCNT;
|
|
portMesh* pMeshARRAY;
|
|
bool PortBCFlag; // 1 -> There's a Port, not valid with PW at the same time
|
|
int LocPortCnt; // number of ports in the structure
|
|
fp_t scalingLength;
|
|
|
|
int PEC_PMC_port_flag; // 1 if rectangular, 0 is use port solver
|
|
|
|
|
|
|
|
|
|
// constructor
|
|
FemGrp();
|
|
~FemGrp();
|
|
|
|
// set functions
|
|
void set_fname(char* name) { fname = name; }
|
|
void set_freq(fp_t f) { freq = f; }
|
|
void set_unit(const fp_t u) { unit = u; }
|
|
void setNtimeSteps(int n) { NtimeSteps = n; }
|
|
void setFluxFactor(const fp_t fac) { factor_Flux = fac; }
|
|
|
|
void setWriteFields(bool val) { write_fields = val; }
|
|
void setWriteProbes(bool val) { write_probes = val; }
|
|
void setwriteAnalyticalIncidentProbes(bool val) { write_AnalyticalIncidentProbes = val; }
|
|
void setWriteSurfFlag(bool val) { WriteSurfFlag = val; }
|
|
|
|
void setscalbSty(bool val) { scalbSty = val; }
|
|
void setPolyFlag(int n) { PolyFlag = n; }
|
|
void setClassMul(int val) { ClassMul = val; }
|
|
void setTo(fp_t val) { To = val; }
|
|
void setTau(fp_t val) { Tau = val; }
|
|
void setTimeDist(int val) { TimeDistFlag = val; }
|
|
void setExciFlag(int val) { ExcitFlag = val; }
|
|
void setFinalTime(fp_t val) { FinalTime = val; }
|
|
void setSamplingRate(fp_t val) { SamplingRate = val; }
|
|
void setRegularRegionFlag(bool val) { regularRegionFlag = val; }
|
|
void setPade(bool val){ usePade = val; }
|
|
void setPadeTime(fp_t val){ padeTime = val; }
|
|
void setPadeCNT(int val){ padeCNT = val; }
|
|
|
|
void setEnergyDecayFactor(fp_t_ts val){ energyDecayFactor = val; }
|
|
void setNumberOfEnergyPoints(int val){ numberOfEnergyPoints = val; }
|
|
|
|
// pre-processing functions for conformal
|
|
void readNODE();
|
|
void readTETRA();
|
|
void readBC();
|
|
void readMaterial();
|
|
void readPROBE();
|
|
void readREGULAR();
|
|
|
|
void initializeMaxMinPoints();
|
|
void setMaxMinPoints(fp_t x, fp_t y, fp_t z);
|
|
|
|
void setFname(char* name) { fname = name; };
|
|
|
|
// processing procedures
|
|
void makeEdgeArray();
|
|
void makeNonConformalArray();
|
|
void makeFaceArray();
|
|
|
|
// post-processing procedures
|
|
void writeFieldProbe(int timeStep);
|
|
void writeFieldProbeCuBLAS(int timeStep);
|
|
void writeFieldGlobalCuBLAS(int timeStep);
|
|
bool checkEnergyDecay();
|
|
|
|
|
|
void prepPortPROBE();
|
|
|
|
void writeFieldGlobal(int timeStep);
|
|
void writeEquivalentSurfaceCurrents_(int timeStep);
|
|
void writeFieldProbeAfterPade(int tsSize);
|
|
|
|
void writeAnalyticalIncidentPWProbes(int timeStep);
|
|
void getAnalyticalPWField(tetra& tet, vtr& r, vtr& Einc, vtr& Hinc, int timeStep, fp_t dt);
|
|
|
|
void printRegister(Register* regMface, Register* regJface, int FaceCnt, char* prjName, int order);
|
|
void printTriMesh(int ndNum, node** ndArray, int fcNum, face** fcArray, char* prjName);
|
|
|
|
// procedures that are RCS-related
|
|
void makePlaneWaveMesh();
|
|
void AssignExcitParamToFace();
|
|
void makeInterSurfMesh(int);
|
|
void makeInterSurfMesh(int, int);
|
|
void makeSurfMesh(int);
|
|
void makeSurfMesh(int, int);
|
|
|
|
// procedures for port
|
|
void makePortMeshes();
|
|
void solveWaveguidePorts();
|
|
void WriteWaveguidePortFields();
|
|
void AssignPortFieldsInFaces();
|
|
void EvaluateSparametersGlobal(int, fp_t, bool);
|
|
|
|
void Generate_PEC_PML_Port();
|
|
void AssignPortFieldsInFaces_TEM();
|
|
|
|
|
|
// get functions
|
|
int getbcCnt() const { return bcCNT; }
|
|
int gettetraCnt() const { return tetraCNT; }
|
|
int getNCCnt() const { return nonConformalCNT; }
|
|
bool getNonConformal() const { return nonConformalCase;}
|
|
char* getfname() const { return fname; }
|
|
tetra* getTetraArray() const { return tetARRAY; }
|
|
face** getFaceArray() const { return faceARRAY; }
|
|
edge** getEdgeArray() const { return edgeARRAY; }
|
|
node* getNodeArray() const { return ndARRAY; }
|
|
fp_t getFreq() const { return freq; }
|
|
int getClassMul() const { return ClassMul; }
|
|
bc* getbcPtr(int);
|
|
fp_t getFinalTime() const { return FinalTime; }
|
|
fp_t getTo() const { return To; }
|
|
fp_t getTau() const { return Tau; }
|
|
int getTimeDist() const { return TimeDistFlag; }
|
|
int getExciFlag() const { return ExcitFlag; }
|
|
int getPolyFlag() const { return PolyFlag; }
|
|
int getDimE() const { return dimE; }
|
|
int getDimH() const { return dimH; }
|
|
bool getRegularRegionFlag() const { return regularRegionFlag; }
|
|
bool getUsePade() const { return usePade; }
|
|
|
|
// Added for DG
|
|
void AssignMaterialProperties();
|
|
void AssignTetraFlags();
|
|
void EvaluateTimeStep();
|
|
void GetMatrices();
|
|
void Get_dt_min_max();
|
|
void LocalTimeSteppingClassPartioning();
|
|
|
|
// procedures that correspond to assemble the global matrices
|
|
void DG_AssignOffsets();
|
|
|
|
// Post Processing
|
|
void Get_Coefficients_(tetra* tet, ArrayFP<fp_t>* origEn_1, ArrayFP<fp_t>* origHn_32);
|
|
|
|
// FFT
|
|
void Write_TD_Data(int tsPerSample, int ntimeSteps);
|
|
|
|
// L2 Error calculation
|
|
void GetTetQuadRule(int PolyOrder, int& points, fp_t**, fp_t*);
|
|
void CalculateL2Error(int& timeStep, fp_t dt, int TimeDistFlag);
|
|
void CalculateL2ErrorProbes(int& timeStep, fp_t dt, int TimeDistFlag);
|
|
void GetExactSolution(tetra& tet, vtr& r, vtr&, vtr&, int, fp_t, int Flag);
|
|
void SimplexToCartesian(tetra& tet, vtr& r, fp_t zeta[4]);
|
|
|
|
// Free GPU
|
|
void FreeGPU();
|
|
|
|
// Pade Acceleration
|
|
bool calculatePade(int currentTimeStep);
|
|
void calculatePadeEnd(int currentTimeStep);
|
|
void getPadeCoef(fp_t* a_k, fp_t* b_k, fp_t_ts* field, int N, int component, fp_t_ts* maxField);
|
|
void getPadeFreq(int N, int tsPerSample);
|
|
void getPadeIFFTEnd(int probe, Complex* fDomainField);
|
|
void getPadeIFFT(int probe, Complex* fDomainField);
|
|
void testEnd();
|
|
fp_t getFreqDomainPade(fp_t* a_k, fp_t* b_k, int M_global, int N, Complex* field, int component, int probe, bool firstValue);
|
|
void getSourceTimeDomain(int timeStep, fp_t* Einc, int ExcitFlag);
|
|
|
|
void calculatePadeFreqDomain(int currentTimeStep);
|
|
void getPadeIFFT(int probe, cuDoubleComplex* fDomainField);
|
|
|
|
void getPadeCoefCUDA(fp_t* a_k, fp_t* b_k, fp_t* maxField, int local_id, cudaStream_t stream, int currentTimeStep);
|
|
void getFreqDomainPadeCUDA(fp_t* a_k, fp_t* b_k, int M_global, int N, cuDoubleComplex* H_f, cudaStream_t stream);
|
|
void calculatePadeEndCUDA(int currentTimeStep);
|
|
bool calculatePadeCUDA(int currentTimeStep, bool isFirst, bool isEnd);
|
|
bool studyPadeConvergence(cuDoubleComplex* oldField, cuDoubleComplex* newField, fp_t* maxFields, int totalPoints, int point);
|
|
void printFD(int probe, cuDoubleComplex* fDomainField);
|
|
|
|
// Matrix Free Implementation
|
|
void SetUpMatrixVector();
|
|
|
|
// Local Time Stepping Functions Matrix Free Implementation
|
|
void ComputeE_MatrixFree(int, fp_t);
|
|
void ComputeH_MatrixFree(int, fp_t);
|
|
void LeapFrogE(int, fp_t);
|
|
void LeapFrogH(int, fp_t);
|
|
void LTS_TimeUpdateGlobal_MatrixFree();
|
|
|
|
void numberDofs();
|
|
|
|
// DEVICE
|
|
|
|
void PrepareGPUcuBLAS();
|
|
void LE_CuBLAS(cublasHandle_t handle, int class_i);
|
|
void LH_CuBLAS(cublasHandle_t handle, int class_i);
|
|
void TimeSteppingCuBLAS();
|
|
void ComputeE_cuBLAS(cublasHandle_t handle, int class_i);
|
|
void ComputeH_cuBLAS(cublasHandle_t handle, int class_i);
|
|
|
|
|
|
// QIJIAN: output Mesh
|
|
OutputSurfMesh outputMesh;
|
|
Octree octree_object;
|
|
|
|
// Vector of tetra ids and barycentric coordinates for the probes
|
|
// Each probes can be associated with multiple tetrahedral
|
|
// Format: (number of tetra ,(tetra id, barycentric coordinates))
|
|
void initializeOctree(std::string prjName, bool non_Conformal_flag);
|
|
std::vector<std::pair<int, std::vector<std::pair<int, std::array<double, 4>>>>> tri_nodes_bary;
|
|
|
|
// vector<pair<int, array<double, 4>>> probes_bary; // 1: tet_id where the probe point is located 2: barycentric coords
|
|
// Modified by qi jian
|
|
std::vector<std::pair<int, std::vector<std::pair<int, std::array<double, 4>>>>> probes_bary; // 1: tet_id where the probe point is located 2: barycentric coords
|
|
void writePortIncidentProbeCuBLAS(int timeStep);
|
|
|
|
|
|
void makeOutputSurfMesh(std::string prjName);
|
|
void computeBarycentricEmbedding();
|
|
|
|
|
|
void writeCurrentsOutputSurfMesh_CuBLAS(int timeStep);
|
|
|
|
|
|
|
|
void writeProbeFieldsCSV(
|
|
const std::string& outputDir, // e.g. "./PROBES1"
|
|
const std::string& fname, // simulation/project name
|
|
int timeStep, // timestep number
|
|
const std::vector<int>& node_ids, // node IDs to write
|
|
const std::vector<vtr>& Efield, // electric field vectors
|
|
const std::vector<vtr>& Hfield // magnetic field vectors
|
|
);
|
|
|
|
void prepPortFaceCentroidPROBE();
|
|
void writePortFieldProbeCuBLAS(int timeStep);
|
|
|
|
private:
|
|
int bcArrange(int);
|
|
void readBcMap();
|
|
};
|
|
#endif |