// FEMGRP.H definition of class FemGrp #ifndef FEMGRP_H #define FEMGRP_H #include "../dgtd-performance.hpp" #include "MeshPartition_METIS5.h" #include "array.h" #include "bc.h" #include "clipper.h" #include "coord.h" #include "counter.h" #include "cxxtimer.hpp" #include "densemat.h" #include "edge.h" #include "face.h" #include "gpc.h" #include "material.h" #include "node.h" #include "partition.h" #include "path.h" #include "plane_wave_mesh.h" #include "portmesh.h" #include "register.h" #include "sparmat.h" #include "tetra.h" #include "tree.h" #include "fftw3.h" #include #include #include #include #include "OutputSurfMesh.h" // By Qi Jian #include "Octree.h" // By Qi Jian #if defined(DGTD_USE_CUDA) #include #include #include #endif #ifdef WIN32 #include #else #include #endif #if defined(DGTD_USE_OPENCL) #include #define CPU "11th Gen Intel(R) Core(TM) i9-11980HK @ 2.60GHz" #define INTEL_GPU "Intel(R) OpenCL HD Graphics " #define NVIDIA_GPU "NVIDIA CUDA" #define HYBRID_GPU "Intel(R) OpenCL HD Graphics + NVIDIA CUDA" #endif Material getMaterialProp(Material*, tetra*, fp_t); vtr CalcEfield(double*, vtr*, fp_t, fp_t*, int); vtr CalcEfield(float*, vtr*, fp_t, fp_t*, int); void testVector(fp_t*, vtr, vtr*, fp_t, fp_t*, int); int bcTypeConvert(char*); void MapPrism(int*, int*, int, int*, int, int, int, int, int*); typedef struct coupInfo { int nei_subd_id; int my_dof_id; int nei_dof_id; } coupInfo; class FemGrp { friend class Octree; // Added by Qi Jian friend class FemGrpBalancing; // added Feb 3 2012 friend Material getMaterialProp(Material*, tetra*, fp_t); friend vtr CalcEfield(float*, vtr*, fp_t, fp_t*, int); friend vtr CalcEfield(double*, vtr*, fp_t, fp_t*, int); friend void testVector(fp_t*, vtr, vtr*, fp_t, fp_t*, int); protected: int nodeCNT; // numeber of nodes int edgeCNT; // number of edges (without repeted edges) int faceCNT; // number of faces int tetraCNT; // number of tetras int probeCNT; // number of points to print the fields int bcCNT; // number of boundary conditions bool regularRegionFlag; int regularCNT; // number of diferent types of tetra in regular areas int regularTetraCNT; // number of tetras that are regular int nonConformalCNT; //number of non-Conformal tetras int totalObjNum; // number of material objects, at least one, default is freespace int* bcMap; // array with the correlation between the order of the boundary conditions and the bcmap numbers char* fname; // File name fp_t unit; fp_t freq; // Freq: Central Modulation Frequency (MHz) bool nonConformalCase; // nonConformal int neighCNT; //Number of neighbours int noMatrixTetras; // Number of tetras that uses the matrices of a reference tetra bool usePade; //True if the calculation is going to be shortened by using Pade Acceleration int padeCNT; //Number of points in the probe file (from the beginning) that are going to be use in the pade aproximation fp_t padeTime; // Time when to perform Padé (it will be performed at the end of the Pade period that contains that time) vtr maxPoint; // (Max value of x, Max value of y, Max value of z) vtr minPoint; // (min value of x, min value of y, min value of z) fp_t_ts* fieldProbes; // fieldProbes[nProbes][nSamplings][components] it stores the fields in time domain calculated by DGTD Complex* tranferencePadeFunctionFD; // tranferencePadeFunctionFD[nProbes][nSamplings][components][fields] it stores the transference function in freq domain stimated by pade H(f) = Y(f)/X(f) fp_t_ts fieldEnergy; // Current field energy accumulated through NumOfSampleEnergyCheck fp_t_ts maxFieldEnergy; // Maximum Energy average from all the probes and NumOfSampleEnergyCheck fp_t_ts energyDecayFactor; // Current Avg Energy / Max Avg Energy to finish int numberOfEnergyPoints; // Number of points that are used to calculate the energy convergence (starting from the top) int tsPerSampling; //time steps per sampling int tsPerPade; //time steps per pade approx int tsSource; //length of the signal Complex* padeFreqs; //exp(-j * 2pi * freq fft * dt_pade) int* padeFreqConstant; // X[m \Delta f] (the values of m) [(0 -> M/2) (-M/2 -> -1)] Complex* sourceFreqDomain; fp_t* sourceTimeDomain; tetra* tetARRAY; // array with all the tetras node* ndARRAY; // array with all the nodes //ndARRAY = new node[nodeCNT]; bc* bcARRAY; // array with all the boundary conditions //bcARRAY = new bc[bcCNT] int* ncARRAY; // array with the index of the tetras that at least one of their faces are non-conformal int* regularReferenceARRAY; // array with the cnt value of the tetra that is going to be the reference for each tetra int* regionARRAY; // array with the cnt of the reference of each regular group (location 0 will be -1) *** NOTE: it won't exist if there isn't regular file *** edge** edgeARRAY; // array with all the edges ordered by getGlobalCnt face** faceARRAY; // array with all the faces ordered by cnt Material* objProp; // array of the materials in the structure Coordinate Coord; RegisterArray regE; RegisterArray regH; Register* regMface; Register* regJface; Register* regEface; Register* regHface; PlaneWaveMesh* planeWaveMesh; PlaneWaveMesh* InterSurfMesh; PlaneWaveMesh* SurfMesh; // Surface planewave mesh set* edgeSetPtr; // set with all the edges which will be deleted after mapping all the faces set* faceSetPtr; // set with all the faces which will be deleted after mapping all the faces // added for DG int N_class; //Number of LTS classes int NtimeSteps; int Nstart; int* LocalExciIndexE; //Time step each class is at an execution moment when the value is cheacked (E field) int* LocalExciIndexH; //Time step each class is at an execution moment when the value is cheacked (H field) fp_t TimeStep_dt; int ClassMul; //m in LTS formula (number of timesteps difference between classes (2m+1)^k) double* LocTimeSteps; // dt per class int* ClassTetraCnt; //Number of tetras in the LTS class int* ClassTetraOffset; //Number of tetras in smaller dt classes int** ClassTetraIndex; //Array of the array of indexes of the tetra per Class int** ClassTetraOrderedCNT; // Number of Conformal - NonConformal - Reg0 - Reg1 - ... Per Class int** ClassTetraOrderedOffset; double dt_min; double dt_max; size_t dimE; // addition of all the LocalEDOF size_t dimH; // addition of all the LocalHDOF size_t DimE; // DimE = dimE size_t DimH; // DimH = dimH size_t MemSizeE; // DimE * sizeof(fp_t or fp_t_ts); depends on CPU or GPU size_t MemSizeH; // DimH * sizeof(fp_t or fp_t_ts); depends on CPU or GPU ArrayFP* en; // ArrayFP(DimE); ArrayFP* hn_12; // ArrayFP(DimH) ArrayFP* en_1; // ArrayFP(DimE); ArrayFP* hn_32; // ArrayFP(DimH) fp_t factor_Flux; // Gamma(penalty) -> 0 central, other number = flux factor bool write_fields; // Output for fields of the full structure bool write_probes; // Output for probes(field in a point) bool WriteSurfFlag; // Flag to Write Fields / Currents on Surface (_out.tri) bool write_AnalyticalIncidentProbes; // Output the wave at any point if the structure is freespace bool writeWhilePade; // Output for probes(field in a point) while padé (usually false) bool writePadeTD; //Output TD for Padé points (usually false) bool scalbSty; // 1 if LTS off - 0 if LTS on fp_t To; // Tdelay: Delay of excitation pulse (sec) fp_t Tau; // Tau: Parameter for pulse width (sec) int TimeDistFlag; // Port Waveform (0)TH (1)Gauss int ExcitFlag; // Scattering Planewave Waveform: (0)Time Harmonic (1)Gauss (2)Neumann (3) Modulated Gauss fp_t FinalTime; // FinalTime: Termination Time (sec) int PolyFlag; // 12. Poly Order: (1) First order (2) Second order fp_t SamplingRate; // samplingFreq / NyquistFreq int* TetExcitIndexArray; // Array with the ids of the tetras that are ABC/Port int TetExcitIndexArraySize; //Number of ports and PWs int* ClassExcitationCount; //Size = N_class, content = number of element in that class that are PW or port int* ClassExcitationOffset; // Offset of the number of tetras int excitationFaces; // Number of excited faces int** ClassExcitationArray; //Array with the arrays of ids of exitation tetras of each class int* MapingArrayE; int* MapingArrayH; bool UseQuadratureMatrices; // device pointers #if defined(DGTD_USE_CUDA) // Host (CPU memory) fp_t_ts* Loc1E_h; fp_t_ts* Loc2E_h; fp_t_ts* Neigh1E_h; fp_t_ts* Neigh2E_h; fp_t_ts* Loc1H_h; fp_t_ts* Loc2H_h; fp_t_ts* Neigh1H_h; fp_t_ts* Neigh2H_h; fp_t_ts* InvE_h; fp_t_ts* InvH_h; fp_t_ts* En1_h; fp_t_ts* Hn32_h; int8_t* mapE_h; int8_t* mapH_h; int* NeighMap_h; int* NeighClass_h; //number of basis functions of neighbors used (already contains TetPolyOrder) int* NeighClassOffset_h; int* Neighbours_h; int* NeighboursOffset_h; int* ExcitationFacesCnt_h; int* ExcitationFacesOffset_h; int* ExcitationFacesNum_h; fp_t_ts* Z_face_pw_h; fp_t_ts* nd_coords_tet_h; fp_t_ts* nd_coords_face_h; //Regular int* classRegularTetraCnt_h; // Number of elements of each regular group per class (Reminder: 0 is irregular) int* classIrregularTetraOffset_h; // Number of irregular tetras in the smaller classes int* classNeighIrregular_h; // Number of neighbors of the irregular elements in each class int* classNeighIrregularOffset_h; // Number of neighbors of the irregular elements in the smaller classes int* classRegularGroupsCnt_h; // Number of regular groups per class int** classRegularGroupsId_h; // The regular groups numbers for each class int** classRegularTetraOffset_h; // Number of tetras in a class before each group (to obtain the global: add the value to the class offset) int* classRegularPMLGroupsCnt_h; // Number of regular groups per class int** classRegularPMLGroupsId_h; // The regular groups numbers for each class int** classRegularPMLTetraOffset_h; // Number of tetras in a class before each group (to obtain the global: add the value to the class offset) int** classRegularPMLTetraFaceOffset_h; int* classRegularGroupsNeighCnt_h; int totalRegularNeighFaceCnt; int totalRegularPMLNeighFaceCnt; int* nonRegularTetraCnt_h; int* nonRegularPMLTetraCnt_h; fp_t_ts* regularLoc1E_h; fp_t_ts* regularLoc2E_h; fp_t_ts* regularNeigh1E_h; fp_t_ts* regularNeigh2E_h; fp_t_ts* regularLoc1H_h; fp_t_ts* regularLoc2H_h; fp_t_ts* regularNeigh1H_h; fp_t_ts* regularNeigh2H_h; bool flag1; // Device (GPU memory) fp_t_ts* En_d; fp_t_ts* Hn12_d; fp_t_ts* En1_d; fp_t_ts* Hn32_d; fp_t_ts* Loc1E_d; fp_t_ts* Loc2E_d; fp_t_ts* Neigh1E_d; fp_t_ts* Neigh2E_d; fp_t_ts* Loc1H_d; fp_t_ts* Loc2H_d; fp_t_ts* Neigh1H_d; fp_t_ts* Neigh2H_d; int* NeighMap_d; int* Neighbours_d; int* NeighboursOffset_d; fp_t_ts* InvE_d; fp_t_ts* InvH_d; int8_t* mapE_d; int8_t* mapH_d; int* ExcitationFacesCnt_d; int* ExcitationFacesOffset_d; int* ExcitationFacesNum_d; fp_t_ts* nd_coords_tet_d; fp_t_ts* nd_coords_face_d; fp_t_ts* Z_face_pw_d; fp_t_ts* auxFieldInput; fp_t_ts* auxFieldOutput; int* mapIdLoc; // it maps the real location and the id (mapIdLoc[ID] = real position) // For Padé int* padeFreqConstant_d; cuDoubleComplex* tranferencePadeFunctionFD_h; //Regular fp_t_ts* regularLoc1E_d; fp_t_ts* regularLoc2E_d; fp_t_ts* regularNeigh1E_d; fp_t_ts* regularNeigh2E_d; fp_t_ts* regularLoc1H_d; fp_t_ts* regularLoc2H_d; fp_t_ts* regularNeigh1H_d; fp_t_ts* regularNeigh2H_d; // Regular PML fp_t_ts* regularPMLLoc1E_h; fp_t_ts* regularPMLLoc2E_h; fp_t_ts* regularPMLNeigh1E_h; fp_t_ts* regularPMLNeigh2E_h; fp_t_ts* regularPMLLoc1H_h; fp_t_ts* regularPMLLoc2H_h; fp_t_ts* regularPMLNeigh1H_h; fp_t_ts* regularPMLNeigh2H_h; fp_t_ts* regularPMLAuxE_h; fp_t_ts* regularPMLAuxH_h; fp_t_ts* regularPMLLoc1M_h; fp_t_ts* regularPMLLoc2M_h; fp_t_ts* regularPMLLoc1J_h; fp_t_ts* regularPMLLoc2J_h; // PML Variables bool PML_flag; bool interior_excitation_flag; fp_t PML_conductivity_order; fp_t PML_thickness; fp_t Ellipse_Rx, Ellipse_Ry, Ellipse_Rz; fp_t planewave_xmin, planewave_xmax, planewave_ymin, planewave_ymax, planewave_zmin, planewave_zmax; int* ClassPMLTetraCnt; int* ClassExcitation_sc_CNT; int* ClassPMLTetraOffset; int* classPMLTetraOffset_h; int* classRegularTetraCnt_all_h; int* classTetraOffset_loc_h; int* classPMLTetraOffset_loc_h; int* classNeighOffset_loc_h; int* classNeighPMLOffset_loc_h; int* classNeighIrregularCnt_h; int* classNeighPMLCnt_h; int* classNeighOffset_h; int* classNeighPMLOffset_h; int* classNeighPML_h; int* classregNeighPML_h; int num_elements_regular_PML; fp_t_ts* Loc1E_PML_h; fp_t_ts* Loc2E_PML_h; fp_t_ts* Loc1H_PML_h; fp_t_ts* Loc2H_PML_h; fp_t_ts* Neigh1E_PML_h; fp_t_ts* Neigh2E_PML_h; fp_t_ts* Neigh1H_PML_h; fp_t_ts* Neigh2H_PML_h; fp_t_ts* AuxE_h; fp_t_ts* AuxH_h; fp_t_ts* AuxM1_h; fp_t_ts* AuxM2_h; fp_t_ts* AuxJ1_h; fp_t_ts* AuxJ2_h; fp_t_ts* Loc1E_PML_d; fp_t_ts* Loc2E_PML_d; fp_t_ts* Loc1H_PML_d; fp_t_ts* Loc2H_PML_d; fp_t_ts* Neigh1E_PML_d; fp_t_ts* Neigh2E_PML_d; fp_t_ts* Neigh1H_PML_d; fp_t_ts* Neigh2H_PML_d; fp_t_ts* AuxE_d; fp_t_ts* AuxH_d; fp_t_ts* AuxM1_d; fp_t_ts* AuxJ1_d; fp_t_ts* AuxM2_d; fp_t_ts* AuxJ2_d; fp_t_ts* Mn_d; fp_t_ts* Mn1_d; fp_t_ts* Jn12_d; fp_t_ts* Jn32_d; fp_t_ts* r_Mn_d; fp_t_ts* r_Mn1_d; fp_t_ts* r_Jn12_d; fp_t_ts* r_Jn32_d; int PWorPort; int numberPML; int regularCNT_Normal; int regularCNT_PML; int nonregularCNT_Normal; int nonregularCNT_PML; int numRegPMLTetras; int numRegTetras; fp_t_ts* regularPMLLoc1E_d; fp_t_ts* regularPMLLoc2E_d; fp_t_ts* regularPMLLoc1H_d; fp_t_ts* regularPMLLoc2H_d; fp_t_ts* regularPMLNeigh1E_d; fp_t_ts* regularPMLNeigh2E_d; fp_t_ts* regularPMLNeigh1H_d; fp_t_ts* regularPMLNeigh2H_d; fp_t_ts* regularPMLAuxE_d; fp_t_ts* regularPMLAuxH_d; fp_t_ts* regularPMLLoc1M_d; fp_t_ts* regularPMLLoc2M_d; fp_t_ts* regularPMLLoc1J_d; fp_t_ts* regularPMLLoc2J_d; fp_t_ts* Etan_qp_h; fp_t_ts* Htan_qp_h; fp_t_ts* Etan_qp_d; fp_t_ts* Htan_qp_d; int* ClassPortOffset_h; // Offset of the tetras to seperate the Ports int* ClassPortCnt_h; // Number of tetras associated to the Ports int* ClassPortNum_h; // Port number at each offset std::unordered_map bcNumToPnum; // key: BCNum, value: pNum std::unordered_map pnumToBcNum; // key: pNum, value: BCNum std::vector portPnums; // List of port numbers in the structure int* PortFacePidx_h; // dense port index per excitation face (-1 if not a port) fp_t_ts* PortProbes_h; // (cx,cy,cz) per face fp_t_ts* Etan_center_h; // (Ex,Ey,Ez) per face fp_t_ts* Htan_center_h; // (Hx,Hy,Hz) per face int* FaceID_excitation_h; // Face ID per excitation face int* TetID_excitation_h; // Tet ID per excitation face std::vector>>>> portFaceCentroid_bary; int* PortFacePidx_d; // dense port index per excitation face (-1 if not a port) std::vector>>>> port_bary; int* PortProbeOffset_h; int* PortProbeCount_h; #endif public: const int TetPolyOrderDim[4] = {6, 12, 30, 45}; const int FacePolyOrderDim[4] = {3, 6, 12, 18}; const int DOFS_PER_EDGE[3] = {1, 2, 3}; const int DOFS_PER_FACE_1[3] = {0, 0, 2}; const int DOFS_PER_FACE_2[3] = {0, 0, 1}; // mapping of all the edges of a tetra int edgeMAP[6][2] = {{0, 1}, // node 0 - node 1 {0, 2}, // node 0 - node 2 {0, 3}, // node 0 - node 3 {1, 2}, // node 1 - node 2 {1, 3}, // node 1 - node 3 {2, 3}}; // node 2 - node 3 // mapping of all the faces of a tetra int faceMAP[4][3] = {{1, 2, 3}, // node 1 - node 2 - node 3 {0, 2, 3}, // node 0 - node 2 - node 3 {0, 1, 3}, // node 0 - node 1 - node 3 {0, 1, 2}}; // node 0 - node 1 - node 2 bool PlaneWaveBCFlag; // 1 -> There's a PW bool InterSurfFlag; int portCNT; portMesh* pMeshARRAY; bool PortBCFlag; // 1 -> There's a Port, not valid with PW at the same time int LocPortCnt; // number of ports in the structure fp_t scalingLength; int PEC_PMC_port_flag; // 1 if rectangular, 0 is use port solver // constructor FemGrp(); ~FemGrp(); // set functions void set_fname(char* name) { fname = name; } void set_freq(fp_t f) { freq = f; } void set_unit(const fp_t u) { unit = u; } void setNtimeSteps(int n) { NtimeSteps = n; } void setFluxFactor(const fp_t fac) { factor_Flux = fac; } void setWriteFields(bool val) { write_fields = val; } void setWriteProbes(bool val) { write_probes = val; } void setwriteAnalyticalIncidentProbes(bool val) { write_AnalyticalIncidentProbes = val; } void setWriteSurfFlag(bool val) { WriteSurfFlag = val; } void setscalbSty(bool val) { scalbSty = val; } void setPolyFlag(int n) { PolyFlag = n; } void setClassMul(int val) { ClassMul = val; } void setTo(fp_t val) { To = val; } void setTau(fp_t val) { Tau = val; } void setTimeDist(int val) { TimeDistFlag = val; } void setExciFlag(int val) { ExcitFlag = val; } void setFinalTime(fp_t val) { FinalTime = val; } void setSamplingRate(fp_t val) { SamplingRate = val; } void setRegularRegionFlag(bool val) { regularRegionFlag = val; } void setPade(bool val){ usePade = val; } void setPadeTime(fp_t val){ padeTime = val; } void setPadeCNT(int val){ padeCNT = val; } void setEnergyDecayFactor(fp_t_ts val){ energyDecayFactor = val; } void setNumberOfEnergyPoints(int val){ numberOfEnergyPoints = val; } // pre-processing functions for conformal void readNODE(); void readTETRA(); void readBC(); void readMaterial(); void readPROBE(); void readREGULAR(); void initializeMaxMinPoints(); void setMaxMinPoints(fp_t x, fp_t y, fp_t z); void setFname(char* name) { fname = name; }; // processing procedures void makeEdgeArray(); void makeNonConformalArray(); void makeFaceArray(); // post-processing procedures void writeFieldProbe(int timeStep); void writeFieldProbeCuBLAS(int timeStep); void writeFieldGlobalCuBLAS(int timeStep); bool checkEnergyDecay(); void prepPortPROBE(); void writeFieldGlobal(int timeStep); void writeEquivalentSurfaceCurrents_(int timeStep); void writeFieldProbeAfterPade(int tsSize); void writeAnalyticalIncidentPWProbes(int timeStep); void getAnalyticalPWField(tetra& tet, vtr& r, vtr& Einc, vtr& Hinc, int timeStep, fp_t dt); void printRegister(Register* regMface, Register* regJface, int FaceCnt, char* prjName, int order); void printTriMesh(int ndNum, node** ndArray, int fcNum, face** fcArray, char* prjName); // procedures that are RCS-related void makePlaneWaveMesh(); void AssignExcitParamToFace(); void makeInterSurfMesh(int); void makeInterSurfMesh(int, int); void makeSurfMesh(int); void makeSurfMesh(int, int); // procedures for port void makePortMeshes(); void solveWaveguidePorts(); void WriteWaveguidePortFields(); void AssignPortFieldsInFaces(); void EvaluateSparametersGlobal(int, fp_t, bool); void Generate_PEC_PML_Port(); void AssignPortFieldsInFaces_TEM(); // get functions int getbcCnt() const { return bcCNT; } int gettetraCnt() const { return tetraCNT; } int getNCCnt() const { return nonConformalCNT; } bool getNonConformal() const { return nonConformalCase;} char* getfname() const { return fname; } tetra* getTetraArray() const { return tetARRAY; } face** getFaceArray() const { return faceARRAY; } edge** getEdgeArray() const { return edgeARRAY; } node* getNodeArray() const { return ndARRAY; } fp_t getFreq() const { return freq; } int getClassMul() const { return ClassMul; } bc* getbcPtr(int); fp_t getFinalTime() const { return FinalTime; } fp_t getTo() const { return To; } fp_t getTau() const { return Tau; } int getTimeDist() const { return TimeDistFlag; } int getExciFlag() const { return ExcitFlag; } int getPolyFlag() const { return PolyFlag; } int getDimE() const { return dimE; } int getDimH() const { return dimH; } bool getRegularRegionFlag() const { return regularRegionFlag; } bool getUsePade() const { return usePade; } // Added for DG void AssignMaterialProperties(); void AssignTetraFlags(); void EvaluateTimeStep(); void GetMatrices(); void Get_dt_min_max(); void LocalTimeSteppingClassPartioning(); // procedures that correspond to assemble the global matrices void DG_AssignOffsets(); // Post Processing void Get_Coefficients_(tetra* tet, ArrayFP* origEn_1, ArrayFP* origHn_32); // FFT void Write_TD_Data(int tsPerSample, int ntimeSteps); // L2 Error calculation void GetTetQuadRule(int PolyOrder, int& points, fp_t**, fp_t*); void CalculateL2Error(int& timeStep, fp_t dt, int TimeDistFlag); void CalculateL2ErrorProbes(int& timeStep, fp_t dt, int TimeDistFlag); void GetExactSolution(tetra& tet, vtr& r, vtr&, vtr&, int, fp_t, int Flag); void SimplexToCartesian(tetra& tet, vtr& r, fp_t zeta[4]); // Free GPU void FreeGPU(); // Pade Acceleration bool calculatePade(int currentTimeStep); void calculatePadeEnd(int currentTimeStep); void getPadeCoef(fp_t* a_k, fp_t* b_k, fp_t_ts* field, int N, int component, fp_t_ts* maxField); void getPadeFreq(int N, int tsPerSample); void getPadeIFFTEnd(int probe, Complex* fDomainField); void getPadeIFFT(int probe, Complex* fDomainField); void testEnd(); fp_t getFreqDomainPade(fp_t* a_k, fp_t* b_k, int M_global, int N, Complex* field, int component, int probe, bool firstValue); void getSourceTimeDomain(int timeStep, fp_t* Einc, int ExcitFlag); void calculatePadeFreqDomain(int currentTimeStep); void getPadeIFFT(int probe, cuDoubleComplex* fDomainField); void getPadeCoefCUDA(fp_t* a_k, fp_t* b_k, fp_t* maxField, int local_id, cudaStream_t stream, int currentTimeStep); void getFreqDomainPadeCUDA(fp_t* a_k, fp_t* b_k, int M_global, int N, cuDoubleComplex* H_f, cudaStream_t stream); void calculatePadeEndCUDA(int currentTimeStep); bool calculatePadeCUDA(int currentTimeStep, bool isFirst, bool isEnd); bool studyPadeConvergence(cuDoubleComplex* oldField, cuDoubleComplex* newField, fp_t* maxFields, int totalPoints, int point); void printFD(int probe, cuDoubleComplex* fDomainField); // Matrix Free Implementation void SetUpMatrixVector(); // Local Time Stepping Functions Matrix Free Implementation void ComputeE_MatrixFree(int, fp_t); void ComputeH_MatrixFree(int, fp_t); void LeapFrogE(int, fp_t); void LeapFrogH(int, fp_t); void LTS_TimeUpdateGlobal_MatrixFree(); void numberDofs(); // DEVICE void PrepareGPUcuBLAS(); void LE_CuBLAS(cublasHandle_t handle, int class_i); void LH_CuBLAS(cublasHandle_t handle, int class_i); void TimeSteppingCuBLAS(); void ComputeE_cuBLAS(cublasHandle_t handle, int class_i); void ComputeH_cuBLAS(cublasHandle_t handle, int class_i); // QIJIAN: output Mesh OutputSurfMesh outputMesh; Octree octree_object; // Vector of tetra ids and barycentric coordinates for the probes // Each probes can be associated with multiple tetrahedral // Format: (number of tetra ,(tetra id, barycentric coordinates)) void initializeOctree(std::string prjName, bool non_Conformal_flag); std::vector>>>> tri_nodes_bary; // vector>> probes_bary; // 1: tet_id where the probe point is located 2: barycentric coords // Modified by qi jian std::vector>>>> probes_bary; // 1: tet_id where the probe point is located 2: barycentric coords void writePortIncidentProbeCuBLAS(int timeStep); void makeOutputSurfMesh(std::string prjName); void computeBarycentricEmbedding(); void writeCurrentsOutputSurfMesh_CuBLAS(int timeStep); void writeProbeFieldsCSV( const std::string& outputDir, // e.g. "./PROBES1" const std::string& fname, // simulation/project name int timeStep, // timestep number const std::vector& node_ids, // node IDs to write const std::vector& Efield, // electric field vectors const std::vector& Hfield // magnetic field vectors ); void prepPortFaceCentroidPROBE(); void writePortFieldProbeCuBLAS(int timeStep); private: int bcArrange(int); void readBcMap(); }; #endif