This repository serve as a backup for my Maxwell-TD code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1889 lines
59 KiB

//
// Created by jundafeng on 3/23/22.
//
#ifndef DGTD_KERNELS_CU
#define DGTD_KERNELS_CU
#include "kernels.cuh"
/* Basic kernel: one tetra per thread
* Opt1: one block per tetra, one entry per thread, registers can be shuffled.
* New kernels can be parallelized in a smarter way:
* 1. volume integral can be parallelized in (12,12)
* 2. surface integral can be parallelized in (6,6)
* 3. coupling integral can be parallelized in (6,6)
* 4. excitation can be parallelized in (12,1)
*/
//-----------------------CONSTANTS--------------------------------------------------------------------------------------------
__device__ constexpr int GAUSS_POINT_NUM[4] = {6, 9, 9, 9};
__device__ constexpr int TetPolyOrderDim[4] = {6, 12, 30, 45};
__device__ constexpr int FacePolyOrderDim[4] = {3, 6, 12, 18};
#define g6_a 0.816847572980459
#define g6_b (1.0 - g6_a) / 2.0
#define g6_c 0.108103018168070
#define g6_d (1.0 - g6_c) / 2.0
#define g6_W1 0.109951743655322
#define g6_W2 0.223381589678011
#define g9_a 0.437525248383384
#define g9_b 1.0 - 2.0 * g9_a
#define g9_c 0.797112651860071
#define g9_d 0.165409927389841
#define g9_e 1.0 - g9_c - g9_d
#define g9_W1 0.205950504760887
#define g9_W2 0.063691414286223
#if defined(DGTD_USE_CUDA)
#include <cuda.h>
#include <cuda_runtime.h>
#include <cooperative_groups.h>
#include "cuda_utils.h"
__device__ fp_t_ts g2d_6[6][4] = {
g6_a, g6_b, g6_b, g6_W1,
g6_b, g6_a, g6_b, g6_W1,
g6_b, g6_b, g6_a, g6_W1,
g6_c, g6_d, g6_d, g6_W2,
g6_d, g6_c, g6_d, g6_W2,
g6_d, g6_d, g6_c, g6_W2};
// added by Seung-Cheol Lee & Marinos N. Vouvakis 08/12/02
// Order = 5
__device__ fp_t_ts g2d_9[9][4] = {
g9_b, g9_a, g9_a, g9_W1,
g9_a, g9_b, g9_a, g9_W1,
g9_a, g9_a, g9_b, g9_W1,
g9_c, g9_d, g9_e, g9_W2,
g9_c, g9_e, g9_d, g9_W2,
g9_d, g9_c, g9_e, g9_W2,
g9_d, g9_e, g9_c, g9_W2,
g9_e, g9_c, g9_d, g9_W2,
g9_e, g9_d, g9_c, g9_W2};
// __device__ const int FACE_BASIS_IJK[6][2] = {
// {1, 2},
// {0, 2},
// {0, 1},
// {1, 2},
// {0, 2},
// {0, 1},
// };
__device__ const int FACE_BASIS_IJK[18][3] = {
{1, 2, -1},
{0, 2, -1},
{0, 1, -1},
{1, 2, -1},
{0, 2, -1},
{0, 1, -1},
{0, 1, 2},
{0, 1, 2},
{1, 2, -1},
{0, 2, -1},
{0, 1, -1},
{0, 1, 2},
{0, 1, 2},
{0, 1, 2},
{0, 1, 2},
{0, 1, 2},
{0, 1, 2},
{0, 1, 2}
};
__device__ const int FAC2TET[4][18] = {
{5, 4, 3, 11, 10, 9, 12, 13, 25, 24, 23, 26, 30, 31, 32, 42, 43, 44},
{5, 2, 1, 11, 8, 7, 14, 15, 25, 22, 21, 27, 33, 34, 35, 42, 43, 44},
{4, 2, 0, 10, 8, 6, 16, 17, 24, 22, 20, 28, 36, 37, 38, 42, 43, 44},
{3, 1, 0, 9, 7, 6, 18, 19, 23, 21, 20, 29, 39, 40, 41, 42, 43, 44}
};
#endif
//----------------------------------------------------------------------------------------------------------------------------
#if defined(DGTD_USE_CUDA) || defined(DGTD_USE_CUDA_OPENCL)
__global__ void makeNeighField(int* MappingArray, fp_t_ts* Field, fp_t_ts* auxiliar, int elements)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if(idx < elements){
auxiliar[idx] = Field[MappingArray[idx]];
// printf("%d\n", MappingArray[idx]);
}
}
__global__ void makeNeighFieldRegular(int* MappingArray, fp_t_ts* Field, fp_t_ts* auxiliar, int elements, int polyOrder){
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if(idx < NumOfFaces * FacePolyOrderDim[polyOrder] * elements){
int blockId = idx / FacePolyOrderDim[polyOrder];
int basisId = idx % FacePolyOrderDim[polyOrder];
int tetId = blockId / NumOfFaces;
int faceId = blockId % NumOfFaces;
auxiliar[(faceId * elements + tetId) * FacePolyOrderDim[polyOrder] + basisId] = Field[MappingArray[idx]];
// for(int i = 0; i < NumOfFaces; i++){
// auxiliar[idx + i * elements] = Field[MappingArray[idx + i * FacePolyOrderDim[polyOrder]]];
// }
}
}
__global__ void makeNeighFieldRegular_custom(int* MappingArray, fp_t_ts* Field, fp_t_ts* auxiliar, int elements, int polyOrder, int numFaces){
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if(idx < numFaces * FacePolyOrderDim[polyOrder] * elements){
int blockId = idx / FacePolyOrderDim[polyOrder];
int basisId = idx % FacePolyOrderDim[polyOrder];
int tetId = blockId / numFaces;
int faceId = blockId % numFaces;
auxiliar[(faceId * elements + tetId) * FacePolyOrderDim[polyOrder] + basisId] = Field[MappingArray[idx]];
}
}
__global__ void addCouplingResults(fp_t_ts* Field, fp_t_ts* auxiliar, int* neighs, int* neighsOffset, int elements){
int tet = blockIdx.x * blockDim.y + threadIdx.y;
if(tet < elements){
int basis = blockDim.x;
fp_t_ts fieldAux = 0.0;
int offset = (neighsOffset[tet] - neighsOffset[0]) * basis;
for(int i = 0; i < neighs[tet]; i++){
fieldAux += auxiliar[offset + basis * i + threadIdx.x];
}
Field[tet * basis + threadIdx.x] += fieldAux;
}
}
__global__ void addCouplingResultsRegular(fp_t_ts* Field, fp_t_ts* auxiliar, int elements){
int tet = blockIdx.x * blockDim.y + threadIdx.y;
if(tet < elements){
int basis = blockDim.x;
fp_t_ts fieldAux = 0.0;
for(int i = 0; i < NumOfFaces; i++){
fieldAux += auxiliar[tet * basis + elements * basis * i + threadIdx.x];
}
Field[tet * basis + threadIdx.x] += fieldAux;
}
}
__global__ void addCouplingResultsRegular_custom(fp_t_ts* Field, fp_t_ts* auxiliar, int elements, int numFaces){
int tet = blockIdx.x * blockDim.y + threadIdx.y;
if(tet < elements)
{
int basis = blockDim.x;
fp_t_ts fieldAux = 0.0;
for(int i = 0; i < numFaces; i++)
{
fieldAux += auxiliar[tet * basis + elements * basis * i + threadIdx.x];
}
Field[tet * basis + threadIdx.x] += fieldAux;
}
}
__global__ void fill_ones_cols(fp_t_ts* __restrict__ M, int rows, int cols, int ld)
{
int r = blockIdx.x * blockDim.x + threadIdx.x;
int c = blockIdx.y * blockDim.y + threadIdx.y;
if (r < rows && c < cols) M[(size_t)c * ld + r] = 1.0f;
}
// 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 //
// 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 //
// 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 //
__device__ fp_t_ts dotP3131(const fp_t_ts *v1, const fp_t_ts *v2){
fp_t_ts result;
result = v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2];
return result;
}
__device__ void crossP3(const fp_t_ts *v1, const fp_t_ts *v2, fp_t_ts *result){
result[0] = v1[1] * v2[2] - v1[2] * v2[1];
result[1] = -(v1[0] * v2[2] - v1[2] * v2[0]);
result[2] = v1[0] * v2[1] - v1[1] * v2[0];
}
__device__ void scale3(fp_t_ts *v1, const fp_t_ts factor){
v1[0] = v1[0] * factor;
v1[1] = v1[1] * factor;
v1[2] = v1[2] * factor;
}
__device__ void scale3(const fp_t_ts *v1, const fp_t_ts factor, fp_t_ts *v2){
v2[0] = v1[0] * factor;
v2[1] = v1[1] * factor;
v2[2] = v1[2] * factor;
}
__device__ void unit3(fp_t_ts *v1){
fp_t_ts work[3];
fp_t_ts mag = sqrt(v1[0] * v1[0] + v1[1] * v1[1] + v1[2] * v1[2]);
work[0] = v1[0] / mag;
work[1] = v1[1] / mag;
work[2] = v1[2] / mag;
v1[0] = work[0];
v1[1] = work[1];
v1[2] = work[2];
}
__device__ void plus3(fp_t_ts *v1, fp_t_ts *v2){
v1[0] = v1[0] + v2[0];
v1[1] = v1[1] + v2[1];
v1[2] = v1[2] + v2[2];
}
__device__ void set3(const fp_t_ts *v1, fp_t_ts *v2){
v2[0] = v1[0];
v2[1] = v1[1];
v2[2] = v1[2];
}
__device__ void plus3(const fp_t_ts *v1, const fp_t_ts *v2, fp_t_ts *result){
result[0] = v1[0] + v2[0];
result[1] = v1[1] + v2[1];
result[2] = v1[2] + v2[2];
}
__device__ void plus33(const fp_t_ts *v1, const fp_t_ts *v2, const fp_t_ts *v3, fp_t_ts *result){
result[0] = v1[0] + v2[0] + v3[0];
result[1] = v1[1] + v2[1] + v3[1];
result[2] = v1[2] + v2[2] + v3[2];
}
__device__ void minus3(const fp_t_ts *v1, const fp_t_ts *v2, fp_t_ts *result){
result[0] = v1[0] - v2[0];
result[1] = v1[1] - v2[1];
result[2] = v1[2] - v2[2];
}
__device__ void reset3(fp_t_ts *result){
result[0] = 0.0;
result[1] = 0.0;
result[2] = 0.0;
}
__device__ void geometry3(const fp_t_ts *nd_coord, fp_t_ts lvtr[][3], fp_t_ts avtr[][3], fp_t_ts *vol){
int i, i1, i2, i3;
fp_t_ts vv[3], tt[3];
for (i = 0; i < 3; i++){
minus3(&nd_coord[(i + 1) * 3], &nd_coord[0], lvtr[i]);
}
for (i = 0; i < 3; i++){
i1 = (i + 1) % 4;
i2 = (i + 2) % 4;
i3 = (i + 3) % 4;
minus3(&nd_coord[i2 * 3], &nd_coord[i1 * 3], tt);
minus3(&nd_coord[i3 * 3], &nd_coord[i1 * 3], vv);
crossP3(tt, vv, avtr[i]);
scale3(avtr[i], 0.5);
minus3(&nd_coord[i * 3], &nd_coord[i1 * 3], tt);
if (dotP3131(avtr[i], tt) < 0.0)
scale3(avtr[i], -1);
;
}
reset3(avtr[3]);
plus33(avtr[0], avtr[1], avtr[2], avtr[3]);
scale3(avtr[3], -1);
(*vol) = dotP3131(avtr[2], lvtr[1]) / 3.0;
}
__device__ fp_t_ts magnitude3(const fp_t_ts *v1){
return sqrt(v1[0] * v1[0] + v1[1] * v1[1] + v1[2] * v1[2]);
}
__device__ void tet_center(const fp_t_ts *tet_nd_coord, fp_t_ts *tet_center){
tet_center[0] = (tet_nd_coord[0 * 3 + 0] + tet_nd_coord[1 * 3 + 0] + tet_nd_coord[2 * 3 + 0] + tet_nd_coord[3 * 3 + 0]) * 0.25;
tet_center[1] = (tet_nd_coord[0 * 3 + 1] + tet_nd_coord[1 * 3 + 1] + tet_nd_coord[2 * 3 + 1] + tet_nd_coord[3 * 3 + 1]) * 0.25;
tet_center[2] = (tet_nd_coord[0 * 3 + 2] + tet_nd_coord[1 * 3 + 2] + tet_nd_coord[2 * 3 + 2] + tet_nd_coord[3 * 3 + 2]) * 0.25;
}
// have to use 2d arrays in the device functions, using 1d array with offsets will result in erroneous values
__device__ void geometry2(const fp_t_ts *nd_coord, const fp_t_ts *tet0_nd_coord, fp_t_ts *normal, fp_t_ts nvtr[][3], fp_t_ts *area){
fp_t_ts t0[3], t1[3], tv[3], tet0center[3];
minus3(&nd_coord[2 * 3], &nd_coord[1 * 3], t0);
minus3(&nd_coord[0 * 3], &nd_coord[2 * 3], t1);
crossP3(t0, t1, normal);
(*area) = 0.5 * magnitude3(normal);
scale3(normal, 1 / magnitude3(normal));
crossP3(normal, t0, nvtr[0]);
crossP3(normal, t1, nvtr[1]);
if (dotP3131(nvtr[0], t1) < 0.0)
scale3(nvtr[0], -1);
if (dotP3131(nvtr[1], t0) > 0.0)
scale3(nvtr[1], -1);
tet_center(tet0_nd_coord, tet0center);
minus3(tet0center, &nd_coord[0 * 3], tv);
if (dotP3131(tv, normal) > 0.0)
scale3(normal, -1);
}
__device__ void geometry2(const fp_t_ts *nd_coord, fp_t_ts nvtr[][3], fp_t_ts *area){
fp_t_ts t0[3], t1[3], normal[3];
minus3(&nd_coord[2 * 3], &nd_coord[1 * 3], t0);
minus3(&nd_coord[0 * 3], &nd_coord[2 * 3], t1);
crossP3(t0, t1, normal);
(*area) = 0.5 * magnitude3(normal);
scale3(normal, 1 / magnitude3(normal));
crossP3(normal, t0, nvtr[0]);
crossP3(normal, t1, nvtr[1]);
if (dotP3131(nvtr[0], t1) < 0.0)
scale3(nvtr[0], -1);
if (dotP3131(nvtr[1], t0) > 0.0)
scale3(nvtr[1], -1);
}
__device__ void dotP3331(const fp_t_ts *tensor, const fp_t_ts *v1, fp_t_ts *result){
result[0] = v1[0] * tensor[0] + v1[1] * tensor[1] + v1[2] * tensor[2];
result[1] = v1[0] * tensor[3] + v1[1] * tensor[4] + v1[2] * tensor[5];
result[2] = v1[0] * tensor[6] + v1[1] * tensor[7] + v1[2] * tensor[8];
}
__device__ void makeCoeff33(const fp_t_ts avtr[][3], const fp_t_ts *epsr, fp_t_ts coeffA[][3]){
int i, j;
fp_t_ts temp[3];
for (i = 0; i < 3; i++){
for (j = 0; j < 3; j++){
dotP3331(epsr, avtr[j], temp);
coeffA[i][j] = dotP3131(temp, avtr[i]);
}
}
}
__device__ void quadObtention(int np, int id, fp_t_ts &zeta0, fp_t_ts &zeta1, fp_t_ts &zeta2, fp_t_ts &weight){
if (id < 0 || id >= np){
printf("Error in quadObtention");
return;
}
switch (np){
case 6:
zeta0 = g2d_6[id][0];
zeta1 = g2d_6[id][1];
zeta2 = g2d_6[id][2];
weight = g2d_6[id][3];
break;
case 9:
zeta0 = g2d_9[id][0];
zeta1 = g2d_9[id][1];
zeta2 = g2d_9[id][2];
weight = g2d_9[id][3];
break;
default:
break;
}
}
__device__ void faceBasisW(const fp_t_ts *node_coords, const fp_t_ts *zeta, const int p, fp_t_ts *W){
fp_t_ts gradZeta[3][3];
fp_t_ts area;
geometry2(node_coords, gradZeta, &area);
scale3(gradZeta[0], 0.5 / area);
scale3(gradZeta[1], 0.5 / area);
gradZeta[2][0] = 0;
gradZeta[2][1] = 0;
gradZeta[2][2] = 0;
minus3(gradZeta[2], gradZeta[0], gradZeta[2]);
minus3(gradZeta[2], gradZeta[1], gradZeta[2]);
int i = FACE_BASIS_IJK[p][0];
int j = FACE_BASIS_IJK[p][1];
int k = FACE_BASIS_IJK[p][2];
switch (p){
case 0:
scale3(gradZeta[j], zeta[i]);
scale3(gradZeta[i], zeta[j]);
minus3(gradZeta[j], gradZeta[i], W);
break;
case 1:
scale3(gradZeta[j], zeta[i]);
scale3(gradZeta[i], zeta[j]);
minus3(gradZeta[j], gradZeta[i], W);
break;
case 2:
scale3(gradZeta[j], zeta[i]);
scale3(gradZeta[i], zeta[j]);
minus3(gradZeta[j], gradZeta[i], W);
break;
case 3:
scale3(gradZeta[j], zeta[i]);
scale3(gradZeta[i], zeta[j]);
plus3(gradZeta[j], gradZeta[i], W);
scale3(W, 4.);
break;
case 4:
scale3(gradZeta[j], zeta[i]);
scale3(gradZeta[i], zeta[j]);
plus3(gradZeta[j], gradZeta[i], W);
scale3(W, 4.);
break;
case 5:
scale3(gradZeta[j], zeta[i]);
scale3(gradZeta[i], zeta[j]);
plus3(gradZeta[j], gradZeta[i], W);
scale3(W, 4.);
break;
case 6:
scale3(gradZeta[k], zeta[i]);
scale3(gradZeta[i], zeta[k]);
minus3(gradZeta[k], gradZeta[i], W);
scale3(W, 4 * zeta[j]);
break;
case 7:
scale3(gradZeta[j], zeta[i]);
scale3(gradZeta[i], zeta[j]);
minus3(gradZeta[j], gradZeta[i], W);
scale3(W, 4. * zeta[k]);
break;
case 8:
scale3(gradZeta[j], zeta[i] * zeta[i] - 2. * zeta[i] * zeta[j]);
scale3(gradZeta[i], zeta[j] * zeta[j] - 2. * zeta[i] * zeta[j]);
minus3(gradZeta[j], gradZeta[i], W);
scale3(W, 4.);
break;
case 9:
scale3(gradZeta[j], zeta[i] * zeta[i] - 2. * zeta[i] * zeta[j]);
scale3(gradZeta[i], zeta[j] * zeta[j] - 2. * zeta[i] * zeta[j]);
minus3(gradZeta[j], gradZeta[i], W);
scale3(W, 4.);
break;
case 10:
scale3(gradZeta[j], zeta[i] * zeta[i] - 2. * zeta[i] * zeta[j]);
scale3(gradZeta[i], zeta[j] * zeta[j] - 2. * zeta[i] * zeta[j]);
minus3(gradZeta[j], gradZeta[i], W);
scale3(W, 4.);
break;
case 11:
scale3(gradZeta[k], zeta[i] * zeta[j]);
scale3(gradZeta[j], zeta[i] * zeta[k]);
scale3(gradZeta[i], zeta[k] * zeta[j]);
plus33(gradZeta[k], gradZeta[j], gradZeta[i], W);
scale3(W, 8.);
break;
case 12:
scale3(gradZeta[k], zeta[j]);
scale3(gradZeta[j], zeta[k]);
minus3(gradZeta[k], gradZeta[j], W);
scale3(W, 4. * zeta[i] * zeta[i]);
break;
case 13:
scale3(gradZeta[k], zeta[i]);
scale3(gradZeta[i], zeta[k]);
minus3(gradZeta[k], gradZeta[i], W);
scale3(W, 4. * zeta[j] * zeta[j]);
break;
case 14:
scale3(gradZeta[j], zeta[i]);
scale3(gradZeta[i], zeta[j]);
minus3(gradZeta[j], gradZeta[i], W);
scale3(W, 4. * zeta[k] * zeta[k]);
break;
case 15:
W[0] = W[1] = W[2] = 0;
break;
case 16:
W[0] = W[1] = W[2] = 0;
break;
case 17:
W[0] = W[1] = W[2] = 0;
break;
default:
W[0] = W[1] = W[2] = 0;
printf("Check faceBasisW: you are asking for somthing out of scope");
break;
}
}
__device__ void getCartesianCoord(const fp_t_ts *node_coords, const fp_t_ts *zeta, fp_t_ts *pnt){
pnt[0] = node_coords[0 * 3 + 0] * zeta[0] + node_coords[1 * 3 + 0] * zeta[1] + node_coords[2 * 3 + 0] * zeta[2];
pnt[1] = node_coords[0 * 3 + 1] * zeta[0] + node_coords[1 * 3 + 1] * zeta[1] + node_coords[2 * 3 + 1] * zeta[2];
pnt[2] = node_coords[0 * 3 + 2] * zeta[0] + node_coords[1 * 3 + 2] * zeta[1] + node_coords[2 * 3 + 2] * zeta[2];
}
__device__ void PlaneWaveFields(int ExcitationFlag, fp_t_ts t, fp_t_ts to, fp_t_ts tau, const fp_t_ts *r, const fp_t_ts *ro, fp_t_ts freq_m, fp_t_ts *kvtr, fp_t_ts eta, fp_t_ts Emagnitude, fp_t_ts &IncidExcit_E, fp_t_ts &IncidExcit_H)
{
fp_t_ts omega = 2.0 * Pi * freq_m * MEGA;
fp_t_ts Exponent;
fp_t_ts SinModul;
fp_t_ts Neuman;
fp_t_ts work[3];
// printf("%d", ExcitationFlag);
switch (ExcitationFlag){
case 0:
minus3(r, ro, work);
Exponent = t - to - dotP3131(kvtr, work) / Vo;
// Time Harmonic
if (Exponent >= 0.0){
SinModul = cos(omega * Exponent);
IncidExcit_E = Emagnitude * SinModul;
IncidExcit_H = (Emagnitude / eta) * SinModul;
}else{
IncidExcit_E = 0.0;
IncidExcit_H = 0.0;
}
break;
case 1:
// Gauss Pulse
minus3(r, ro, work);
Exponent = t - to - dotP3131(kvtr, work) / Vo;
SinModul = cos(omega * Exponent);
IncidExcit_E = Emagnitude * SinModul * exp(-(Exponent * Exponent) / (tau * tau));
IncidExcit_H = (Emagnitude / eta) * SinModul * exp(-(Exponent * Exponent) / (tau * tau));
break;
case 2:
// Neuman Pulse
minus3(r, ro, work);
Exponent = t - to - dotP3131(kvtr, work) / Vo;
Neuman = (2.0 * Exponent) / (tau * tau);
IncidExcit_E = (Emagnitude * Neuman) * exp(-(Exponent * Exponent) / (tau * tau));
IncidExcit_H = ((Emagnitude / eta) * Neuman) * exp(-(Exponent * Exponent) / (tau * tau));
break;
case 3:
{
fp_t_ts work[3];
minus3(r, ro, work); // work = r - ro
Exponent = t - dotP3131(kvtr, work) / Vo;
if (Exponent >= 0.0 && Exponent <= to)
{
// Chirp parameters
fp_t_ts f_end = freq_m * MEGA; // Center frequency in Hz
fp_t_ts B = tau * MEGA; // Bandwidth in Hz
fp_t_ts f0 = f_end - B; // Start frequency
fp_t_ts f1 = f_end; // End frequency
fp_t_ts Tchirp = to; // Chirp duration
// Chirp phase
fp_t_ts chirpArg = 2.0 * Pi * f0 * Exponent
+ Pi * (f1 - f0) / Tchirp * Exponent * Exponent;
// Final tapered chirp signal
fp_t_ts chirpSignal = sin(chirpArg);
IncidExcit_E = Emagnitude * chirpSignal;
IncidExcit_H = (Emagnitude / eta) * chirpSignal;
}
else
{
IncidExcit_E = 0.0;
IncidExcit_H = 0.0;
}
break;
}
default:
break;
}
}
__global__ void CalculatePadeFreq(fp_t* a_k, fp_t* b_k, int M_global, int N, int* constantM, cuDoubleComplex* Hf){
int m = blockIdx.x * blockDim.x + threadIdx.x;
if(m < M_global){
fp_t sum_real_a = 0.0;
fp_t sum_imag_a = 0.0;
fp_t sum_real_b = 0.0;
fp_t sum_imag_b = 0.0;
for (int n = 0; n < N; n++) {
fp_t angle = -2.0f * Pi * constantM[m] * n / M_global;
fp_t cos_val, sin_val;
sincos(angle, &sin_val, &cos_val);
sum_real_a += a_k[n] * cos_val;
sum_imag_a += a_k[n] * sin_val;
sum_real_b += b_k[n] * cos_val;
sum_imag_b += b_k[n] * sin_val;
}
Hf[m] = cuCdiv(make_cuDoubleComplex(sum_real_a, sum_imag_a), make_cuDoubleComplex(sum_real_b, sum_imag_b));
}
return;
}
// === Utility Functions for Double Precision ===
__device__ __forceinline__ fp_t_ts cleanSmall(fp_t_ts val, fp_t_ts eps = 1e-12) {
return fabs(val) < eps ? 0.0 : val;
}
__device__ __forceinline__ void cleanVec3(fp_t_ts* v, fp_t_ts eps = 1e-12) {
for (int i = 0; i < 3; ++i)
v[i] = cleanSmall(v[i], eps);
}
// --------------------------------------------------------------------------------------
// Plane Wave Exciation Update
// --------------------------------------------------------------------------------------
__device__ void E_updateinc_PW(const fp_t_ts t, const fp_t_ts to, const fp_t_ts tau, const fp_t_ts freq_m, const fp_t_ts eta, const fp_t_ts Emagnitude, const fp_t_ts *ro, const fp_t_ts theta, const fp_t_ts phi, const fp_t_ts *Epol, const int ExcitationFlag, const fp_t_ts *r, const fp_t_ts *normalvtr, fp_t_ts *nxHinc, fp_t_ts *nxnxEinc)
{
fp_t_ts IncidExcit_E, IncidExcit_H;
fp_t_ts theta_in_rad = theta * (fp_t_ts)Pi / (fp_t_ts)180.0;
fp_t_ts phi_in_rad = phi * (fp_t_ts)Pi / (fp_t_ts)180.0;
fp_t_ts kvtr[3], Hpol[3], work[3];
kvtr[0] = sin(theta_in_rad) * cos(phi_in_rad);
kvtr[1] = sin(theta_in_rad) * sin(phi_in_rad);
kvtr[2] = cos(theta_in_rad);
// Fields in Time Domain
PlaneWaveFields(ExcitationFlag, t, to, tau, r, ro, freq_m, kvtr, eta, Emagnitude, IncidExcit_E, IncidExcit_H);
// EField
crossP3(normalvtr, Epol, work);
crossP3(normalvtr, work, nxnxEinc);
scale3(nxnxEinc, IncidExcit_E);
// HField
crossP3(kvtr, Epol, work);
crossP3(normalvtr, work, Hpol);
scale3(Hpol, IncidExcit_H, nxHinc);
/*
if (r[2] == 0.0 && r[1] < 1e-4 && r[1] > 0.0 && r[0] < 5e-4 && r[0] > 0.0)
{
printf(
"r=(%e,%e,%e) | nxnxE =(%e,%e,%e) | nxH =(%e,%e,%e) \n",
(double)r[0], (double)r[1], (double)r[2],
(double)nxnxEinc[0], (double)nxnxEinc[1], (double)nxnxEinc[2],
(double)nxHinc[0], (double)nxHinc[1], (double)nxHinc[2]
);
}
*/
for (int i = 0; i < 3; ++i)
{
if (fabs(nxHinc[i]) < (fp_t_ts)1e-12) nxHinc[i] = 0.0;
if (fabs(nxnxEinc[i]) < (fp_t_ts)1e-12) nxnxEinc[i] = 0.0;
}
}
__device__ void H_updateinc_PW(const fp_t_ts t, const fp_t_ts to, const fp_t_ts tau, const fp_t_ts freq_m, const fp_t_ts eta, const fp_t_ts Emagnitude, const fp_t_ts *ro, const fp_t_ts theta, const fp_t_ts phi, const fp_t_ts *Epol, const int ExcitationFlag, const fp_t_ts *r, const fp_t_ts *normalvtr, fp_t_ts *nxEinc, fp_t_ts *nxnxHinc)
{
fp_t_ts IncidExcit_E, IncidExcit_H;
fp_t_ts theta_in_rad = theta * (fp_t_ts)Pi / (fp_t_ts)180.0;
fp_t_ts phi_in_rad = phi * (fp_t_ts)Pi / (fp_t_ts)180.0;
fp_t_ts kvtr[3], Hpol[3], work[3];
kvtr[0] = sin(theta_in_rad) * cos(phi_in_rad);
kvtr[1] = sin(theta_in_rad) * sin(phi_in_rad);
kvtr[2] = cos(theta_in_rad);
PlaneWaveFields(ExcitationFlag, t, to, tau, r, ro, freq_m, kvtr, eta, Emagnitude, IncidExcit_E, IncidExcit_H);
// EField
crossP3(normalvtr, Epol, work);
scale3(work, IncidExcit_E, nxEinc);
// HField
crossP3(kvtr, Epol, Hpol);
crossP3(normalvtr, Hpol, work);
crossP3(normalvtr, work, nxnxHinc);
scale3(nxnxHinc, IncidExcit_H);
for (int i = 0; i < 3; ++i)
{
if (fabs(nxEinc[i]) < (fp_t_ts)1e-12) nxEinc[i] = 0.0;
if (fabs(nxnxHinc[i]) < (fp_t_ts)1e-12) nxnxHinc[i] = 0.0;
}
}
__global__ void addExcitationH_PML( int* ExcitationFacesCnt,
int* ExcitationFacesOffset,
int* ExcitationFacesNum,
int excitationCNT,
int excitationPMLCNT,
int8_t* map,
ExcitationProp exci_prop,
int PolyFlag,
fp_t_ts dt_muo, fp_t_ts t,
fp_t_ts* nd_coords_tet,
fp_t_ts* nd_coords_face,
fp_t_ts* Z_face,
fp_t_ts* InvMat,
fp_t_ts* Field)
{
int tet_id = blockIdx.x * blockDim.x + threadIdx.x;
if (tet_id < excitationCNT)
{
//-----TODO: Make this part dynamic----------------------------
const int TetPolyOrderDim2 = TetPolyOrderDim[3];
const int FacePolyOrderDim2 = FacePolyOrderDim[3];
fp_t_ts nxe_inc[TetPolyOrderDim2] = {0};
fp_t_ts nxnxh_inc[TetPolyOrderDim2] = {0};
//-------------------------------------------------------------
fp_t_ts Z = 0.0;
for (int i = 0; i < ExcitationFacesCnt[tet_id]; i++){
int face_id = ExcitationFacesOffset[tet_id] + i;
Z = Z_face[face_id];
fp_t_ts nvtr[2][3];
fp_t_ts normvtr[3] = {0};
fp_t_ts area = 0;
fp_t_ts nxentry;
fp_t_ts nxnxentry;
fp_t_ts z[3], w[3], r[3];
fp_t_ts wgt;
//-----------------TODO: Make this part dynamic--------------------------------------------
fp_t_ts nxelem[FacePolyOrderDim2] = {0};
fp_t_ts nxnxelem[FacePolyOrderDim2] = {0};
//-----------------------------------------------------------------------------------------
fp_t_ts nxE_inc[3];
fp_t_ts nxnxH_inc[3];
geometry2(&nd_coords_face[3 * 3 * face_id], &nd_coords_tet[4 * 3 * tet_id], normvtr, nvtr, &area);
for (int j = 0; j < FacePolyOrderDim[PolyFlag]; j++){
nxentry = 0;
nxnxentry = 0;
for (int k = 0; k < GAUSS_POINT_NUM[PolyFlag]; k++){
quadObtention(GAUSS_POINT_NUM[PolyFlag], k, z[0], z[1], z[2], wgt);
faceBasisW(&nd_coords_face[3 * 3 * face_id], z, j, w);
getCartesianCoord(&nd_coords_face[3 * 3 * face_id], z, r);
H_updateinc_PW(t, exci_prop.to, exci_prop.tau, exci_prop.freq_m, Z, exci_prop.Emagnitude, exci_prop.ro, exci_prop.theta, exci_prop.phi, exci_prop.Epol, exci_prop.ExcitationFlag, r, normvtr, nxE_inc, nxnxH_inc);
if (!(isnan(nxE_inc[0]))){
nxentry += area * wgt * dotP3131(w, nxE_inc);
nxnxentry += area * wgt * dotP3131(w, nxnxH_inc);
}
}
//nxelem[j] += nxentry;
//nxnxelem[j] += nxnxentry;
nxelem[j] += cleanSmall(nxentry);
nxnxelem[j] += cleanSmall(nxnxentry);
}
for (int j = 0; j < FacePolyOrderDim[PolyFlag]; j++){
// printf("%d\n", ExcitationFacesNum[face_id]);
nxe_inc[FAC2TET[ExcitationFacesNum[face_id]][j]] += nxelem[j];
nxnxh_inc[FAC2TET[ExcitationFacesNum[face_id]][j]] += nxnxelem[j];
}
}
for (int i = 0; i < TetPolyOrderDim[PolyFlag]; i++){
nxe_inc[i] *= -dt_muo;
//nxe_inc[i] += (-Z * dt_muo) * nxnxh_inc[i];
nxe_inc[i] *= map[tet_id * TetPolyOrderDim[PolyFlag] + i];
}
fp_t_ts sum;
for (int i = 0; i < TetPolyOrderDim[PolyFlag]; i++){
sum = 0.0;
for (int j = 0; j < TetPolyOrderDim[PolyFlag]; j++){
sum += InvMat[tet_id * (TetPolyOrderDim[PolyFlag] * TetPolyOrderDim[PolyFlag]) + j * TetPolyOrderDim[PolyFlag] + i] * nxe_inc[j] * map[tet_id * TetPolyOrderDim[PolyFlag] + i];
}
sum = cleanSmall(sum);
if (tet_id < excitationPMLCNT)
{
Field[tet_id * TetPolyOrderDim[PolyFlag] + i] -= 0.5 * sum; // Scattered Region
}
else
{
Field[tet_id * TetPolyOrderDim[PolyFlag] + i] += 0.5 * sum; // Total Region
}
}
}
}
__global__ void addExcitationE_PML( int* ExcitationFacesCnt,
int* ExcitationFacesOffset,
int* ExcitationFacesNum,
int excitationCNT,
int excitationPMLCNT,
int8_t* map,
ExcitationProp exci_prop,
int PolyFlag,
fp_t_ts dt_epsilon, fp_t_ts t,
fp_t_ts* nd_coords_tet,
fp_t_ts* nd_coords_face,
fp_t_ts* Z_face,
fp_t_ts* InvMat,
fp_t_ts* Field){
int tet_id = blockIdx.x * blockDim.x + threadIdx.x;
if (tet_id < excitationCNT){
//-----TODO: Make this part dynamic----------------------------
const int TetPolyOrderDim2 = TetPolyOrderDim[3];
const int FacePolyOrderDim2 = FacePolyOrderDim[3];
fp_t_ts nxh_inc[TetPolyOrderDim2] = {0};
fp_t_ts nxnxe_inc[TetPolyOrderDim2] = {0};
//-------------------------------------------------------------
fp_t_ts Y = 0.0;
for (int i = 0; i < ExcitationFacesCnt[tet_id]; i++){
int face_id = ExcitationFacesOffset[tet_id] + i;
Y = 1 / Z_face[face_id];
fp_t_ts nvtr[2][3];
fp_t_ts normvtr[3] = {0};
fp_t_ts area = 0;
fp_t_ts nxentry;
fp_t_ts nxnxentry;
fp_t_ts z[3], w[3], r[3];
fp_t_ts wgt;
//-----------------TODO: Make this part dynamic--------------------------------------------
fp_t_ts nxelem[FacePolyOrderDim2] = {0};
fp_t_ts nxnxelem[FacePolyOrderDim2] = {0};
//-----------------------------------------------------------------------------------------
fp_t_ts nxH_inc[3];
fp_t_ts nxnxE_inc[3];
geometry2(&nd_coords_face[3 * 3 * face_id], &nd_coords_tet[4 * 3 * tet_id], normvtr, nvtr, &area);
for (int j = 0; j < FacePolyOrderDim[PolyFlag]; j++){
nxentry = 0;
nxnxentry = 0;
for (int k = 0; k < GAUSS_POINT_NUM[PolyFlag]; k++){
quadObtention(GAUSS_POINT_NUM[PolyFlag], k, z[0], z[1], z[2], wgt);
faceBasisW(&nd_coords_face[3 * 3 * face_id], z, j, w);
getCartesianCoord(&nd_coords_face[3 * 3 * face_id], z, r);
E_updateinc_PW(t, exci_prop.to, exci_prop.tau, exci_prop.freq_m, Z_face[face_id], exci_prop.Emagnitude, exci_prop.ro,
exci_prop.theta, exci_prop.phi, exci_prop.Epol, exci_prop.ExcitationFlag, r, normvtr, nxH_inc, nxnxE_inc);
if (!(isnan(nxH_inc[0]))){
nxentry += area * wgt * dotP3131(w, nxH_inc);
nxnxentry += area * wgt * dotP3131(w, nxnxE_inc);
}
}
//nxelem[j] += nxentry;
//nxnxelem[j] += nxnxentry;
nxelem[j] += cleanSmall(nxentry);
nxnxelem[j] += cleanSmall(nxnxentry);
}
for (int j = 0; j < FacePolyOrderDim[PolyFlag]; j++){
// printf("%d\n", ExcitationFacesNum[face_id]);
nxh_inc[FAC2TET[ExcitationFacesNum[face_id]][j]] += nxelem[j];
nxnxe_inc[FAC2TET[ExcitationFacesNum[face_id]][j]] += nxnxelem[j];
}
}
for (int i = 0; i < TetPolyOrderDim[PolyFlag]; i++){
nxh_inc[i] *= dt_epsilon;
//nxh_inc[i] += (-Y * dt_epsilon) * nxnxe_inc[i];
nxh_inc[i] *= map[tet_id * TetPolyOrderDim[PolyFlag] + i];
}
fp_t_ts sum;
for (int i = 0; i < TetPolyOrderDim[PolyFlag]; i++){
sum = 0.0;
for (int j = 0; j < TetPolyOrderDim[PolyFlag]; j++)
{
sum += InvMat[tet_id * (TetPolyOrderDim[PolyFlag] * TetPolyOrderDim[PolyFlag]) + j * TetPolyOrderDim[PolyFlag] + i] * nxh_inc[j] * map[tet_id * TetPolyOrderDim[PolyFlag] + i];
}
sum = cleanSmall(sum);
if (tet_id < excitationPMLCNT)
{
Field[tet_id * TetPolyOrderDim[PolyFlag] + i] -= 0.5 * sum; // Scattered Region
}
else
{
Field[tet_id * TetPolyOrderDim[PolyFlag] + i] += 0.5 * sum; // Total Region
}
}
}
}
__global__ void addExcitationE( int* ExcitationFacesCnt,
int* ExcitationFacesOffset,
int* ExcitationFacesNum,
int excitationCNT,
int8_t* map,
ExcitationProp exci_prop,
int PolyFlag,
fp_t_ts dt_epsilon, fp_t_ts t,
fp_t_ts* nd_coords_tet,
fp_t_ts* nd_coords_face,
fp_t_ts* Z_face,
fp_t_ts* InvMat,
fp_t_ts* Field)
{
int tet_id = blockIdx.x * blockDim.x + threadIdx.x;
if (tet_id < excitationCNT)
{
//-----TODO: Make this part dynamic----------------------------
const int TetPolyOrderDim2 = TetPolyOrderDim[3];
const int FacePolyOrderDim2 = FacePolyOrderDim[3];
fp_t_ts nxh_inc[TetPolyOrderDim2] = {0};
fp_t_ts nxnxe_inc[TetPolyOrderDim2] = {0};
//-------------------------------------------------------------
fp_t_ts Y = 0.0;
for (int i = 0; i < ExcitationFacesCnt[tet_id]; i++)
{
int face_id = ExcitationFacesOffset[tet_id] + i;
Y = 1 / Z_face[face_id];
fp_t_ts nvtr[2][3];
fp_t_ts normvtr[3] = {0};
fp_t_ts area = 0;
fp_t_ts nxentry;
fp_t_ts nxnxentry;
fp_t_ts z[3], w[3], r[3];
fp_t_ts wgt;
//-----------------TODO: Make this part dynamic--------------------------------------------
fp_t_ts nxelem[FacePolyOrderDim2] = {0};
fp_t_ts nxnxelem[FacePolyOrderDim2] = {0};
//-----------------------------------------------------------------------------------------
fp_t_ts nxH_inc[3];
fp_t_ts nxnxE_inc[3];
geometry2(&nd_coords_face[3 * 3 * face_id], &nd_coords_tet[4 * 3 * tet_id], normvtr, nvtr, &area);
for (int j = 0; j < FacePolyOrderDim[PolyFlag]; j++)
{
nxentry = 0;
nxnxentry = 0;
for (int k = 0; k < GAUSS_POINT_NUM[PolyFlag]; k++)
{
quadObtention(GAUSS_POINT_NUM[PolyFlag], k, z[0], z[1], z[2], wgt);
faceBasisW(&nd_coords_face[3 * 3 * face_id], z, j, w);
getCartesianCoord(&nd_coords_face[3 * 3 * face_id], z, r);
E_updateinc_PW(t, exci_prop.to, exci_prop.tau, exci_prop.freq_m, Z_face[face_id], exci_prop.Emagnitude,
exci_prop.ro, exci_prop.theta, exci_prop.phi,exci_prop.Epol,
exci_prop.ExcitationFlag,r,normvtr,nxH_inc,nxnxE_inc);
if (!(isnan(nxH_inc[0])))
{
nxentry += area * wgt * dotP3131(w, nxH_inc);
nxnxentry += area * wgt * dotP3131(w, nxnxE_inc);
}
}
//printf("face_id=%d j=%d nxentry=%.6e nxnxentry=%.6e\n", face_id, j, (double)nxentry, (double)nxnxentry);
nxelem[j] += nxentry;
nxnxelem[j] += nxnxentry;
}
for (int j = 0; j < FacePolyOrderDim[PolyFlag]; j++){
//printf("%d\n", ExcitationFacesNum[face_id]);
nxh_inc[FAC2TET[ExcitationFacesNum[face_id]][j]] += nxelem[j];
nxnxe_inc[FAC2TET[ExcitationFacesNum[face_id]][j]] += nxnxelem[j];
//printf("tet_id=%d face_id=%d j=%d TetNode=%d nxh_inc=%.6e nxnxe_inc=%.6e\n", tet_id, face_id, j, FAC2TET[ExcitationFacesNum[face_id]][j], (double)nxh_inc[FAC2TET[ExcitationFacesNum[face_id]][j]], (double)nxnxe_inc[FAC2TET[ExcitationFacesNum[face_id]][j]] );
}
}
for (int i = 0; i < TetPolyOrderDim[PolyFlag]; i++){
nxh_inc[i] *= dt_epsilon;
//nxh_inc[i] += (-Y * dt_epsilon) * nxnxe_inc[i];
nxh_inc[i] += (-Y * dt_epsilon) * nxnxe_inc[i];
nxh_inc[i] *= map[tet_id * TetPolyOrderDim[PolyFlag] + i];
//printf("tet_id=%d i=%d nxh_inc=%.6e nxnxe_inc=%6e Y=%6e dt_epsilon=%6e \n", tet_id, i, (double)nxh_inc[i], (double)nxnxe_inc[i], (double)Y, (double)dt_epsilon );
}
fp_t_ts sum;
for (int i = 0; i < TetPolyOrderDim[PolyFlag]; i++){
sum = 0.0;
for (int j = 0; j < TetPolyOrderDim[PolyFlag]; j++){
sum += InvMat[tet_id * (TetPolyOrderDim[PolyFlag] * TetPolyOrderDim[PolyFlag]) + j * TetPolyOrderDim[PolyFlag] + i] * nxh_inc[j] * map[tet_id * TetPolyOrderDim[PolyFlag] + i];
//printf("InvMat[%d][%d]=%.6e | nxh_inc[%d]=%.6e | map=%d\n", j, i, (double)InvMat[tet_id * (TetPolyOrderDim[PolyFlag] * TetPolyOrderDim[PolyFlag]) + j * TetPolyOrderDim[PolyFlag] + i], j, (double)nxh_inc[j], map[tet_id * TetPolyOrderDim[PolyFlag] + i]);
}
//printf("E-field update: tet_id=%d i=%d sum=%.6e\n", tet_id, i, (double)sum);
Field[tet_id * TetPolyOrderDim[PolyFlag] + i] += sum;
}
}
}
__global__ void addExcitationH( int* ExcitationFacesCnt,
int* ExcitationFacesOffset,
int* ExcitationFacesNum,
int excitationCNT,
int8_t* map,
ExcitationProp exci_prop,
int PolyFlag,
fp_t_ts dt_muo, fp_t_ts t,
fp_t_ts* nd_coords_tet,
fp_t_ts* nd_coords_face,
fp_t_ts* Z_face,
fp_t_ts* InvMat,
fp_t_ts* Field)
{
int tet_id = blockIdx.x * blockDim.x + threadIdx.x;
if (tet_id < excitationCNT)
{
//printf("ExcitationFacesOffset = %d\n",ExcitationFacesOffset[tet_id]);
//-----TODO: Make this part dynamic----------------------------
const int TetPolyOrderDim2 = TetPolyOrderDim[3];
const int FacePolyOrderDim2 = FacePolyOrderDim[3];
fp_t_ts nxe_inc[TetPolyOrderDim2] = {0};
fp_t_ts nxnxh_inc[TetPolyOrderDim2] = {0};
//-------------------------------------------------------------
fp_t_ts Z = 0.0;
for (int i = 0; i < ExcitationFacesCnt[tet_id]; i++){
int face_id = ExcitationFacesOffset[tet_id] + i;
Z = Z_face[face_id];
fp_t_ts nvtr[2][3];
fp_t_ts normvtr[3] = {0};
fp_t_ts area = 0;
fp_t_ts nxentry;
fp_t_ts nxnxentry;
fp_t_ts z[3], w[3], r[3];
fp_t_ts wgt;
//-----------------TODO: Make this part dynamic--------------------------------------------
fp_t_ts nxelem[FacePolyOrderDim2] = {0};
fp_t_ts nxnxelem[FacePolyOrderDim2] = {0};
//-----------------------------------------------------------------------------------------
fp_t_ts nxE_inc[3];
fp_t_ts nxnxH_inc[3];
geometry2(&nd_coords_face[3 * 3 * face_id], &nd_coords_tet[4 * 3 * tet_id], normvtr, nvtr, &area);
for (int j = 0; j < FacePolyOrderDim[PolyFlag]; j++)
{
nxentry = 0;
nxnxentry = 0;
for (int k = 0; k < GAUSS_POINT_NUM[PolyFlag]; k++)
{
quadObtention(GAUSS_POINT_NUM[PolyFlag], k, z[0], z[1], z[2], wgt);
faceBasisW(&nd_coords_face[3 * 3 * face_id], z, j, w);
getCartesianCoord(&nd_coords_face[3 * 3 * face_id], z, r);
H_updateinc_PW(t, exci_prop.to, exci_prop.tau, exci_prop.freq_m, Z, exci_prop.Emagnitude,
exci_prop.ro, exci_prop.theta, exci_prop.phi,exci_prop.Epol,
exci_prop.ExcitationFlag,r,normvtr,nxE_inc,nxnxH_inc);
if (!(isnan(nxE_inc[0])))
{
nxentry += area * wgt * dotP3131(w, nxE_inc);
nxnxentry += area * wgt * dotP3131(w, nxnxH_inc);
/*
if (tet_id == 6)
{
printf("area = %0.6e | wgt = %0.6e | w = %0.6e,%0.6e,%0.6e | nxE_inc = %0.6e,%0.6e,%0.6e | nxnxH_inc = %0.6e,%0.6e,%0.6e \n",
area,wgt, w[0], w[1], w[2],nxE_inc[0],nxE_inc[1],nxE_inc[2],nxnxH_inc[0],nxnxH_inc[1],nxnxH_inc[2]);
}
*/
}
}
nxelem[j] += nxentry;
nxnxelem[j] += nxnxentry;
}
for (int j = 0; j < FacePolyOrderDim[PolyFlag]; j++)
{
// printf("%d\n", ExcitationFacesNum[face_id]);
nxe_inc[FAC2TET[ExcitationFacesNum[face_id]][j]] += nxelem[j];
nxnxh_inc[FAC2TET[ExcitationFacesNum[face_id]][j]] += nxnxelem[j];
}
}
for (int i = 0; i < TetPolyOrderDim[PolyFlag]; i++)
{
/*
if (tet_id == 3)
{
printf("tet=%d i=%d | nxe_inc=%.6e | dt_muo=%.6e Z=%.6e | nxnxh=%.6e | map[%d]=%d\n",
tet_id, i, (double)nxe_inc[i], (double)dt_muo, (double)Z,
(double)nxnxh_inc[i], map[tet_id * TetPolyOrderDim[PolyFlag] + i]);
}
*/
nxe_inc[i] *= -dt_muo;
nxe_inc[i] += (-Z * dt_muo) * nxnxh_inc[i];
nxe_inc[i] *= map[tet_id * TetPolyOrderDim[PolyFlag] + i];
}
fp_t_ts sum;
for (int i = 0; i < TetPolyOrderDim[PolyFlag]; i++)
{
sum = 0.0;
for (int j = 0; j < TetPolyOrderDim[PolyFlag]; j++)
{
int index1 = tet_id * (TetPolyOrderDim[PolyFlag] * TetPolyOrderDim[PolyFlag]) + j * TetPolyOrderDim[PolyFlag] + i;
int index2 = tet_id * TetPolyOrderDim[PolyFlag] + i;
sum += InvMat[index1] * nxe_inc[j] * map[index2];
/*
if (index2 == 0)
{
printf("INDEX1 = %d | INDEX2 = %d | map[index2] = %d | nxe_inc = %0.6e\n",index1,index2,map[index2],nxe_inc[j]);
}
*/
}
Field[tet_id * TetPolyOrderDim[PolyFlag] + i] += sum;
/*
if (ExcitationFacesOffset[tet_id] == 7)
{
printf("SUM = %0.6e | ind = %d \n",sum, tet_id * TetPolyOrderDim[PolyFlag] + i);
}
*/
}
}
}
__global__ void checker(fp_t_ts* a, fp_t_ts* b, int length){
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if(idx < length){
if(a[idx] != b[idx]){
printf("ERROR %.10f\n", a[idx] - b[idx]);
}
}
}
// ---- Small vector helpers (you probably already have similar ones) ----
__device__ inline void proj_to_plane(const fp_t_ts p[3], const fp_t_ts n[3], fp_t_ts out[3]) {
// out = p - (p·n) n
fp_t_ts dn = p[0]*n[0] + p[1]*n[1] + p[2]*n[2];
out[0] = p[0] - dn*n[0];
out[1] = p[1] - dn*n[1];
out[2] = p[2] - dn*n[2];
}
__device__ inline fp_t_ts dot3(const fp_t_ts a[3], const fp_t_ts b[3]) {
return a[0]*b[0] + a[1]*b[1] + a[2]*b[2];
}
__device__ inline void add3(const fp_t_ts a[3], const fp_t_ts b[3], fp_t_ts out[3]) {
out[0]=a[0]+b[0]; out[1]=a[1]+b[1]; out[2]=a[2]+b[2];
}
__device__ inline void sub3(const fp_t_ts a[3], const fp_t_ts b[3], fp_t_ts out[3]) {
out[0]=a[0]-b[0]; out[1]=a[1]-b[1]; out[2]=a[2]-b[2];
}
__device__ inline fp_t_ts norm3(const fp_t_ts a[3]) {
return sqrtf(a[0]*a[0]+a[1]*a[1]+a[2]*a[2]);
}
__device__ inline void normed(const fp_t_ts a[3], fp_t_ts out[3]) {
fp_t_ts n = norm3(a);
if (n > (fp_t_ts)0) { out[0]=a[0]/n; out[1]=a[1]/n; out[2]=a[2]/n; }
else { out[0]=0; out[1]=0; out[2]=0; }
}
// --------------------------------------------------------------------------------------
// Antenna Port Update
// Implement the port excitation using analytical solution
// TEM Port
// --------------------------------------------------------------------------------------
__device__ void TimeModulationInc(int TimeDistributionFlag, fp_t_ts t, fp_t_ts to, fp_t_ts freq_m,
fp_t_ts Emag, fp_t_ts tau, fp_t_ts CHIRP_BW_MHZ, fp_t_ts &IncidExcit)
{
fp_t_ts Exponent;
fp_t_ts omega = 2.0 * Pi * freq_m * MEGA;
fp_t_ts Neuman;
fp_t_ts SinModul;
fp_t_ts kr;
fp_t_ts work[3];
switch (TimeDistributionFlag){
// Time Harmonic
case 0:
IncidExcit = Emag * sin(- omega * (t));
break;
// Gauss Pulse
case 1:
Exponent = t - to;
SinModul = cos(omega * (t - to));
IncidExcit = Emag * SinModul * exp(-(Exponent * Exponent) / (tau * tau));
break;
// Neuman Pulse
case 2:
Exponent = t - to;
Neuman = (2.0 * Exponent) / (tau * tau);
IncidExcit = (Emag * Neuman) * exp(-(Exponent * Exponent) / (tau * tau));
break;
// 3) Linear FM chirp starting at 'to', duration 'tau', bandwidth CHIRP_BW_MHZ,
// Hann-tapered on [0, tau] to reduce spectral sidelobes.
case 3:
{
// Time since chirp start (seconds)
fp_t_ts tt = t - to;
// Gate to [0, tau]
if (tt >= (fp_t_ts)0.0 && tt <= tau)
{
// Sweep parameters
const fp_t_ts B = (fp_t_ts)(CHIRP_BW_MHZ * MEGA); // Hz
const fp_t_ts fc = (fp_t_ts)(freq_m * MEGA); // Hz
const fp_t_ts f0 = fc - (fp_t_ts)0.5 * B; // start freq [Hz]
const fp_t_ts k = (tau != (fp_t_ts)0 ? B / tau : (fp_t_ts)0); // Hz/s
// Phase: 2π( f0*tt + 0.5*k*tt^2 )
const fp_t_ts phase = (fp_t_ts)(2.0 * Pi) * (f0 * tt + (fp_t_ts)0.5 * k * tt * tt);
// Hann window on [0, tau]: w = 0.5*(1 - cos(2π*tt/tau))
const fp_t_ts w = (fp_t_ts)0.5 * ((fp_t_ts)1.0 - cos((fp_t_ts)(2.0 * Pi) * (tt / tau)));
const fp_t_ts s = sin(phase) * w;
IncidExcit = Emag * s;
}
else
{
// Outside the chirp window
IncidExcit = (fp_t_ts)0.0;
}
}
break;
// 4) Linear FM chirp starting at 'to', duration 'tau', bandwidth CHIRP_BW_MHZ,
case 4:
{
// Time since chirp start (seconds)
fp_t_ts tt = t - to;
// Gate to [0, tau]
if (tt >= (fp_t_ts)0.0 && tt <= tau)
{
// Sweep parameters
const fp_t_ts B = (fp_t_ts)(CHIRP_BW_MHZ * MEGA); // Hz
const fp_t_ts fc = (fp_t_ts)(freq_m * MEGA); // Hz
const fp_t_ts f0 = fc - (fp_t_ts)0.5 * B; // start freq [Hz]
const fp_t_ts k = (tau != (fp_t_ts)0 ? B / tau : (fp_t_ts)0); // Hz/s
// Phase: 2π( f0*tt + 0.5*k*tt^2 )
const fp_t_ts phase = (fp_t_ts)(2.0 * Pi) * (f0 * tt + (fp_t_ts)0.5 * k * tt * tt);
const fp_t_ts s = sin(phase);
IncidExcit = Emag * s;
}
else
{
// Outside the chirp window
IncidExcit = (fp_t_ts)0.0;
}
}
break;
default:
break;
}
}
// Computes nxH_inc and nxnxE_inc at a quadrature point.
// Inputs:
// t,to,tau,freq_m,TimeDistributionFlag,CHIRP_BW_MHZ : time envelope
// exci_prop : geometry & parameters for the selected port type
// n : port unit normal (propagation dir), length=1
// t1,t2 : orthonormal tangential basis on the port plane
// r : 3D position of the current quadrature point
//
// Outputs:
// nxHinc[3] = n × H_inc
// nxnxEinc[3] = n × (n × E_inc)
__device__ void updateinc_PORT(fp_t_ts t, const ExcitationProp& exci_prop,
const fp_t_ts r[3], const fp_t_ts *normalvtr, fp_t_ts* nxField,
fp_t_ts* nxnxField, int mode)
{
fp_t_ts t1[3] = {exci_prop.t1[0],exci_prop.t1[1],exci_prop.t1[2]};
fp_t_ts t2[3] = {exci_prop.t2[0],exci_prop.t2[1],exci_prop.t2[2]};
fp_t_ts nn[3] = {exci_prop.PortDirection[0],exci_prop.PortDirection[1],exci_prop.PortDirection[2]};
fp_t_ts n[3];
normed(nn, n);
// ===== 0) Constants & medium =====
const fp_t_ts pi = (fp_t_ts)3.14159265358979323846;
// ----- 1) Time envelope (scalar) -----
fp_t_ts IncidExcit = (fp_t_ts)1.0;
fp_t_ts Emag = (fp_t_ts)1.0; // you can feed a magnitude if needed
TimeModulationInc(exci_prop.TimeDistributionFlag, t, exci_prop.to, exci_prop.freq_m,
Emag, exci_prop.tau, exci_prop.CHIRP_BW_MHZ, IncidExcit);
// ----- 2) Build tangential E_inc shape per PortFlag -----
fp_t_ts E_tan[3] = {0,0,0};
// TE_mn geometry parameters (only meaningful for PortFlag==3)
const int m = exci_prop.m;
const int nmn = exci_prop.n; // avoid name clash with normal 'n'
const fp_t_ts a_rect = (exci_prop.rect_a > (fp_t_ts)0 ? exci_prop.rect_a : (fp_t_ts)1e-12);
const fp_t_ts b_rect = (exci_prop.rect_b > (fp_t_ts)0 ? exci_prop.rect_b : (fp_t_ts)1e-12);
// Coax parameters (only meaningful for PortFlag==2)
fp_t_ts r0[3] = {exci_prop.r0_port[0], exci_prop.r0_port[1], exci_prop.r0_port[2]};
fp_t_ts r1[3] = {exci_prop.r1_port[0], exci_prop.r1_port[1], exci_prop.r1_port[2]};
fp_t_ts r2[3] = {exci_prop.r2_port[0], exci_prop.r2_port[1], exci_prop.r2_port[2]};
if (exci_prop.PortFlag == 1)
{
// ================= TEM rectangular =================
// E is uniform along vpath projected into the port plane.
fp_t_ts ehat[3];
normed(exci_prop.vpath, ehat);
E_tan[0] = ehat[0]*Emag*IncidExcit;
E_tan[1] = ehat[1]*Emag*IncidExcit;
E_tan[2] = ehat[2]*Emag*IncidExcit;
//printf("Etan = %0.6f,%0.6f,%0.6f\n", E_tan[0], E_tan[1], E_tan[2]);
}
else if (exci_prop.PortFlag == 2)
{
// ================= TEM coax =========================
// E(r) = (V / (rho * ln(b/a))) * rho_hat (in port plane)
// Use Emag as V (scales overall level). Region: a <= rho <= b.
// --- TEM coax: E = (V / (rho ln(b/a))) * rho_hat on a<=rho<=b ---
fp_t_ts v10[3], v20[3]; sub3(r1,r0,v10); sub3(r2,r0,v20);
fp_t_ts v10p[3], v20p[3]; proj_to_plane(v10,n,v10p); proj_to_plane(v20,n,v20p);
const fp_t_ts a = norm3(v10p);
const fp_t_ts b = norm3(v20p);
fp_t_ts rp0[3], s[3]; sub3(r, r0, rp0); proj_to_plane(rp0, n, s);
const fp_t_ts rho = norm3(s);
if (rho >= a && rho <= b && a > (fp_t_ts)0 && b > a)
{
fp_t_ts sr_hat[3]; normed(s, sr_hat);
const fp_t_ts amp = Emag*IncidExcit / (rho * logf(b/a));
E_tan[0] = sr_hat[0]*amp;
E_tan[1] = sr_hat[1]*amp;
E_tan[2] = sr_hat[2]*amp;
}
}
else
{
// ================= TE_mn rectangular (PortFlag == 3) ==============
// Local coords: u in [0,a] along t1, v in [0,b] along t2 from uv0.
// Shape (time-domain real form, scaled by Emag*IncidExcit):
// E_u ∝ (nπ/b) * cos(mπ u/a) * sin(nπ v/b)
// E_v ∝ −(mπ/a) * sin(mπ u/a) * cos(nπ v/b)
// (signs chosen to be consistent with ∇·E_t = 0; overall scale via Emag)
// H_t for BC is supplied by Zport you set; here we’ll just use H = (1/Zport) n×E.
// Local (u,v) from uv0
fp_t_ts uv0[3] = {exci_prop.uv0[0], exci_prop.uv0[1], exci_prop.uv0[2]};
fp_t_ts drv[3]; sub3(r, uv0, drv);
const fp_t_ts u = dot3(drv, t1);
const fp_t_ts v = dot3(drv, t2);
const fp_t_ts cu = cosf((fp_t_ts)m * pi * u / a_rect);
const fp_t_ts su = sinf((fp_t_ts)m * pi * u / a_rect);
const fp_t_ts cv = cosf((fp_t_ts)nmn * pi * v / b_rect);
const fp_t_ts sv = sinf((fp_t_ts)nmn * pi * v / b_rect);
const fp_t_ts mu_u = (nmn!=0) ? ((fp_t_ts)nmn*pi/b_rect) : (fp_t_ts)0;
const fp_t_ts mu_v = (m !=0) ? ((fp_t_ts)m *pi/a_rect) : (fp_t_ts)0;
fp_t_ts Eu = ( mu_u ) * ( cu * sv ) * (Emag*IncidExcit);
fp_t_ts Ev = -( mu_v ) * ( su * cv ) * (Emag*IncidExcit);
// back to 3D
E_tan[0] = Eu*t1[0] + Ev*t2[0];
E_tan[1] = Eu*t1[1] + Ev*t2[1];
E_tan[2] = Eu*t1[2] + Ev*t2[2];
}
// ----- 3) Zport (implicit calc if exci_prop.PortImpedance <= 0) -----
fp_t_ts Zport = exci_prop.PortImpedance;
//printf("Zport = %0.6e\n",Zport);
// ===== 4) One cross, then reuse for both outputs =====
fp_t_ts H_tan[3];
// Calculate Magnetic Field
crossP3(n, E_tan, H_tan);
const fp_t_ts invZ = 1.0 / Zport;
scale3(H_tan, invZ);
fp_t_ts work[3];
if (mode == 1)
{
// nxHinc
crossP3(normalvtr, H_tan, nxField);
// nxnxEinc
crossP3(normalvtr, E_tan, work);
crossP3(normalvtr, work, nxnxField);
}
// nxnxHinc and nxEinc
else
{
// nxEinc
crossP3(normalvtr, E_tan, nxField);
// nxnxHinc
crossP3(normalvtr, H_tan, work);
crossP3(normalvtr, work, nxnxField);
}
// cleanup
for (int i=0;i<3;++i)
{
if (fabs(nxField[i]) < (fp_t_ts)1e-12) nxField[i] = 0.0;
if (fabs(nxnxField[i]) < (fp_t_ts)1e-12) nxnxField[i] = 0.0;
}
}
// Port-only version of H-field excitation
__global__ void addExcitationH_port( int* ExcitationFacesCnt,
int* ExcitationFacesOffset,
int* ExcitationFacesNum,
int excitationCNT,
int8_t* map,
ExcitationProp* exci_prop_vec,
int* PortFacePidx_d,
int PolyFlag,
fp_t_ts dt_muo, fp_t_ts t,
fp_t_ts* nd_coords_tet,
fp_t_ts* nd_coords_face,
fp_t_ts* InvMat,
fp_t_ts* Field)
{
int tet_id = blockIdx.x * blockDim.x + threadIdx.x;
if (tet_id < excitationCNT)
{
//printf("ExcitationFacesOffset = %d\n",ExcitationFacesOffset[tet_id]);
const int dim = TetPolyOrderDim[PolyFlag];
const int fdim = FacePolyOrderDim[PolyFlag];
const int Q = GAUSS_POINT_NUM[PolyFlag];
//-----TODO: Make this part dynamic----------------------------
const int TetPolyOrderDim2 = TetPolyOrderDim[3];
const int FacePolyOrderDim2 = FacePolyOrderDim[3];
fp_t_ts nxe_inc[TetPolyOrderDim2] = {0};
fp_t_ts nxnxh_inc[TetPolyOrderDim2] = {0};
//-------------------------------------------------------------
fp_t_ts Z = 0.0;
for (int i = 0; i < ExcitationFacesCnt[tet_id]; i++)
{
//printf("i = %d | limit = %d \n",i,ExcitationFacesCnt[tet_id]);
int face_id = ExcitationFacesOffset[tet_id] + i;
//printf("face_id = %d\n",face_id);
int port_id = PortFacePidx_d[face_id];
ExcitationProp exci_prop = exci_prop_vec[port_id];
Z = exci_prop.PortImpedance;
fp_t_ts nvtr[2][3];
fp_t_ts normvtr[3] = {0};
fp_t_ts area = 0;
fp_t_ts nxentry;
fp_t_ts nxnxentry;
fp_t_ts z[3], w[3], r[3];
fp_t_ts wgt;
//-----------------TODO: Make this part dynamic--------------------------------------------
fp_t_ts nxelem[FacePolyOrderDim2] = {0};
fp_t_ts nxnxelem[FacePolyOrderDim2] = {0};
//-----------------------------------------------------------------------------------------
fp_t_ts nxE_inc_[3];
fp_t_ts nxnxH_inc_[3];
/*
if (face_id == 7)
{
printf("%0.6e, %0.6e, %0.6e %0.6e, %0.6e, %0.6e %0.6e, %0.6e, %0.6e\n",
nd_coords_face[3 * 3 * face_id],nd_coords_face[3 * 3 * face_id+1],nd_coords_face[3 * 3 * face_id+2],
nd_coords_face[3 * 3 * face_id+3],nd_coords_face[3 * 3 * face_id+4],nd_coords_face[3 * 3 * face_id+5],
nd_coords_face[3 * 3 * face_id+6],nd_coords_face[3 * 3 * face_id+7],nd_coords_face[3 * 3 * face_id+8]);
}
*/
geometry2(&nd_coords_face[3 * 3 * face_id], &nd_coords_tet[4 * 3 * tet_id], normvtr, nvtr, &area);
for (int j = 0; j < FacePolyOrderDim[PolyFlag]; j++)
{
nxentry = 0;
nxnxentry = 0;
for (int k = 0; k < Q; k++)
{
quadObtention(Q, k, z[0], z[1], z[2], wgt);
faceBasisW(&nd_coords_face[3 * 3 * face_id], z, j, w);
getCartesianCoord(&nd_coords_face[3 * 3 * face_id], z, r);
updateinc_PORT(t, exci_prop, r, normvtr, nxE_inc_, nxnxH_inc_, 2);
if (!(isnan(nxE_inc_[0])))
{
nxentry += area * wgt * dotP3131(w, nxE_inc_);
nxnxentry += area * wgt * dotP3131(w, nxnxH_inc_);
}
/*
if (tet_id == 6)
{
printf("area = %0.6e | wgt = %0.6e | w = %0.6e,%0.6e,%0.6e | nxE_inc = %0.6e,%0.6e,%0.6e | nxnxH_inc = %0.6e,%0.6e,%0.6e \n",
area,wgt, w[0], w[1], w[2],nxE_inc_[0],nxE_inc_[1],nxE_inc_[2],nxnxH_inc_[0],nxnxH_inc_[1],nxnxH_inc_[2]);
}
*/
}
nxelem[j] += nxentry;
nxnxelem[j] += nxnxentry;
}
for (int j = 0; j < FacePolyOrderDim[PolyFlag]; j++)
{
// printf("%d\n", ExcitationFacesNum[face_id]);
nxe_inc[FAC2TET[ExcitationFacesNum[face_id]][j]] += nxelem[j];
nxnxh_inc[FAC2TET[ExcitationFacesNum[face_id]][j]] += nxnxelem[j];
}
}
for (int i = 0; i < TetPolyOrderDim[PolyFlag]; i++)
{
/*
if (tet_id == 3)
{
printf("tet=%d i=%d | nxe_inc=%.6e | dt_muo=%.6e Z=%.6e | nxnxh=%.6e | map[%d]=%d\n",
tet_id, i, (double)nxe_inc[i], (double)dt_muo, (double)Z,
(double)nxnxh_inc[i], map[tet_id * TetPolyOrderDim[PolyFlag] + i]);
}
*/
nxe_inc[i] *= -dt_muo;
nxe_inc[i] += (-Z * dt_muo) * nxnxh_inc[i];
nxe_inc[i] *= map[tet_id * TetPolyOrderDim[PolyFlag] + i];
}
fp_t_ts sum;
for (int i = 0; i < TetPolyOrderDim[PolyFlag]; i++){
sum = 0.0;
for (int j = 0; j < TetPolyOrderDim[PolyFlag]; j++)
{
int index1 = tet_id * (TetPolyOrderDim[PolyFlag] * TetPolyOrderDim[PolyFlag]) + j * TetPolyOrderDim[PolyFlag] + i;
int index2 = tet_id * TetPolyOrderDim[PolyFlag] + i;
sum += InvMat[index1] * nxe_inc[j] * map[index2];
/*
if (index2 == 0)
{
printf("INDEX1 = %d | INDEX2 = %d | map[index2] = %d | nxe_inc = %0.6e\n",index1,index2,map[index2],nxe_inc[j]);
}
*/
}
Field[tet_id * TetPolyOrderDim[PolyFlag] + i] += sum;
/*
if (ExcitationFacesOffset[tet_id] == 7)
{
printf("SUM = %0.6e | ind = %d \n",sum, tet_id * TetPolyOrderDim[PolyFlag] + i);
}
*/
}
}
}
__global__ void addExcitationE_port( int* ExcitationFacesCnt,
int* ExcitationFacesOffset,
int* ExcitationFacesNum,
int excitationCNT,
int8_t* map,
ExcitationProp* exci_prop_vec,
int* PortFacePidx_d,
int PolyFlag,
fp_t_ts dt_epsilon, fp_t_ts t,
fp_t_ts* nd_coords_tet,
fp_t_ts* nd_coords_face,
fp_t_ts* InvMat,
fp_t_ts* Field )
{
int tet_id = blockIdx.x * blockDim.x + threadIdx.x;
if (tet_id < excitationCNT)
{
//-----TODO: Make this part dynamic----------------------------
const int TetPolyOrderDim2 = TetPolyOrderDim[3];
const int FacePolyOrderDim2 = FacePolyOrderDim[3];
fp_t_ts nxh_inc[TetPolyOrderDim2] = {0};
fp_t_ts nxnxe_inc[TetPolyOrderDim2] = {0};
//-------------------------------------------------------------
fp_t_ts Y = 0.0;
for (int i = 0; i < ExcitationFacesCnt[tet_id]; i++)
{
int face_id = ExcitationFacesOffset[tet_id] + i;
int port_id = PortFacePidx_d[face_id];
ExcitationProp exci_prop = exci_prop_vec[port_id];
Y = 1.0 / exci_prop.PortImpedance;
fp_t_ts nvtr[2][3];
fp_t_ts normvtr[3] = {0};
fp_t_ts area = 0;
fp_t_ts nxentry;
fp_t_ts nxnxentry;
fp_t_ts z[3], w[3], r[3];
fp_t_ts wgt;
//-----------------TODO: Make this part dynamic--------------------------------------------
fp_t_ts nxelem[FacePolyOrderDim2] = {0};
fp_t_ts nxnxelem[FacePolyOrderDim2] = {0};
//-----------------------------------------------------------------------------------------
fp_t_ts nxH_inc_[3];
fp_t_ts nxnxE_inc_[3];
geometry2(&nd_coords_face[3 * 3 * face_id], &nd_coords_tet[4 * 3 * tet_id], normvtr, nvtr, &area);
const int Q = GAUSS_POINT_NUM[PolyFlag];
for (int j = 0; j < FacePolyOrderDim[PolyFlag]; j++)
{
nxentry = 0;
nxnxentry = 0;
for (int k = 0; k < Q; k++)
{
quadObtention(Q, k, z[0], z[1], z[2], wgt);
faceBasisW(&nd_coords_face[3 * 3 * face_id], z, j, w);
getCartesianCoord(&nd_coords_face[3 * 3 * face_id], z, r);
updateinc_PORT(t, exci_prop, r, normvtr, nxH_inc_, nxnxE_inc_, 1);
if (!(isnan(nxH_inc_[0])))
{
nxentry += area * wgt * dotP3131(w, nxH_inc_);
nxnxentry += area * wgt * dotP3131(w, nxnxE_inc_);
}
}
nxelem[j] += nxentry;
nxnxelem[j] += nxnxentry;
}
for (int j = 0; j < FacePolyOrderDim[PolyFlag]; j++)
{
nxh_inc[FAC2TET[ExcitationFacesNum[face_id]][j]] += nxelem[j];
nxnxe_inc[FAC2TET[ExcitationFacesNum[face_id]][j]] += nxnxelem[j];
}
}
for (int i = 0; i < TetPolyOrderDim[PolyFlag]; i++)
{
nxh_inc[i] *= dt_epsilon;
nxh_inc[i] += (-Y * dt_epsilon) * nxnxe_inc[i];
nxh_inc[i] *= map[tet_id * TetPolyOrderDim[PolyFlag] + i];
}
fp_t_ts sum;
for (int i = 0; i < TetPolyOrderDim[PolyFlag]; i++)
{
sum = 0.0;
for (int j = 0; j < TetPolyOrderDim[PolyFlag]; j++)
{
sum += InvMat[tet_id * (TetPolyOrderDim[PolyFlag] * TetPolyOrderDim[PolyFlag]) + j * TetPolyOrderDim[PolyFlag] + i] * nxh_inc[j] * map[tet_id * TetPolyOrderDim[PolyFlag] + i];
}
Field[tet_id * TetPolyOrderDim[PolyFlag] + i] += sum;
//printf("Sum = %0.6e \n",sum);
}
}
}
#endif
#endif // DGTD_KERNELS_CU