#include #include #include "mesh3d.h" #include "group.h" #include "float_regcvtr.h" #include "Constants.h" #include "proc.h" #include "global.h" #include "fftw3.h" #include "integral.h" #include "transform.h" #include extern vtr O; void InitFFTW() { fftwf_init_threads(); fftwf_plan_with_nthreads(omp_get_max_threads()); } void FastFarField( group **SVDgroup, int groupNum, double ko, regcvtr *regJ, regcvtr *regM, int numTheta, int numPhi, cVtr *MainField, bool Pec ) { group *grp; int nGroupTri; double d; int i, offset, Lp; double theta, phi; vtr Op; cVtr NPole; fftwf_complex *tmp_data; grp = SVDgroup[groupNum]; Op = grp->GetGroupCenter(); // load the partition triangles and currents nGroupTri = grp->GetnElement(); for ( i = 0; i < nGroupTri; i++ ) { groupTriArray[i] = grp->GetElemArray( i ); int idx = groupTriArray[i]->getNum(); groupRegJ[i] = regJ[idx]; if (!Pec) groupRegM[i] = regM[idx]; } // calculate partition sampling rate d = 2.0*grp->GetRadius(); Lp = int( ko*d+5.0*log(ko*d+Pi) ); int sizeTheta = 2*Lp+1; int sizePhi = 2*Lp; // there's no sense in sampleing at a higher rate than the user wants, // so take the lesser of the two sizePhi = (sizePhi > numPhi) ? numPhi : sizePhi; sizeTheta = (Lp > numTheta) ? 2*numTheta : sizeTheta; int coeffSize = sizeTheta*sizePhi; double deltaTheta = 360.0 / sizeTheta; double deltaPhi = 360.0 / sizePhi; if (Pec) NPole = Near2FarTransform(nGroupTri, ko, Op, 0.0, 0.0, groupRegJ, groupTriArray ); else NPole = Near2FarTransform(nGroupTri, ko, Op, 0.0, 0.0, groupRegJ, groupRegM, groupTriArray ); #pragma omp parallel for default(none) private(theta,phi) \ shared(sizeTheta, deltaTheta, deltaPhi, sizePhi, ko, Pec, Op, \ nGroupTri, groupRegJ, groupRegM, groupTriArray, Epart) for ( i = 1; i < sizeTheta/2+1; i++ ) { theta = double(i) * deltaTheta; for ( int j = 0; j < sizePhi; j++ ) { phi = double(j) * deltaPhi; int k = (i-1)*sizePhi+j; if (Pec) Epart[k] = Near2FarTransform(nGroupTri, ko, Op, theta, phi, groupRegJ, groupTriArray ); else Epart[k] = Near2FarTransform(nGroupTri, ko, Op, theta, phi, groupRegJ, groupRegM, groupTriArray ); } // end of phi loop } // end of theta loop // In order to do a Fourier transform along the theta and phi axes, both // need to be 2pi-periodic. Therefore the sample must be extended to theta // in the range (180,360), based on the values already calculated for the // range [0,180]. // x component // copy theta = 0 samples for ( i = 0; i < sizePhi; i++ ) { tmp_x[i][0] = NPole.x.real(); tmp_x[i][1] = NPole.x.imag(); } // copy theta in [0,180] samples offset = sizePhi; for ( i = offset; i < (sizeTheta/2+1)*sizePhi; i++ ) { tmp_x[i][0] = Epart[i-offset].x.real(); tmp_x[i][1] = Epart[i-offset].x.imag(); } // copy theta in (180,360) samples offset = (sizeTheta/2+1)*sizePhi; for ( i = 0; i < (sizeTheta-1)/2; i++ ) { for ( int j = 0; j < sizePhi/2; j++ ) { tmp_x[offset+sizePhi*i+j][0] = tmp_x[offset-sizePhi*(i+1)+sizePhi/2+j][0]; tmp_x[offset+sizePhi*i+j][1] = tmp_x[offset-sizePhi*(i+1)+sizePhi/2+j][1]; tmp_x[offset+sizePhi*i+sizePhi/2+j][0] = tmp_x[offset-sizePhi*(i+1)+j][0]; tmp_x[offset+sizePhi*i+sizePhi/2+j][1] = tmp_x[offset-sizePhi*(i+1)+j][1]; } } // repeat for y component for ( i = 0; i < sizePhi; i++ ) { tmp_y[i][0] = NPole.y.real(); tmp_y[i][1] = NPole.y.imag(); } offset = sizePhi; for ( i = offset; i < (sizeTheta/2+1)*sizePhi; i++ ) { tmp_y[i][0] = Epart[i-offset].y.real(); tmp_y[i][1] = Epart[i-offset].y.imag(); } offset = (sizeTheta/2+1)*sizePhi; for ( i = 0; i < (sizeTheta-1)/2; i++ ) { for ( int j = 0; j < sizePhi/2; j++ ) { tmp_y[offset+sizePhi*i+j][0] = tmp_y[offset-sizePhi*(i+1)+sizePhi/2+j][0]; tmp_y[offset+sizePhi*i+j][1] = tmp_y[offset-sizePhi*(i+1)+sizePhi/2+j][1]; tmp_y[offset+sizePhi*i+sizePhi/2+j][0] = tmp_y[offset-sizePhi*(i+1)+j][0]; tmp_y[offset+sizePhi*i+sizePhi/2+j][1] = tmp_y[offset-sizePhi*(i+1)+j][1]; } } // repeat z component for ( i = 0; i < sizePhi; i++ ) { tmp_z[i][0] = NPole.z.real(); tmp_z[i][1] = NPole.z.imag(); } offset = sizePhi; for ( i = offset; i < (sizeTheta/2+1)*sizePhi; i++ ) { tmp_z[i][0] = Epart[i-offset].z.real(); tmp_z[i][1] = Epart[i-offset].z.imag(); } offset = (sizeTheta/2+1)*sizePhi; for ( i = 0; i < (sizeTheta-1)/2; i++ ) { for ( int j = 0; j < sizePhi/2; j++ ) { tmp_z[offset+sizePhi*i+j][0] = tmp_z[offset-sizePhi*(i+1)+sizePhi/2+j][0]; tmp_z[offset+sizePhi*i+j][1] = tmp_z[offset-sizePhi*(i+1)+sizePhi/2+j][1]; tmp_z[offset+sizePhi*i+sizePhi/2+j][0] = tmp_z[offset-sizePhi*(i+1)+j][0]; tmp_z[offset+sizePhi*i+sizePhi/2+j][1] = tmp_z[offset-sizePhi*(i+1)+j][1]; } } UpdateMainField( Lp, ko, MainField, numTheta, numPhi, Op ); } void UpdateMainField ( int L, double ko, cVtr *MainField, int numTheta, int numPhi, vtr Op ) { int i, j, k; double theta, phi; vtr rVtr; Complex cval; int sizeTheta = 2*numTheta; int sizePhi = numPhi; int coeffSize = sizeTheta*sizePhi; int partSizeTheta = 2*L+1; int partSizePhi = 2*L; partSizePhi = (partSizePhi > numPhi) ? numPhi : partSizePhi; partSizeTheta = ( L > numTheta) ? 2*numTheta : partSizeTheta; int partCoeffSize = partSizeTheta*partSizePhi; // x component memcpy( field, tmp_x, sizeof(fftwf_complex)*partCoeffSize ); // interpolate along theta direction TrigInterp( field, partSizeTheta, partSizePhi, sizeTheta, out ); for ( i = 0; i < sizeTheta*partSizePhi; i++ ) { field[i][0] = out[i][0]; field[i][1] = out[i][1]; } // interpolate along phi direction Transpose( field, sizeTheta, partSizePhi, workspace ); TrigInterp( field, partSizePhi, sizeTheta, sizePhi, out ); for ( i = 0; i < sizeTheta*sizePhi; i++ ) { field[i][0] = out[i][0]; field[i][1] = out[i][1]; } Transpose( field, sizePhi, sizeTheta, workspace ); // add partition field to the total field, taking into account phase shift k = 0; for ( i = 0; i < sizeTheta/2+1; i++ ) { theta = 360.0*double(i)/double(sizeTheta); for ( j = 0; j < sizePhi; j++ ) { phi = 360.0*double(j) / double(sizePhi); rVtr = RDirection( theta, phi ); cval = Complex(0.0f, ko*dotP(Op-O,rVtr)); cval = exp( cval ); MainField[k].x += cval*Complex( field[k][0], field[k][1] ); k++; } } // repeat for y component memcpy( field, tmp_y, sizeof(fftwf_complex)*partCoeffSize ); TrigInterp( field, partSizeTheta, partSizePhi, sizeTheta, out ); for ( i = 0; i < sizeTheta*partSizePhi; i++ ) { field[i][0] = out[i][0]; field[i][1] = out[i][1]; } Transpose( field, sizeTheta, partSizePhi, workspace ); TrigInterp( field, partSizePhi, sizeTheta, sizePhi, out ); for ( i = 0; i < sizeTheta*sizePhi; i++ ) { field[i][0] = out[i][0]; field[i][1] = out[i][1]; } Transpose( field, sizePhi, sizeTheta, workspace ); k = 0; for ( i = 0; i < sizeTheta/2+1; i++ ) { theta = 360.0*double(i)/double(sizeTheta); for ( j = 0; j < sizePhi; j++ ) { phi = 360.0*double(j) / double(sizePhi); rVtr = RDirection( theta, phi ); cval = Complex(0.0f, ko*dotP(Op-O,rVtr)); cval = exp( cval ); MainField[k].y += cval*Complex( field[k][0], field[k][1] ); k++; } } // repeat z component memcpy( field, tmp_z, sizeof(fftwf_complex)*partCoeffSize ); TrigInterp( field, partSizeTheta, partSizePhi, sizeTheta, out ); for ( i = 0; i < sizeTheta*partSizePhi; i++ ) { field[i][0] = out[i][0]; field[i][1] = out[i][1]; } Transpose( field, sizeTheta, partSizePhi, workspace ); TrigInterp( field, partSizePhi, sizeTheta, sizePhi, out ); for ( i = 0; i < sizeTheta*sizePhi; i++ ) { field[i][0] = out[i][0]; field[i][1] = out[i][1]; } Transpose( field, sizePhi, sizeTheta, workspace ); k = 0; for ( i = 0; i < sizeTheta/2+1; i++ ) { theta = 360.0*double(i)/double(sizeTheta); for ( j = 0; j < sizePhi; j++ ) { phi = 360.0*double(j) / double(sizePhi); rVtr = RDirection( theta, phi ); cval = Complex(0.0f, ko*dotP(Op-O,rVtr)); cval = exp( cval ); MainField[k].z += cval*Complex( field[k][0], field[k][1] ); k++; } } } // Given a matrix stored in row-major format, calcualte the matrix transpose void Transpose ( fftwf_complex *in, int N, int M, fftwf_complex *workspace ) { fftwf_complex *ptr = in, *ws = workspace; for ( int i = 0; i < M; i ++ ) { ptr = in + i; for ( int j = 0; j < N; j++ ) { (*ws)[0] = (*ptr)[0]; (*ws)[1] = (*ptr)[1]; ptr += M; ws++; } } memcpy( in, workspace, sizeof(fftwf_complex)*N*M ); } // Given an N by M matrix in row-major format, perform trigonometric // interpolation column-wise, resulting in an nN by M matrix. void TrigInterp ( fftwf_complex * in, int N, int M, int nN, fftwf_complex * out ) { fftwf_plan fftPlan, ifftPlan; int size1 = N, size2 = nN; int nyquist = (int) ceil(float(N+1)/2.0f); fftPlan = fftwf_plan_many_dft( 1, &size1, M, in, NULL, M, 1, out, NULL, M, 1, FFTW_FORWARD, FFTW_ESTIMATE ); ifftPlan = fftwf_plan_many_dft( 1, &size2, M, out, NULL, M, 1, out, NULL, M, 1, FFTW_BACKWARD, FFTW_MEASURE ); memset( out, 0, sizeof(fftwf_complex)*nN*M ); fftwf_execute( fftPlan ); if ( N == nN ) goto ifft; memcpy(out+M*(nyquist+nN-N), out+M*nyquist, sizeof(fftwf_complex)*M*(N-nyquist) ); memset(out+M*nyquist, 0, sizeof(fftwf_complex)*M*(N-nyquist) ); if ( (N % 2) == 0 ) { for ( int i = 0; i < M; i++ ) { out[M*(nyquist-1)+i][0] /= 2.0; out[M*(nyquist-1)+i][1] /= 2.0; out[M*(nyquist+nN-N-1)+i][0] = out[M*(nyquist-1)+i][0]; out[M*(nyquist+nN-N-1)+i][1] = out[M*(nyquist-1)+i][1]; } } ifft: fftwf_execute( ifftPlan ); double C = 1.0/double(N); int k = 0; for ( int i = 0; i < M; i++ ) { for ( int j = 0; j < nN; j++ ) { out[k][0] *= C; out[k][1] *= C; k++; } } }