TimeDomainRCS/n2f_FAST/fast_field.cc

#include <stdio.h>
#include <string.h>

#include "mesh3d.h"
#include "group.h"
#include "float_regcvtr.h"
#include "Constants.h"
#include "proc.h"
#include "global.h"
#include "fftw3.h"
#include "integral.h"
#include "transform.h"

#include <omp.h>

extern vtr O;

void InitFFTW()
{
	fftwf_init_threads();
	fftwf_plan_with_nthreads(omp_get_max_threads());
}

void FastFarField( group<tri3d> **SVDgroup, int groupNum, double ko,
		   regcvtr *regJ, regcvtr *regM, int numTheta, int numPhi,
		   cVtr *MainField, bool Pec )
{
  group<tri3d> *grp;
  int nGroupTri;

  double d;
  int i, offset, Lp;
  double theta, phi;
  vtr Op;

  cVtr NPole;
  fftwf_complex *tmp_data;

  grp = SVDgroup[groupNum];
  Op = grp->GetGroupCenter();

  // load the partition triangles and currents
  nGroupTri = grp->GetnElement();
  for ( i = 0; i < nGroupTri; i++ ) {
    groupTriArray[i] = grp->GetElemArray( i );
    int idx = groupTriArray[i]->getNum();
    groupRegJ[i] = regJ[idx];
    if (!Pec) groupRegM[i] = regM[idx];
  }

  // calculate partition sampling rate
  d =	2.0*grp->GetRadius();
  Lp = int( ko*d+5.0*log(ko*d+Pi) );
  int sizeTheta = 2*Lp+1;
  int sizePhi = 2*Lp;

  // there's no sense in sampleing at a higher rate than the user wants,
  // so take the lesser of the two
  sizePhi = (sizePhi > numPhi) ? numPhi : sizePhi;
  sizeTheta = (Lp > numTheta) ? 2*numTheta  : sizeTheta;

  int coeffSize = sizeTheta*sizePhi;
  double deltaTheta = 360.0 / sizeTheta;
  double deltaPhi = 360.0 / sizePhi;

  if (Pec)
    NPole = Near2FarTransform(nGroupTri, ko, Op, 0.0, 0.0, groupRegJ,
			      groupTriArray );
  else
    NPole = Near2FarTransform(nGroupTri, ko, Op, 0.0, 0.0, groupRegJ,
			      groupRegM, groupTriArray );
#pragma omp parallel for default(none) private(theta,phi) \
	shared(sizeTheta, deltaTheta, deltaPhi, sizePhi, ko, Pec, Op, \
	       nGroupTri, groupRegJ, groupRegM, groupTriArray, Epart)
  for	( i = 1; i < sizeTheta/2+1; i++ )	{
    theta = double(i) * deltaTheta;
    for	( int j	= 0; j < sizePhi; j++ )	{
      phi = double(j) * deltaPhi;

	  int k = (i-1)*sizePhi+j;

     if (Pec) Epart[k] = Near2FarTransform(nGroupTri, ko, Op, theta, phi,
				     groupRegJ, groupTriArray );
      else Epart[k] = Near2FarTransform(nGroupTri, ko, Op, theta, phi,
				     groupRegJ, groupRegM, groupTriArray );
    } // end of	phi	loop
  } // end of	theta loop

  // In order to do a Fourier transform along the theta and phi axes, both
  // need to be 2pi-periodic.  Therefore the sample must be extended to theta
  // in the range (180,360), based on the values already calculated for the
  // range [0,180].

  // x component
  // copy theta = 0 samples
  for ( i = 0; i < sizePhi; i++ ) {
    tmp_x[i][0] = NPole.x.real();
    tmp_x[i][1] = NPole.x.imag();
  }

  // copy theta in [0,180] samples
  offset = sizePhi;
  for ( i = offset; i < (sizeTheta/2+1)*sizePhi; i++ ) {
    tmp_x[i][0] = Epart[i-offset].x.real();
    tmp_x[i][1] = Epart[i-offset].x.imag();
  }

  // copy theta in (180,360) samples
  offset = (sizeTheta/2+1)*sizePhi;
  for ( i = 0; i < (sizeTheta-1)/2; i++ ) {
    for ( int j = 0; j < sizePhi/2; j++ ) {
      tmp_x[offset+sizePhi*i+j][0] =
	tmp_x[offset-sizePhi*(i+1)+sizePhi/2+j][0];

      tmp_x[offset+sizePhi*i+j][1] =
	tmp_x[offset-sizePhi*(i+1)+sizePhi/2+j][1];

      tmp_x[offset+sizePhi*i+sizePhi/2+j][0] =
	tmp_x[offset-sizePhi*(i+1)+j][0];

      tmp_x[offset+sizePhi*i+sizePhi/2+j][1] =
	tmp_x[offset-sizePhi*(i+1)+j][1];
    }
  }

  // repeat for y component
  for ( i = 0; i < sizePhi; i++ ) {
    tmp_y[i][0] = NPole.y.real();
    tmp_y[i][1] = NPole.y.imag();
  }

  offset = sizePhi;
  for ( i = offset; i < (sizeTheta/2+1)*sizePhi; i++ ) {
    tmp_y[i][0] = Epart[i-offset].y.real();
    tmp_y[i][1] = Epart[i-offset].y.imag();
  }

  offset = (sizeTheta/2+1)*sizePhi;
  for ( i = 0; i < (sizeTheta-1)/2; i++ ) {
    for ( int j = 0; j < sizePhi/2; j++ ) {
      tmp_y[offset+sizePhi*i+j][0] =
	tmp_y[offset-sizePhi*(i+1)+sizePhi/2+j][0];

      tmp_y[offset+sizePhi*i+j][1] =
	tmp_y[offset-sizePhi*(i+1)+sizePhi/2+j][1];

      tmp_y[offset+sizePhi*i+sizePhi/2+j][0] =
	tmp_y[offset-sizePhi*(i+1)+j][0];

      tmp_y[offset+sizePhi*i+sizePhi/2+j][1] =
	tmp_y[offset-sizePhi*(i+1)+j][1];
    }
  }

  // repeat z component
  for ( i = 0; i < sizePhi; i++ ) {
    tmp_z[i][0] = NPole.z.real();
    tmp_z[i][1] = NPole.z.imag();
  }

  offset = sizePhi;
  for ( i = offset; i < (sizeTheta/2+1)*sizePhi; i++ ) {
    tmp_z[i][0] = Epart[i-offset].z.real();
    tmp_z[i][1] = Epart[i-offset].z.imag();
  }

  offset = (sizeTheta/2+1)*sizePhi;
  for ( i = 0; i < (sizeTheta-1)/2; i++ ) {
    for ( int j = 0; j < sizePhi/2; j++ ) {
      tmp_z[offset+sizePhi*i+j][0] =
	tmp_z[offset-sizePhi*(i+1)+sizePhi/2+j][0];

      tmp_z[offset+sizePhi*i+j][1] =
	tmp_z[offset-sizePhi*(i+1)+sizePhi/2+j][1];

      tmp_z[offset+sizePhi*i+sizePhi/2+j][0] =
	tmp_z[offset-sizePhi*(i+1)+j][0];

      tmp_z[offset+sizePhi*i+sizePhi/2+j][1] =
	tmp_z[offset-sizePhi*(i+1)+j][1];
    }
  }

  UpdateMainField( Lp, ko, MainField, numTheta, numPhi, Op );
}


void UpdateMainField
( int L, double ko, cVtr *MainField, int numTheta, int numPhi, vtr Op )
{
  int i, j, k;
  double theta, phi;
  vtr rVtr;
  Complex cval;

  int sizeTheta = 2*numTheta;
  int sizePhi = numPhi;
  int coeffSize = sizeTheta*sizePhi;

  int partSizeTheta = 2*L+1;
  int partSizePhi = 2*L;

  partSizePhi = (partSizePhi > numPhi) ? numPhi : partSizePhi;
  partSizeTheta = ( L > numTheta) ? 2*numTheta  :  partSizeTheta;

  int partCoeffSize = partSizeTheta*partSizePhi;

  // x component
  memcpy( field, tmp_x, sizeof(fftwf_complex)*partCoeffSize );

  // interpolate along theta direction
  TrigInterp( field, partSizeTheta, partSizePhi, sizeTheta, out );

  for ( i = 0; i < sizeTheta*partSizePhi; i++ ) {
    field[i][0] = out[i][0];
    field[i][1] = out[i][1];
  }

  // interpolate along phi direction
  Transpose( field, sizeTheta, partSizePhi, workspace );
  TrigInterp( field, partSizePhi, sizeTheta, sizePhi, out );

  for ( i = 0; i < sizeTheta*sizePhi; i++ ) {
    field[i][0] = out[i][0];
    field[i][1] = out[i][1];
  }

  Transpose( field, sizePhi, sizeTheta, workspace );

  // add partition field to the total field, taking into account phase shift
  k = 0;
  for ( i = 0; i < sizeTheta/2+1; i++ ) {
    theta = 360.0*double(i)/double(sizeTheta);
    for ( j = 0; j < sizePhi; j++ ) {
      phi = 360.0*double(j) / double(sizePhi);
      rVtr = RDirection( theta, phi );
      cval = Complex(0.0f, ko*dotP(Op-O,rVtr));
      cval = exp( cval );

      MainField[k].x += cval*Complex( field[k][0], field[k][1] );
      k++;
    }
  }

  // repeat for y component
  memcpy( field, tmp_y, sizeof(fftwf_complex)*partCoeffSize );

  TrigInterp( field, partSizeTheta, partSizePhi, sizeTheta, out );

  for ( i = 0; i < sizeTheta*partSizePhi; i++ ) {
    field[i][0] = out[i][0];
    field[i][1] = out[i][1];
  }

  Transpose( field, sizeTheta, partSizePhi, workspace );

  TrigInterp( field, partSizePhi, sizeTheta, sizePhi, out );

  for ( i = 0; i < sizeTheta*sizePhi; i++ ) {
    field[i][0] = out[i][0];
    field[i][1] = out[i][1];
  }

  Transpose( field, sizePhi, sizeTheta, workspace );

  k = 0;
  for ( i = 0; i < sizeTheta/2+1; i++ ) {
    theta = 360.0*double(i)/double(sizeTheta);
    for ( j = 0; j < sizePhi; j++ ) {
      phi = 360.0*double(j) / double(sizePhi);
      rVtr = RDirection( theta, phi );
      cval = Complex(0.0f, ko*dotP(Op-O,rVtr));
      cval = exp( cval );

      MainField[k].y += cval*Complex( field[k][0], field[k][1] );
      k++;
    }
  }

  // repeat z component
  memcpy( field, tmp_z, sizeof(fftwf_complex)*partCoeffSize );

  TrigInterp( field, partSizeTheta, partSizePhi, sizeTheta, out );

  for ( i = 0; i < sizeTheta*partSizePhi; i++ ) {
    field[i][0] = out[i][0];
    field[i][1] = out[i][1];
  }

  Transpose( field, sizeTheta, partSizePhi, workspace );

  TrigInterp( field, partSizePhi, sizeTheta, sizePhi, out );

  for ( i = 0; i < sizeTheta*sizePhi; i++ ) {
    field[i][0] = out[i][0];
    field[i][1] = out[i][1];
  }

  Transpose( field, sizePhi, sizeTheta, workspace );

  k = 0;
  for ( i = 0; i < sizeTheta/2+1; i++ ) {
    theta = 360.0*double(i)/double(sizeTheta);
    for ( j = 0; j < sizePhi; j++ ) {
      phi = 360.0*double(j) / double(sizePhi);
      rVtr = RDirection( theta, phi );
      cval = Complex(0.0f, ko*dotP(Op-O,rVtr));
      cval = exp( cval );

      MainField[k].z += cval*Complex( field[k][0], field[k][1] );
      k++;
    }
  }
}


// Given a matrix stored in row-major format, calcualte the matrix transpose
void Transpose
( fftwf_complex *in, int N, int M, fftwf_complex *workspace )
{
  fftwf_complex *ptr = in, *ws = workspace;

  for ( int i = 0; i < M; i ++ ) {
    ptr = in + i;
    for ( int j = 0; j < N; j++ ) {
      (*ws)[0] = (*ptr)[0];
      (*ws)[1] = (*ptr)[1];
      ptr += M;
      ws++;
    }
  }

  memcpy( in, workspace, sizeof(fftwf_complex)*N*M );
}

// Given an N by M matrix in row-major format, perform trigonometric
// interpolation column-wise, resulting in an nN by M matrix.
void TrigInterp
( fftwf_complex * in, int N, int M, int nN, fftwf_complex * out  )
{
  fftwf_plan fftPlan, ifftPlan;
  int size1 = N, size2 = nN;
  int nyquist = (int) ceil(float(N+1)/2.0f);

  fftPlan = fftwf_plan_many_dft( 1, &size1, M,
	    in, NULL, M, 1, out, NULL, M, 1, FFTW_FORWARD, FFTW_ESTIMATE );

  ifftPlan = fftwf_plan_many_dft( 1, &size2, M,
	     out, NULL, M, 1, out, NULL, M, 1, FFTW_BACKWARD, FFTW_MEASURE );

  memset( out, 0, sizeof(fftwf_complex)*nN*M );
  fftwf_execute( fftPlan );

  if ( N == nN ) goto ifft;
  memcpy(out+M*(nyquist+nN-N), out+M*nyquist,
	 sizeof(fftwf_complex)*M*(N-nyquist) );
  memset(out+M*nyquist, 0, sizeof(fftwf_complex)*M*(N-nyquist) );

  if ( (N % 2) == 0 ) {
    for ( int i = 0; i < M; i++ ) {
      out[M*(nyquist-1)+i][0] /= 2.0;
      out[M*(nyquist-1)+i][1] /= 2.0;
      out[M*(nyquist+nN-N-1)+i][0] = out[M*(nyquist-1)+i][0];
      out[M*(nyquist+nN-N-1)+i][1] = out[M*(nyquist-1)+i][1];
    }
  }

ifft:
  fftwf_execute( ifftPlan );

  double C = 1.0/double(N);
  int k = 0;
  for ( int i = 0; i < M; i++ ) {
    for ( int j = 0; j < nN; j++ ) {
      out[k][0] *= C;
      out[k][1] *= C;
      k++;
    }
  }
}