2D( 2k, 4k, 8k ) FFT (IFFT) source code in C99
#include <math.h>
#include <omp.h> // For CPUs with many cores like Ryzen 9 3900X and later
//////// omp.h =>OpenMP // In distributed computers, you should use OpenMPI.
#include <complex.h> // If use complex.h performance will spoil 10% or more,
// but readability is dramatically improved.
#define INT register int
#define DOUBLE register double
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
#define Rotor(a) (double _Complex)( cos(a) + I*sin(a) )
enum { FFT=DFT, IFFT=IDFT };
void FFT_TwoDimensionalC99( DOUBLE _Complex **d, INT elements, INT flag ) {
INT x, y, lx=elements, ly=elements;
DOUBLE unit = (flag==IFFT) ? 2*M_PI/elements : -2*M_PI/elements;
double _Complex rotor[elements]; for(x=0;x<elements;rotor[x]=Rotor(unit*x),x++);
////// If the flag is FFT, unit=-2*M_PI/elements, if IFFT, unit=2*M_PI/elements.
#pragma omp parallel for // gcc -fopenmp -O3 (Strongly recommendation)
for( y=0; y<ly; y++ ) { // X軸方向の1次元FFT(IFFT)をy行分行う <=並列化
INT i, j, k, m, mh, ir;// ir: irev
DOUBLE _Complex W, temp; // W: Rotation factor
for( i=0, j=1; j<elements-1; j++ ) { // scrambler, for initial placement
for( k=elements>>1; k>(i^=k); k>>=1 );
if( j<i ){ temp=d[y][j], d[y][j]=d[y][i], d[y][i]=temp; } // exchange
}// Because in this pare (i,j), there are two, (a,b) or (b,a), cases.
for( mh=1; (m=mh<<1)<=elements; mh=m ) { // Operations
for( ir=0, i=0; i<elements; i+=m ) { // ir=0: Rotation angle is 0 rad.
for( W=rotor[ir], k=elements>>2; k>(ir^=k); k>>=1 ); //next ir is
for( j=i; j<mh+i; j++ ) { INT a = j, b = j + mh;
temp = d[y][a] - d[y][b]; // For butterfly operation
d[y][a] = d[y][a] + d[y][b]; // a ->- (a + b) ->- a
d[y][b] = temp * W; // b ->- (a-b)*W ->- b
} } }//WELL: DOUBLE reciprocal=1./elements; =>d[y][i++]*=reciprocal);
if(flag==IFFT)for( i=0; i<elements; d[y][i++]/=elements); //Normalization
} // X_END <= このブロック ( { ... } の部分 ) は、スレッドセーフ(thread-safe)
#pragma omp parallel for // gcc -fopenmp -O3 (Strongly recommendation)
for( x=0; x<lx; x++ ) { // Y軸方向の1次元FFT(IFFT)をx列分行う <=並列化
INT i, j, k, m, mh, ir;// ir: irev
DOUBLE _Complex W, temp; // W: Rotation factor
for( i=0, j=1; j<elements-1; j++ ) { // scrambler, for initial placement
for( k=elements>>1; k>(i^=k); k>>=1 );
if( j<i ){ temp=d[j][x], d[j][x]=d[i][x], d[i][x]=temp; } // exchange
}// Because in this pare (i,j), there are two, (a,b) or (b,a), cases.
for( mh=1; (m=mh<<1)<=elements; mh=m ) { // Operations
for( ir=0, i=0; i<elements; i+=m ) { // ir=0: Rotation angle is 0 rad.
for( W=rotor[ir], k=elements>>2; k>(ir^=k); k>>=1 ); //next ir is
for( j=i; j<mh+i; j++ ) { INT a = j, b = j + mh;
temp = d[a][x] - d[b][x]; // For butterfly operation
d[a][x] = d[a][x] + d[b][x]; // a ->- (a + b) ->- a
d[b][x] = temp * W; // b ->- (a-b)*W ->- b
} } }//WELL: DOUBLE reciprocal=1./elements; =>d[i++][x]*=reciprocal);
if(flag==IFFT)for( i=0; i<elements; d[i++][x]/=elements); //Normalization
} // Y_END <= このブロック ( { ... } の部分 ) は、スレッドセーフ(thread-safe)
}
// Ex. (i,j): (32,16), (16,32), (48,48), (64,8), (8,64), (72,72), (80,40), .....
////////////////////////////////////////////////////////////////////////
// How to use FFT_TwoDimensionalC99(...)
//
//extern int fft_check( int );
//void FFT_TwoDimensionalC99( double _Complex **d, int elements, int flag ) {
//if(!fft_check(elements)) return DFT_TwoDimensionalC99(d,elements,elements,flag);
//
int fft_check( int n ) {
if( n<1000 || 20000<n ) return 0;
for(;n>1;n>>=1) if(n&1) return 0;
return 1;
}
int fft_neo_check( int lx, int ly ) {// 2K, 4K, 8K サイズ対応用
if( lx!=ly || lx<2000 || 10000<lx ) return 0;
return fft_check(lx);
}
void dft_TwoDimensional( COMPLEX **s, int lx, int ly, int flag ) {
int w = lx>ly ? lx : ly;
COMPLEX **spreadsheet = NULL;
double _Complex **ss = (double _Complex **)s;
if(fft_neo_check(lx,ly)) return FFT_TwoDimensionalC99(ss,lx,flag);
if( (spreadsheet=(COMPLEX**)alloc2Darray(sizeof(COMPLEX),w,w)) ) {
dft_TwoDimensionalWithTable( spreadsheet, w, s, lx, ly, flag );
free(spreadsheet);
} else dft_TwoDimensionalStandard( s, lx, ly, flag );
}