///////////  Fourier Transform Full Source Code  ///////////////////////////////
////  Multi-threaded version of Fourier Transform source code for 2D data.  ////
////  コンパイル方法の例: gcc -fopenmp -O3 -Wall -W -Wextra -o 2DFT 2DFT.c -lm
//
//////// https://github.com/nothings/stb/blob/master/stb_image.h
#define  STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"  // To use stbi_load() and stbi_image_free()
#include <stdlib.h>
#include <stddef.h>

#pragma pack(push,1)
typedef struct PIXEL   { unsigned char B, G, R, A; } PIXEL ;
#pragma pack(pop)

#pragma pack(push,2)
typedef struct HEADER {       // BMP => bfType==0x4D42 (0x42,0x4D) little endian
    unsigned short bfType;          // identification  0x42='B', 0x4D='M'
    unsigned int   bfSize;          // ??
    unsigned short bfReserved1;     //  0
    unsigned short bfReserved2;     //  0
    unsigned int   bfOffBits;       // 54  sizeof(HEADER)
    ////////////////////////////////////////////////////////////////////////////
    unsigned int   biSize;          // 40 sizeof(biSize ~ biClrImportant)
    signed int     biWidth;         // lx
    signed int     biHeight;        // ly
    unsigned short biPlanes;        //  1
    unsigned short biBitCount;      // 32
    unsigned int   biCompression;   //  0
    unsigned int   biSizeImage;     // ??
    signed int     biXPelsPerMeter; //  0
    signed int     biYPelsPerMeter; //  0
    unsigned int   biClrUsed;       //  0
    unsigned int   biClrImportant;  //  0
    ////////////////////////////////////////////////////////////////////////////
    unsigned char  lut[];
} HEADER ;
#pragma pack(pop)

#define BMPID              0x4D42
#define BMPIFHSIZE         40
#define M_PI               3.14159265358979323846
#define IFNAMEDEFAULT      "sample.bmp"
#define OFNAMEDEFAULT      "result.bmp"
#define saveArray2bmp32(name,a,w,h)   \
                           saveDataAsBMP32((name),(&(a)[0][0]),(w),(h))

void **alloc2D( void *data, int s, int lx, int ly ){
  long long i, w=lx, h=ly;  if( !data || s<1 || lx<1 || ly<1 ) return NULL;
  void **d, *base=data ;

    if( (d=(void **)malloc(h*sizeof(void *))) == NULL ) return NULL ;
    for( i=0; i<ly; i++ ) d[i] = (void *)((unsigned char *)base + w*s*i) ;
    return d ;
}

void **alloc2Darray( int s, int lx, int ly ){
  long long i, w=lx, h=ly;  if(s<1 || lx<1 || ly<1) return NULL ;
  void **d, *base ;

    if( (d=(void **)calloc(1,h*sizeof(void *)+(w*h*s))) == NULL ) return NULL ;
    base = (unsigned char *)d + h*sizeof(void *) ;
    for( i=0; i<ly; i++ ) d[i] = (void *)((unsigned char *)base + w*s*i) ;
    return d ;
}

int saveDataAsBMP32( char *fname, void *b, int lx, int ly ){
  HEADER h = {BMPID, lx*abs(ly)*sizeof(PIXEL)+sizeof(HEADER), 0, 0, sizeof(HEADER),
              BMPIFHSIZE, lx, ly, 1, 32, 0, lx*abs(ly)*sizeof(PIXEL), 0, 0, 0, 0} ;
  FILE *fp=NULL ;
  int  st=-1;

    if( (fp=fopen(fname, "wb")) == NULL ) {
        fprintf( stderr, "File '%s': cannot create.\n", fname );
    } else if ( fwrite( &h, sizeof(HEADER), 1, fp ) != 1 ) {
        fprintf( stderr, "File '%s': Header write error.\n", fname );
    } else if ( fwrite( b, h.biSizeImage, 1, fp ) != 1 ) {
        fprintf( stderr, "File '%s': Data write error.\n", fname );
    } else st=0;
    if(fp)fclose(fp) ;
    return st;
}
////////////////////////////////////////////////////////////////////////////////
#include <math.h>
#include <omp.h>       // gcc -fopenmp -O3  ( -O3: Strongly recommendation )

typedef struct COMPLEX { double r, i; } COMPLEX;

enum { DFT, IDFT };

#define RotorCOMPLEX(a)    (COMPLEX){ cos(a), sin(a) }
#define AddCOMPLEX(a,b)    (COMPLEX){ (a).r + (b).r, (a).i + (b).i }
#define MulCOMPLEX(a,b)    (COMPLEX){ (a).r*(b).r - (a).i*(b).i, (a).r*(b).i + (a).i*(b).r }
#define ImeDivCOMPLEX(a,b) (COMPLEX){ (a).r / (b), (a).i / (b) }
#define ImeMulCOMPLEX(a,b) (COMPLEX){ (a).r * (b), (a).i * (b) }
#define AppendCOMPLEX(a,b) { (a).r += (b).r, (a).i += (b).i ; }

void dft_TwoDimensional( COMPLEX **d, int lx, int ly, int flag ) {
  int     x, y, elements;
  double  unit;
  COMPLEX rotor[(lx>ly?lx:ly)];

    elements = lx;  unit = (flag==IDFT) ? 2*M_PI/elements : -2*M_PI/elements;
    for( int e=0; e<elements; rotor[e]=RotorCOMPLEX(unit*e), e++ );
    #pragma omp parallel for
    for( y=0; y<ly; y++ ) { COMPLEX tmp[elements];//You need to be aware of the thread stack overflow.
        for(int u=0; u<elements; u++ ){ int v; COMPLEX t = (COMPLEX){0,0};
            for( v=0; v<elements; v++ ) AppendCOMPLEX( t, MulCOMPLEX(d[y][v],rotor[(u*v)%elements]) );
            tmp[u] = t;
        }   double reciprocal = 1./elements; // tmp[u]/elements -> tmp[u]*reciprocal
        if(flag==IDFT) for(int e=0; e<elements; e++ ) d[y][e] = ImeMulCOMPLEX(tmp[e],reciprocal);
        else           for(int e=0; e<elements; e++ ) d[y][e] = tmp[e];
    } //////////////////////////////////////////////////////////////////////////
    elements = ly;  unit = (flag==IDFT) ? 2*M_PI/elements : -2*M_PI/elements;
    if(lx!=ly){ for( int e=0; e<elements; rotor[e]=RotorCOMPLEX(unit*e), e++ ); }
    #pragma omp parallel for
    for( x=0; x<lx; x++ ) { COMPLEX tmp[elements];//You need to be aware of the thread stack overflow.
        for(int u=0; u<elements; u++ ){ int v; COMPLEX t = (COMPLEX){0,0};
            for( v=0; v<elements; v++ ) AppendCOMPLEX( t, MulCOMPLEX(d[v][x],rotor[(u*v)%elements]) );
            tmp[u] = t;
        }   double reciprocal = 1./elements; // tmp[u]/elements -> tmp[u]*reciprocal
        if(flag==IDFT) for(int e=0; e<elements; e++ ) d[e][x] = ImeMulCOMPLEX(tmp[e],reciprocal);
        else           for(int e=0; e<elements; e++ ) d[e][x] = tmp[e];
    } //////////////////////////////////////////////////////////////////////////
}

#include <complex.h>

#define INT       register int
#define DOUBLE    register double
#define Rotor(a)  (double _Complex)( cos(a) + I*sin(a) )

enum { FFT=DFT, IFFT=IDFT };

void FFT_TwoDimensionalC99( DOUBLE _Complex **d, INT elements, INT flag ) {
  INT     x, y,  lx=elements, ly=elements;
  DOUBLE  unit = (flag==IFFT) ? 2*M_PI/elements : -2*M_PI/elements;
  double _Complex rotor[elements]; for(x=0;x<elements;rotor[x]=Rotor(unit*x),x++);
  ////  If the flag is FFT, unit=-2*M_PI/elements, if IFFT, unit=2*M_PI/elements.

    #pragma omp parallel for
    for( y=0; y<ly; y++ ) {
        INT     i, j, k, m, mh, ir;  // ir: irev
        DOUBLE _Complex  W, temp;    //  W: Rotation factor
        for( i=0, j=1; j<elements-1; j++ ) {// scrambler, for initial placement
            for( k=elements>>1; k>(i^=k); k>>=1 );
            if( j<i ){ temp=d[y][j], d[y][j]=d[y][i], d[y][i]=temp; }// exchange
        }
        for( mh=1; (m=mh<<1)<=elements; mh=m ) { // Operations
            for( ir=0, i=0; i<elements; i+=m ) { // ir=0: Rotation angle is 0rad.
                for( W=rotor[ir], k=elements>>2; k>(ir^=k); k>>=1 );
                for( j=i; j<mh+i; j++ ) { INT a = j,  b = j + mh;
                      temp  = d[y][a] - d[y][b]; // For butterfly operation
                    d[y][a] = d[y][a] + d[y][b]; // a ->- (a + b) ->- a
                    d[y][b] = temp * W;          // b ->- (a-b)*W ->- b
        }   }   }   DOUBLE reciprocal=1./elements;
        if(flag==IFFT)for( i=0; i<elements; d[y][i++]*=reciprocal);
    }
    #pragma omp parallel for
    for( x=0; x<lx; x++ ) {
        INT     i, j, k, m, mh, ir;  // ir: irev
        DOUBLE _Complex  W, temp;    //  W: Rotation factor
        for( i=0, j=1; j<elements-1; j++ ) {// scrambler, for initial placement
            for( k=elements>>1; k>(i^=k); k>>=1 );
            if( j<i ){ temp=d[j][x], d[j][x]=d[i][x], d[i][x]=temp; }// exchange
        }
        for( mh=1; (m=mh<<1)<=elements; mh=m ) { // Operations
            for( ir=0, i=0; i<elements; i+=m ) { // ir=0: Rotation angle is 0rad.
                for( W=rotor[ir], k=elements>>2; k>(ir^=k); k>>=1 );
                for( j=i; j<mh+i; j++ ) { INT a = j,  b = j + mh;
                      temp  = d[a][x] - d[b][x]; // For butterfly operation
                    d[a][x] = d[a][x] + d[b][x]; // a ->- (a + b) ->- a
                    d[b][x] = temp * W;          // b ->- (a-b)*W ->- b
        }   }   }   DOUBLE reciprocal=1./elements;
        if(flag==IFFT)for( i=0; i<elements; d[i++][x]*=reciprocal);
    }
}


void interchange2D( COMPLEX **d, int lx, int ly ) {
  int  x, y, hlx=lx/2, hly=ly/2;

    for( y=0; y<hly; y++ ) for( x=0; x<hlx; x++ ){ COMPLEX t;
        t = d[y][x]; d[y][x] = d[hly +y][hlx +x]; d[hly +y][hlx +x] = t;
        t = d[y][hlx +x]; d[y][hlx +x] = d[hly +y][x]; d[hly +y][x] = t;
    }
}

void PowerSpectrum( char *fname, COMPLEX **u, PIXEL **a, int lx, int ly ) {
  int    x, y, gray;
  double temp, min, max;

    min=max=u[0][0].r*u[0][0].r + u[0][0].i*u[0][0].i;
    for( y=0; y<ly; y++ ) for( x=0; x<lx; x++ ) {
        if( max<(temp=u[y][x].r*u[y][x].r + u[y][x].i*u[y][x].i) ) max = temp;
        else if( min>temp ) min = temp;
    }   min = sqrt(min);  max = sqrt(max);
    double mag = 255.0 / (double)( max>min ? log10(max-min) : 1.0 );
    for( y=0; y<ly; y++ ) for( x=0; x<lx; x++ ) {
        gray = mag*log10(sqrt(u[y][x].r*u[y][x].r + u[y][x].i*u[y][x].i) -min);
        if(255<gray) gray=255; else if( gray<0 ) gray=0;
        a[y][x] = (PIXEL){gray,gray,gray,0};
    }   if(fname)saveArray2bmp32( fname, a, lx, ly );
}

int f_check( int lx, int ly ) {// 512 1024, 2K, 4K, 8K サイズ対応用
    if( lx!=ly || lx<500 || 10000<lx ) return 0;
    for(int n=lx;n>1;n>>=1) if(n&1)    return 0;
    return 1;
}// If you want to handle larger image, you may need to change the stack size

void ft_TwoDimensional( COMPLEX **s, int lx, int ly, int flag ) {
    double _Complex **ss = (double _Complex **)s;
    if(f_check(lx,ly)) FFT_TwoDimensionalC99( ss, lx, flag );
    else               dft_TwoDimensional( s, lx, ly, flag );
}


int FourierTransform( PIXEL **s, PIXEL **d, COMPLEX **c, int lx, int ly ){
    int x, y;
    // 画像データを濃淡画像に変換して、複素数データを作成
    for( y=0; y<ly; y++ )for( x=0; x<lx; x++ ){// For all pixels (idiomatic phrase)
        int gray = (306*s[y][x].R + 601*s[y][x].G + 117*s[y][x].B)>>10;
        c[y][x]  = (COMPLEX){gray,0};
    }
   
    //// フーリエ 変換  順変換( DFT or FFT )
    ft_TwoDimensional( c, lx, ly, DFT );
    //// パワースペクトラム画像作成) 中心が低周波、四隅が高周波成分
    interchange2D( c, lx, ly );  // 低周波と高周波成分の場所の入れ替え
    PowerSpectrum( "PowerSpectrum.bmp", c, d, lx, ly );
    interchange2D( c, lx, ly );  // 低周波と高周波成分の場所の入れ替え(戻す)
    //// フーリエ 変換  逆変換( IDFT or IFFT )
    ft_TwoDimensional( c, lx, ly, IDFT );

    // 複素数データのリアルパートを 画像データ(濃淡画像)として抽出
    for( y=0; y<ly; y++ )for( x=0; x<lx; x++ ){// For all pixels (idiomatic phrase)
        int real = c[y][x].r,  gray = real<0 ? 0 : ( 255<real ? 255 : real );
        d[y][x]  = (PIXEL){ gray, gray, gray, 0 };
    }
    return 0;
}

////////////////////////////////////////////////////////////////////////////////
void argumentAnalysis(int argc, char *argv[], char **iname, char **oname){
  int  i, st=1;
    for( i=1; i<argc; i++ ) {
        if( argv[i][0]=='-' ) {
            switch(argv[i][1]) {
             /* case 'v': verbose=1; break;  // Normal definition */
                case 'h': st=0;      // Does not break INTENTIONALLY
                default : goto GOOD_BY;
            }
        }
        else if( *iname==NULL ) *iname = argv[i];
        else if( *oname==NULL ) *oname = argv[i];
        else goto GOOD_BY;
    }
    return;
  GOOD_BY:;
    char *p=argv[0], *op="[-h] ";
    for( i=strlen(p); i>0 && p[i]!='\\' && p[i]!='/'; i-- );; if(i) i++;
    fprintf( stderr, "\nusage: %s %s[input_image [output.bmp]]\n\n", &p[i], op );
    fprintf( stderr, "\t-h: This option shows only this message.\n" );
    exit(st);
}

void imageFittingOperation( PIXEL **s, int lx, int ly ) {
  int   x, y, w, ey=(ly +1)/2, ty=ly-1;
    for( y=0; y<ey; y++ ) for( w=ty-y, x=0; x<lx; x++ ) {
        PIXEL p = (PIXEL){ s[y][x].R, s[y][x].G, s[y][x].B, 0 };
        s[y][x] = (PIXEL){ s[w][x].R, s[w][x].G, s[w][x].B, 0 };
        s[w][x] = p;
    }// 色要素順の変更( B ⇔ R )及び 左上原点系 から 左下原点系への変換
}

int main(int argc, char *argv[]) {
  int     lx, ly, bpp, st=1;
  char    *raw=NULL, *ifname=NULL, *ofname=NULL;
  PIXEL   **s=NULL, **d=NULL ;
  COMPLEX **c=NULL;;

    argumentAnalysis( argc, argv, &ifname, &ofname );
    if( ifname==NULL ) ifname = IFNAMEDEFAULT;
    if( ofname==NULL ) ofname = OFNAMEDEFAULT;
   
    if( (raw=(char*)stbi_load( ifname, &lx, &ly, &bpp, 4 ))==NULL ) {
        fprintf(stderr,"Couldn't load the image '%s'.\n",ifname);  goto GOOD_BY;
    } else if( (s=(PIXEL**)alloc2D( raw, sizeof(PIXEL), lx, ly ))==NULL ) {
        fprintf(stderr, "Couldn't allocate memory.\n");            goto GOOD_BY;
    } else imageFittingOperation( s, lx, ly );
   
    if( (d=(PIXEL**)alloc2Darray( sizeof(PIXEL), lx, ly ))==NULL ) {
        fprintf(stderr, "Couldn't allocate memory.\n");            goto GOOD_BY;
    } else if( (c=(COMPLEX**)alloc2Darray( sizeof(COMPLEX), lx, ly ))==NULL ) {
        fprintf(stderr, "Couldn't allocate memory.\n");            goto GOOD_BY;
    } else if( (st=FourierTransform( s ,d, c, lx, ly ))==0 ) {
        saveArray2bmp32( ofname, d, lx, ly );
    }
  GOOD_BY:;
    free(c);
    free(d);
    free(s);
    stbi_image_free(raw);
    return st;
}