///////  CZT and ICZT  Chirp Z-transform Full Source Code For Image Data  //////
///
///  gcc -fopenmp -O3 -Wall -W -Wextra -o CZT_for_image CZT_for_image.c -lm
//
//
////////////////////////////////////////////////////////////////////////////////
#define STB_IMAGE_IMPLEMENTATION

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

//////// https://github.com/nothings/stb/blob/master/stb_image.h
#include "stb_image.h"

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <math.h>

#include <complex.h>

#undef  PI
#define PI  3.1415926535897932384626433832795L  // 円周率(π)

#define Rotor(a)      (double complex)( cos(a) + I*sin(a) )
#define COMPLEX(a,b)  (double complex)( (a) + I*(b) )

enum { DFT, IDFT };
enum { FFT=DFT, IFFT=IDFT };
enum { CZT=DFT, ICZT=IDFT };


#pragma pack(push,1)
typedef struct PIXEL   { unsigned char B, G, R, A; } PIXEL ;
#pragma pack(pop)

typedef struct COMPLEX { double r, i; } COMPLEX;

#pragma pack(push,2)
typedef struct HEADER {       // BMP => bfType==0x4D42 (0x42,0x4D) little endian
     unsigned short  bfType;        //  identification  0x42='B', 0x4D='M'
    unsigned int   bfSize;          // ??
    unsigned short bfReserved1;     //  0
    unsigned short bfReserved2;     //  0
    unsigned int   bfOffBits;       // 54  sizeof(HEADER)
    ////////////////////////////////////////////////////////////////////////////
     unsigned int    biSize;        // 40  sizeof(biSize ~ biClrImportant)
    signed int     biWidth;         // lx
    signed int     biHeight;        // ly
    unsigned short biPlanes;        //  1
    unsigned short biBitCount;      // 32
    unsigned int   biCompression;   //  0
    unsigned int   biSizeImage;     // ??
    signed int     biXPelsPerMeter; //  0
    signed int     biYPelsPerMeter; //  0
    unsigned int   biClrUsed;       //  0
    unsigned int   biClrImportant;  //  0
    ////////////////////////////////////////////////////////////////////////////
    unsigned char  lut[];
} HEADER ;
#pragma pack(pop)

#define BMPID              0x4D42
#define BMPIFHSIZE         40

#define saveArray2bmp32(name,a,w,h)   \
                            saveDataAsBMP32((name),(&(a)[0][0]),(w),(h))
                            

int dft=0, verbose=0;


void **alloc2D( void *data, int s, int lx, int ly ){
  long long i, w=lx, h=ly;  if( !data || s<1 || lx<1 || ly<1 ) return NULL;
  void **d, *base=data ;

    if( (d=(void **)malloc(h*sizeof(void *))) == NULL ) return NULL ;
    for( i=0; i<ly; i++ ) d[i] = (void *)((unsigned char *)base + w*s*i) ;
    return d ;
}

void **alloc2Darray( int s, int lx, int ly ){
  long long i, w=lx, h=ly;  if(s<1 || lx<1 || ly<1) return NULL ;
  void **d, *base ;

    if( (d=(void **)calloc(1,h*sizeof(void *)+(w*h*s))) == NULL ) return NULL ;
    base = (unsigned char *)d + h*sizeof(void *) ;
    for( i=0; i<ly; i++ ) d[i] = (void *)((unsigned char *)base + w*s*i) ;
    return d ;
}

int saveDataAsBMP32( char *fname, void *b, int lx, int ly ){
  HEADER h = {BMPID, lx*abs(ly)*sizeof(PIXEL)+sizeof(HEADER), 0, 0, sizeof(HEADER),
               BMPIFHSIZE, lx, ly, 1, 32, 0, lx*abs(ly)*sizeof(PIXEL), 0, 0, 0, 0} ;
  FILE *fp=NULL ;
  int  st=-1;

    if( (fp=fopen(fname, "wb")) == NULL ) {
        fprintf( stderr, "File '%s': cannot create.\n", fname );
    } else if ( fwrite( &h, sizeof(HEADER), 1, fp ) != 1 ) {
        fprintf( stderr, "File '%s': Header write error.\n", fname );
    } else if ( fwrite( b, h.biSizeImage, 1, fp ) != 1 ) {
        fprintf( stderr, "File '%s': Data write error.\n", fname );
    } else st=0;
    if(fp)fclose(fp) ;
    return st;
}

////////////////////////////////////////////////////////////////////////////////
////
//// 二次元の離散フーリエ変換
//// Two-dimensional discrete Fourier transform (DFT)
////
#include <omp.h>       // gcc -fopenmp -O3  ( -O3: Strongly recommendation )

#define AppendCOMPLEX(a,b) { (a).r += (b).r, (a).i += (b).i ; }

#define ConjCOMPLEX(a)     (COMPLEX){ (a).r, -((a).i) }
#define RotorCOMPLEX(a)    (COMPLEX){ cos(a), sin(a) }
#define AddCOMPLEX(a,b)    (COMPLEX){ (a).r + (b).r, (a).i + (b).i }
#define SubCOMPLEX(a,b)    (COMPLEX){ (a).r - (b).r, (a).i - (b).i }
#define MulCOMPLEX(a,b)    (COMPLEX){ (a).r*(b).r - (a).i*(b).i, (a).r*(b).i + (a).i*(b).r }
#define ImeMulCOMPLEX(a,b) (COMPLEX){ (a).r * (b), (a).i * (b) }

extern int verbose;

void dft_TwoDimensional( COMPLEX **d, int lx, int ly, int flag ) {
  if(verbose)printf("execution: %s\t%d\n", __func__, flag );
  int     x, y, elements;
  double  unit;
  COMPLEX rotor[(lx>ly?lx:ly)];

    elements = lx;  unit = (flag==IDFT) ? 2*M_PI/elements : -2*M_PI/elements;
    for( int e=0; e<elements; rotor[e]=RotorCOMPLEX(unit*e), e++ );
    #pragma omp parallel for
    for( y=0; y<ly; y++ ) { COMPLEX tmp[elements];//You need to be aware of the thread stack overflow.
        for(int u=0; u<elements; u++ ){ int v; COMPLEX t = (COMPLEX){0,0};
             for( v=0; v<elements; v++ ) AppendCOMPLEX( t,  MulCOMPLEX(d[y][v],rotor[(u*v)%elements]) );
            tmp[u] = t;
         }   double reciprocal = 1./elements; // tmp[u]/elements ->  tmp[u]*reciprocal
         if(flag==IDFT) for(int e=0; e<elements; e++ ) d[y][e] = ImeMulCOMPLEX(tmp[e],reciprocal);
         else           for(int e=0; e<elements; e++ ) d[y][e] = tmp[e];
    } //////////////////////////////////////////////////////////////////////////
    elements = ly;  unit = (flag==IDFT) ? 2*M_PI/elements : -2*M_PI/elements;
    if(lx!=ly){ for( int e=0; e<elements; rotor[e]=RotorCOMPLEX(unit*e), e++ ); }
    #pragma omp parallel for
    for( x=0; x<lx; x++ ) { COMPLEX tmp[elements];//You need to be aware of the thread stack overflow.
        for(int u=0; u<elements; u++ ){ int v; COMPLEX t = (COMPLEX){0,0};
             for( v=0; v<elements; v++ ) AppendCOMPLEX( t,  MulCOMPLEX(d[v][x],rotor[(u*v)%elements]) );
            tmp[u] = t;
         }   double reciprocal = 1./elements; // tmp[u]/elements ->  tmp[u]*reciprocal
         if(flag==IDFT) for(int e=0; e<elements; e++ ) d[e][x] = ImeMulCOMPLEX(tmp[e],reciprocal);
         else           for(int e=0; e<elements; e++ ) d[e][x] = tmp[e];
    } //////////////////////////////////////////////////////////////////////////
}

////////////////////////////////////////////////////////////////////////////////
////
//// CZT Chirp Z-transform, Bluestein's algorithm
////
//// Structure to store data for CZT Chirp Z-transform
////  samples    : number of actual data
////  samples_out: Actual data count (for output, same as input one)
////  samples_fft: Number of data used in CZT conversion (power of 2)
////  flag_fft   : flag indicating if "samples" is a power of 2
////  w          : The number of weight data elements is (samples)
////  v          : The number of  impulse response data elements is (samples_fft)
////  
typedef struct CZT_CONTROL {
    int  samples;      // 標本点の数
    int  samples_out;  // 出力する標本点の数
    int  samples_fft;  // 2の整数乗値:flag_fft?samples:2*get_czt_size(samples)
    int  flag_fft;     // 直接FFTを実行できるサイズかの判定
    double complex *w; // 重みデータ            要素数は(samples)
    double complex *v; // インパルス応答データ  要素数は(samples_fft)
} CZT_CONTROL;

////////////////////////////////////////////////////////////////////////////////
//// checks if the argument is a power of 2 ( Returns 1 if it is a power of 2 )
////
int ft_size_check( int n ){ if( n<4 ) return 0;
    for(int e=n;e>1;e>>=1 ) if( e&1 ) return 0;
    return 1;
}

////////////////////////////////////////////////////////////////////////////////
//// A function that returns the smallest power of 2 that is greater than or 
//// equal to twice its argument
////
int get_czt_size( int n ){
    if( ft_size_check(n) ) return n;
    int a=1, b= n<<1;
    for( a=1; a<b; a<<=1 );
    return a;
}

////////////////////////////////////////////////////////////////////////////////
//// FFT routine for simple one-dimensional data
////     double complex *c: Pointer to complex array for input/output
////     elements  : number of data in complex array
////     flag      : Flag indicating forward transform or inverse transform
////                 { FFT | IFFT }
////
void fft( double complex *c, int elements, int flag ){
    if( !ft_size_check(elements) ) return; //2の冪(べき)のみ、power of two, ONLY
    double complex *rotor = (double complex *)calloc( elements, sizeof(double complex) );
    if(!rotor) return; // Processing is aborted because the twiddle factor table
                       // used for calculation could not be secured.
    double  unit = (flag==IFFT) ? 2*M_PI/elements : -2*M_PI/elements;;
    for(int e=0; e<elements; rotor[e]=Rotor(unit*e), e++ );
    
    for(int i=0, j=1; j<elements-1; j++ ) {// scrambler, for initial placement
        for(int k=elements>>1; k>(i^=k); k>>=1 );
        if( j<i ){double complex temp=c[j]; c[j]=c[i], c[i]=temp; }  // exchange
    }
    for(int m, mh=1; (m=mh<<1)<=elements; mh=m ) {// Operations
         for(int ir=0, i=0; i<elements; i+=m ) {  // ir=0:  Rotation angle is 0rad.
             register double complex temp, W=rotor[ir];  // W: Rotation factor
            for(int k=elements>>2; k>(ir^=k); k>>=1 );
            for(int j=i; j<mh+i; j++ ) { int a = j,  b = j + mh;
                 temp = c[a] - c[b];  // For butterfly operation
                 c[a] = c[a] + c[b];  // a ->- (a + b) ->- a
                 c[b] = temp * W;     // b ->- (a-b)*W ->- b
    }   }   }   double reciprocal = 1./elements;
    if(flag==IFFT)for(int i=0; i<elements; c[i++]*=reciprocal );
    free(rotor);
}

////////////////////////////////////////////////////////////////////////////////
//// A function that generates a structure for Chirp Z-transform for a given 
//// integer value and returns its pointer.
//// You MUST call "free()" with this pointer when it is no longer needed.
////
CZT_CONTROL *GenerateCZT_CONTROL( int n ){
    int samples     = n;
    int samples_out = n;
    int samples_fft = ft_size_check(n) ? n : get_czt_size(n);
    int flag_fft    = ft_size_check(n);
    CZT_CONTROL  *a = (CZT_CONTROL*)malloc( sizeof(CZT_CONTROL) + sizeof(double complex) *
                                           (samples + (samples_fft +1)) );  // +1 for dummy
    if( !a ) return NULL;
    
    *a = (CZT_CONTROL){
             .samples     = samples,         // 標本点の数
             .samples_out = samples_out,     // 出力する標本点の数
             .samples_fft = samples_fft,     // 2の冪:2の整数乗値
             .flag_fft    = flag_fft,        // samples  が直接FFTを実行できるサイズかの判定
             .w           =  (double complex *)&a[1],       // 重みデータ           要素数は(samples)
             .v           =  &((double complex *)&a[1])[n], // インパルス応答データ 要素数は(samples_fft)  +dummy
           };
    double rad = PI/n;   double complex *w=a->w, *v=a->v;   // as alias
    for(int i=n;i<samples_fft;i++) v[i]=COMPLEX(0,0);
    for(int tail=samples_fft, n2=n<<1, j=0, i=0;i<n;i++, j=(j+(2*i-1))%n2 ){
        double c=cos(rad*j), s=sin(rad*j);
        w[i]      = COMPLEX( c, -s ); // complex conjugate: 0<= { w[i] <-> v[i] } <n
        v[i]      = COMPLEX( c,  s );
        v[tail-i] = COMPLEX( c,  s ); // v[tail] was keeded as dummy area
    }   fft( v, samples_fft, FFT );
    return a;
}

////////////////////////////////////////////////////////////////////////////////
//// Chirp Z-transform routine for fast Fourier transform of arbitrary size data
//// Argument structure is the same as the FFT routine
////
void czt( double complex *c, int n, int flag ){//For One Dimension data processing
    if( ft_size_check(n) ) return fft( c, n, flag );
    CZT_CONTROL    *czt = NULL;
    double complex *tmp = NULL;   // Temporary work area used by CZT

    if( !(czt = GenerateCZT_CONTROL( n ) )  )                        goto GOOD_BY;
    int    nx = czt->samples_fft, dc_size = sizeof(double complex);
    double complex *w = czt->w;
    double complex *v = czt->v;
    
    if( !(tmp = (double complex*)calloc( nx, dc_size ))  )           goto GOOD_BY;

    // Division is slow, so we replace it with multiplication using the reciprocal.
    double   m = 1.0/n;  // a*m, same as a/n. if m is m = 1.0/n;

    //for(int i=n;i<nx;tmp[i++]=0+I*0);    // from n to nx are already zero.
    if( flag==ICZT ) { // 逆変換
      // ICZT part: Inverse transform processing part, inverse transform can be 
      // easily realized by using complex conjugate numbers.
      for(int i=0;i< n;i++) tmp[i] =   c[i] * conj(w[i]);
      fft( tmp, nx,  FFT );
      for(int i=0;i<nx;i++) tmp[i] = tmp[i] * conj(v[i]);
      fft( tmp, nx, IFFT );
      for(int i=0;i< n;i++)   c[i] = tmp[i] * conj(w[i]) * m;
      // Note: For the inverse transform, divide the result after the transform 
      // process by the number of elements.
    } else {// flag==CZT  順変換
      // CZT part: Forward transform processing unit
      for(int i=0;i< n;i++) tmp[i] =   c[i] * w[i]; // weight data multiplication
       fft( tmp, nx,  FFT  );                          // Fourier transform
      for(int i=0;i<nx;i++) tmp[i] = tmp[i] * v[i]; // convolution
       fft( tmp, nx, IFFT  );                          // inverse Fourier  transform                                 
      for(int i=0;i< n;i++)   c[i] = tmp[i] * w[i]; // weight data multiplication
    }

  GOOD_BY:;
    free( tmp );
    free( czt );
}

////////////////////////////////////////////////////////////////////////////////
//// Simple Chirp Z-transform routine
////
void czt_with( CZT_CONTROL *czt, double complex *c, int n, int nx, int flag ){
    double complex *w = czt->w,  *v = czt->v,   m = 1.0/n;

    for(int i=n;i<nx;c[i++]=0+I*0);

    if( flag == ICZT ) {
      for(int i=0;i< n;i++) c[i] *= conj(w[i]);
      fft( c, nx,  FFT );        
      for(int i=0;i<nx;i++) c[i] *= conj(v[i]);
      fft( c, nx, IFFT );        
      for(int i=0;i< n;i++) c[i] *= conj(w[i]) * m;
    } else {//  CZT  ///////////////////////////
      for(int i=0;i< n;i++) c[i] *= w[i];
      fft( c, nx,  FFT );        
      for(int i=0;i<nx;i++) c[i] *= v[i];
      fft( c, nx, IFFT );        
      for(int i=0;i< n;i++) c[i] *= w[i];
    }
}

////////////////////////////////////////////////////////////////////////////////
////
//// czt_TwoDimensional
//// 
void czt_TwoDimensional( double complex **c, int lx, int ly, int flag ){
    if(verbose)printf("execution: %s\t%d\n", __func__, flag );
    CZT_CONTROL    *czt_lx = NULL, *czt_ly = NULL;
    double complex **tx = NULL,    **ty = NULL;
    
    if( !(czt_lx = GenerateCZT_CONTROL( lx ))  )                   goto GOOD_BY;
    if( !(czt_ly = GenerateCZT_CONTROL( ly ))  )                   goto GOOD_BY;
    
    int fft_lx=czt_lx->samples_fft,   fft_ly=czt_ly->samples_fft;
    #define  DCPX  double complex
    if( !(tx=(DCPX**)alloc2Darray( sizeof(DCPX), fft_lx, ly )) )   goto GOOD_BY;
    if( !(ty=(DCPX**)alloc2Darray( sizeof(DCPX), fft_ly, lx )) )   goto GOOD_BY;
    ////////////////////////////////////////////////////////////////////////////
    for(int y=0;y<ly;y++)for(int x=0;x<lx;x++)tx[y][x]=c[y][x];
    
    #pragma omp parallel for
    for(int y=0;y<ly;y++)
        czt_with( czt_lx, tx[y], czt_lx->samples, czt_lx->samples_fft, flag );
    
    for(int y=0;y<ly;y++)for(int x=0;x<lx;x++)ty[x][y]=tx[y][x];
    
    #pragma omp parallel for
    for(int x=0;x<lx;x++)
        czt_with( czt_ly, ty[x], czt_ly->samples, czt_ly->samples_fft, flag );

    for(int y=0;y<ly;y++)for(int x=0;x<lx;x++)c[y][x]=ty[x][y];
    ////////////////////////////////////////////////////////////////////////////
  GOOD_BY:;
    free(ty);
    free(tx);
    free(czt_ly);
    free(czt_lx);
}

int TwoDimensionalFourierTransform( PIXEL **s ,PIXEL **d, int lx, int ly ){
    double complex **c1=NULL, **c2=NULL,**c3=NULL;
    
    if( !(c1 = (double complex **)alloc2Darray( sizeof(double complex), lx, ly )) ) goto GOOD_BY;
    if( !(c2 = (double complex **)alloc2Darray( sizeof(double complex), lx, ly )) ) goto GOOD_BY;
    if( !(c3 = (double complex **)alloc2Darray( sizeof(double complex), lx, ly )) ) goto GOOD_BY;
    
    // 画像データを濃淡画像に変換して、複素数データを作成
    for(int y=0; y<ly; y++ )for(int x=0; x<lx; x++ ){
        int gray = (306*s[y][x].R + 601*s[y][x].G + 117*s[y][x].B)>>10;
        c1[y][x] = c2[y][x] = COMPLEX(gray,0);
    }///////////////////////////////////////////////////////////////////////////

    czt_TwoDimensional( c2, lx, ly,  CZT);                       // 順変換:  CZT

    for(int y=0;y<ly;y++)for(int x=0;x<lx;x++) c3[y][x]=c2[y][x];

    if( dft ) dft_TwoDimensional( (COMPLEX**)c3, lx, ly, IDFT ); // 逆変換: IDFT
    else      czt_TwoDimensional( c3, lx, ly, ICZT  );           // 逆変換: ICZT
    
    ////////////////////////////////////////////////////////////////////////////
    // 複素数データのリアルパートを 画像データ(濃淡画像)として抽出
    for(int y=0; y<ly; y++ )for(int x=0; x<lx; x++ ){
        int real = creal(c3[y][x])+0.0001, gray = real<0 ? 0 : ( 255<real  ? 255 : real );
        d[y][x]  = (PIXEL){ gray, gray, gray, 0 };
    }

  GOOD_BY:;
    free(c3);
    free(c2);
    free(c1);
    
    return 0;
}

////////////////////////////////////////////////////////////////////////////////
#include <string.h>
void argumentAnalysis(int argc, char *argv[], char **iname, char **oname){
  int  i, st=1;
    for( i=1; i<argc; i++ ) {
        if( argv[i][0]=='-' ) {
            switch(argv[i][1]) {
                case 'v': verbose=1;    break;
                case 'd': dft=1;        break;
                case 'h': st=0;
                default : goto GOOD_BY;
            }
        }
        else if( *iname==NULL ) *iname = argv[i];
        else if( *oname==NULL ) *oname = argv[i];
        else goto GOOD_BY;
    }
    return;
  GOOD_BY:;
    char *p=argv[0], *op="[-d] [-h] ";
    for( i=strlen(p); i>0 && p[i]!='\\' && p[i]!='/'; i-- );; if(i) i++;
    fprintf( stderr, "\nusage: %s %s[input_image [output.bmp]]\n\n", &p[i], op );
    fprintf( stderr, "\t-v: verbose mode\n" );
    fprintf( stderr, "\t-d: Inverse transform with standard DFT.\n" );
    fprintf( stderr, "\t-h: This option shows only this message.\n" );
    exit(st);
}

void imageFittingOperation( PIXEL **s, int lx, int ly ) {
  int   x, y, w, ey=(ly +1)/2, ty=ly-1;
    for( y=0; y<ey; y++ ) for( w=ty-y, x=0; x<lx; x++ ) {
        PIXEL p = (PIXEL){ s[y][x].R, s[y][x].G, s[y][x].B, 0 };
        s[y][x] = (PIXEL){ s[w][x].R, s[w][x].G, s[w][x].B, 0 };
        s[w][x] = p;
    }// 色要素順の変更( B ⇔ R )及び 左上原点系 から 左下原点系への変換
}

int main(int argc, char *argv[]){
  int     lx, ly, bpp, st=1;
  char    *raw=NULL, *ifname=NULL, *ofname=NULL;
  PIXEL   **s=NULL, **d=NULL ;
  
    argumentAnalysis( argc, argv, &ifname, &ofname );
    if( ifname==NULL ) ifname = "sample.bmp";
    if( ofname==NULL ) ofname = "result.bmp";
    
    if( (raw=(char*)stbi_load( ifname, &lx, &ly, &bpp, 4 ))==NULL ) {
        fprintf(stderr,"Couldn't load the image '%s'.\n",ifname);  goto GOOD_BY;
    } else if( (s=(PIXEL**)alloc2D( raw, sizeof(PIXEL), lx, ly ))==NULL ) {
         fprintf(stderr, "Couldn't allocate  memory.\n");             goto GOOD_BY;
    } else imageFittingOperation( s, lx, ly );
   
    if( (d=(PIXEL**)alloc2Darray( sizeof(PIXEL), lx, ly ))==NULL ) {
         fprintf(stderr, "Couldn't allocate  memory.\n");             goto GOOD_BY;
    } else if( (st=TwoDimensionalFourierTransform( s ,d, lx, ly ))==0 ) {
        saveArray2bmp32( ofname, d, lx, ly );
    }
  GOOD_BY:;
    free(d);
    free(s);
    stbi_image_free(raw);
    return st;
}