File indexing completed on 2024-12-22 04:17:57

0001 /*
0002     Copyright(C) 1996-2001 Takuya OOURA
0003     email: ooura@mmm.t.u-tokyo.ac.jp
0004     download: http://momonga.t.u-tokyo.ac.jp/~ooura/fft.html
0005     You may use, copy, modify this code for any purpose and 
0006     without fee.
0007 
0008 */
0009 
0010 /*
0011 
0012 Fast Fourier/Cosine/Sine Transform
0013     dimension   :one
0014     data length :power of 2
0015     decimation  :frequency
0016     radix       :split-radix
0017     data        :inplace
0018     table       :not use
0019 functions
0020     cdft: Complex Discrete Fourier Transform
0021     rdft: Real Discrete Fourier Transform
0022     ddct: Discrete Cosine Transform
0023     ddst: Discrete Sine Transform
0024     dfct: Cosine Transform of RDFT (Real Symmetric DFT)
0025     dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
0026 function prototypes
0027     void cdft(int, int, double *);
0028     void rdft(int, int, double *);
0029     void ddct(int, int, double *);
0030     void ddst(int, int, double *);
0031     void dfct(int, double *);
0032     void dfst(int, double *);
0033 macro definitions
0034     USE_CDFT_PTHREADS : default=not defined
0035         CDFT_THREADS_BEGIN_N  : must be >= 512, default=8192
0036         CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
0037     USE_CDFT_WINTHREADS : default=not defined
0038         CDFT_THREADS_BEGIN_N  : must be >= 512, default=32768
0039         CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288
0040 
0041 
0042 -------- Complex DFT (Discrete Fourier Transform) --------
0043     [definition]
0044         <case1>
0045             X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
0046         <case2>
0047             X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
0048         (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
0049     [usage]
0050         <case1>
0051             cdft(2*n, 1, a);
0052         <case2>
0053             cdft(2*n, -1, a);
0054     [parameters]
0055         2*n            :data length (int)
0056                         n >= 1, n = power of 2
0057         a[0...2*n-1]   :input/output data (double *)
0058                         input data
0059                             a[2*j] = Re(x[j]), 
0060                             a[2*j+1] = Im(x[j]), 0<=j<n
0061                         output data
0062                             a[2*k] = Re(X[k]), 
0063                             a[2*k+1] = Im(X[k]), 0<=k<n
0064     [remark]
0065         Inverse of 
0066             cdft(2*n, -1, a);
0067         is 
0068             cdft(2*n, 1, a);
0069             for (j = 0; j <= 2 * n - 1; j++) {
0070                 a[j] *= 1.0 / n;
0071             }
0072         .
0073 
0074 
0075 -------- Real DFT / Inverse of Real DFT --------
0076     [definition]
0077         <case1> RDFT
0078             R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
0079             I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
0080         <case2> IRDFT (excluding scale)
0081             a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + 
0082                    sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + 
0083                    sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
0084     [usage]
0085         <case1>
0086             rdft(n, 1, a);
0087         <case2>
0088             rdft(n, -1, a);
0089     [parameters]
0090         n              :data length (int)
0091                         n >= 2, n = power of 2
0092         a[0...n-1]     :input/output data (double *)
0093                         <case1>
0094                             output data
0095                                 a[2*k] = R[k], 0<=k<n/2
0096                                 a[2*k+1] = I[k], 0<k<n/2
0097                                 a[1] = R[n/2]
0098                         <case2>
0099                             input data
0100                                 a[2*j] = R[j], 0<=j<n/2
0101                                 a[2*j+1] = I[j], 0<j<n/2
0102                                 a[1] = R[n/2]
0103     [remark]
0104         Inverse of 
0105             rdft(n, 1, a);
0106         is 
0107             rdft(n, -1, a);
0108             for (j = 0; j <= n - 1; j++) {
0109                 a[j] *= 2.0 / n;
0110             }
0111         .
0112 
0113 
0114 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
0115     [definition]
0116         <case1> IDCT (excluding scale)
0117             C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
0118         <case2> DCT
0119             C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
0120     [usage]
0121         <case1>
0122             ddct(n, 1, a);
0123         <case2>
0124             ddct(n, -1, a);
0125     [parameters]
0126         n              :data length (int)
0127                         n >= 2, n = power of 2
0128         a[0...n-1]     :input/output data (double *)
0129                         output data
0130                             a[k] = C[k], 0<=k<n
0131     [remark]
0132         Inverse of 
0133             ddct(n, -1, a);
0134         is 
0135             a[0] *= 0.5;
0136             ddct(n, 1, a);
0137             for (j = 0; j <= n - 1; j++) {
0138                 a[j] *= 2.0 / n;
0139             }
0140         .
0141 
0142 
0143 -------- DST (Discrete Sine Transform) / Inverse of DST --------
0144     [definition]
0145         <case1> IDST (excluding scale)
0146             S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
0147         <case2> DST
0148             S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
0149     [usage]
0150         <case1>
0151             ddst(n, 1, a);
0152         <case2>
0153             ddst(n, -1, a);
0154     [parameters]
0155         n              :data length (int)
0156                         n >= 2, n = power of 2
0157         a[0...n-1]     :input/output data (double *)
0158                         <case1>
0159                             input data
0160                                 a[j] = A[j], 0<j<n
0161                                 a[0] = A[n]
0162                             output data
0163                                 a[k] = S[k], 0<=k<n
0164                         <case2>
0165                             output data
0166                                 a[k] = S[k], 0<k<n
0167                                 a[0] = S[n]
0168     [remark]
0169         Inverse of 
0170             ddst(n, -1, a);
0171         is 
0172             a[0] *= 0.5;
0173             ddst(n, 1, a);
0174             for (j = 0; j <= n - 1; j++) {
0175                 a[j] *= 2.0 / n;
0176             }
0177         .
0178 
0179 
0180 -------- Cosine Transform of RDFT (Real Symmetric DFT) --------
0181     [definition]
0182         C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
0183     [usage]
0184         dfct(n, a);
0185     [parameters]
0186         n              :data length - 1 (int)
0187                         n >= 2, n = power of 2
0188         a[0...n]       :input/output data (double *)
0189                         output data
0190                             a[k] = C[k], 0<=k<=n
0191     [remark]
0192         Inverse of 
0193             a[0] *= 0.5;
0194             a[n] *= 0.5;
0195             dfct(n, a);
0196         is 
0197             a[0] *= 0.5;
0198             a[n] *= 0.5;
0199             dfct(n, a);
0200             for (j = 0; j <= n; j++) {
0201                 a[j] *= 2.0 / n;
0202             }
0203         .
0204 
0205 
0206 -------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
0207     [definition]
0208         S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
0209     [usage]
0210         dfst(n, a);
0211     [parameters]
0212         n              :data length + 1 (int)
0213                         n >= 2, n = power of 2
0214         a[0...n-1]     :input/output data (double *)
0215                         output data
0216                             a[k] = S[k], 0<k<n
0217                         (a[0] is used for work area)
0218     [remark]
0219         Inverse of 
0220             dfst(n, a);
0221         is 
0222             dfst(n, a);
0223             for (j = 1; j <= n - 1; j++) {
0224                 a[j] *= 2.0 / n;
0225             }
0226         .
0227 */
0228 
0229 void cdft(int n, int isgn, double *a);
0230 void rdft(int n, int isgn, double *a);
0231 void dfct(int n, double *a);
0232 void dfst(int n, double *a);
0233 
0234 static void cftfsub(int n, double *a);
0235 static void cftbsub(int n, double *a);
0236 static void rftfsub(int n, double *a);
0237 static void rftbsub(int n, double *a);
0238 static void dctsub(int n, double *a);
0239 static void dctsub4(int n, double *a);
0240 static void ddct(int n, int isgn, double *a);
0241 static void bitrv1(int n, double *a);
0242 static void dstsub(int n, double *a);
0243 static void dstsub4(int n, double *a);
0244 static void bitrv2(int n, double *a);
0245 static void bitrv216(double *a);
0246 static void bitrv208(double *a);
0247 static void cftmdl1(int n, double *a);
0248 static void cftrec4(int n, double *a);
0249 static void cftleaf(int n, int isplt, double *a);
0250 static void cftfx41(int n, double *a);
0251 static void cftf161(double *a);
0252 static void cftf081(double *a);
0253 static void cftf040(double *a);
0254 static void cftx020(double *a);
0255 #ifdef USE_CDFT_THREADS
0256 static void cftrec4_th(int n, double *a);
0257 #endif /* USE_CDFT_THREADS */
0258 static void bitrv2conj(int n, double *a);
0259 static void bitrv216neg(double *a);
0260 static void bitrv208neg(double *a);
0261 static void cftb1st(int n, double *a);
0262 static void cftrec4(int n, double *a);
0263 static void cftleaf(int n, int isplt, double *a);
0264 static void cftfx41(int n, double *a);
0265 static void cftf161(double *a);
0266 static void cftf081(double *a);
0267 static void cftb040(double *a);
0268 static void cftx020(double *a);
0269 static int cfttree(int n, int j, int k, double *a);
0270 static void cftleaf(int n, int isplt, double *a);
0271 static void cftmdl1(int n, double *a);
0272 static void cftmdl2(int n, double *a);
0273 static void cftf162(double *a);
0274 static void cftf081(double *a);
0275 static void cftf082(double *a);
0276 
0277 
0278 void cdft(int n, int isgn, double *a)
0279 {
0280     if (isgn >= 0) {
0281         cftfsub(n, a);
0282     } else {
0283         cftbsub(n, a);
0284     }
0285 }
0286 
0287 
0288 void rdft(int n, int isgn, double *a)
0289 {
0290     double xi;
0291 
0292     if (isgn >= 0) {
0293         if (n > 4) {
0294             cftfsub(n, a);
0295             rftfsub(n, a);
0296         } else if (n == 4) {
0297             cftfsub(n, a);
0298         }
0299         xi = a[0] - a[1];
0300         a[0] += a[1];
0301         a[1] = xi;
0302     } else {
0303         a[1] = 0.5 * (a[0] - a[1]);
0304         a[0] -= a[1];
0305         if (n > 4) {
0306             rftbsub(n, a);
0307             cftbsub(n, a);
0308         } else if (n == 4) {
0309             cftbsub(n, a);
0310         }
0311     }
0312 }
0313 
0314 
0315 static void ddct(int n, int isgn, double *a)
0316 {
0317     int j;
0318     double xr;
0319 
0320     if (isgn < 0) {
0321         xr = a[n - 1];
0322         for (j = n - 2; j >= 2; j -= 2) {
0323             a[j + 1] = a[j] - a[j - 1];
0324             a[j] += a[j - 1];
0325         }
0326         a[1] = a[0] - xr;
0327         a[0] += xr;
0328         if (n > 4) {
0329             rftbsub(n, a);
0330             cftbsub(n, a);
0331         } else if (n == 4) {
0332             cftbsub(n, a);
0333         }
0334     }
0335     if (n > 4) {
0336         dctsub(n, a);
0337     } else {
0338         dctsub4(n, a);
0339     }
0340     if (isgn >= 0) {
0341         if (n > 4) {
0342             cftfsub(n, a);
0343             rftfsub(n, a);
0344         } else if (n == 4) {
0345             cftfsub(n, a);
0346         }
0347         xr = a[0] - a[1];
0348         a[0] += a[1];
0349         for (j = 2; j < n; j += 2) {
0350             a[j - 1] = a[j] - a[j + 1];
0351             a[j] += a[j + 1];
0352         }
0353         a[n - 1] = xr;
0354     }
0355 }
0356 
0357 
0358 static void ddst(int n, int isgn, double *a)
0359 {
0360     int j;
0361     double xr;
0362 
0363     if (isgn < 0) {
0364         xr = a[n - 1];
0365         for (j = n - 2; j >= 2; j -= 2) {
0366             a[j + 1] = -a[j] - a[j - 1];
0367             a[j] -= a[j - 1];
0368         }
0369         a[1] = a[0] + xr;
0370         a[0] -= xr;
0371         if (n > 4) {
0372             rftbsub(n, a);
0373             cftbsub(n, a);
0374         } else if (n == 4) {
0375             cftbsub(n, a);
0376         }
0377     }
0378     if (n > 4) {
0379         dstsub(n, a);
0380     } else {
0381         dstsub4(n, a);
0382     }
0383     if (isgn >= 0) {
0384         if (n > 4) {
0385             cftfsub(n, a);
0386             rftfsub(n, a);
0387         } else if (n == 4) {
0388             cftfsub(n, a);
0389         }
0390         xr = a[0] - a[1];
0391         a[0] += a[1];
0392         for (j = 2; j < n; j += 2) {
0393             a[j - 1] = -a[j] - a[j + 1];
0394             a[j] -= a[j + 1];
0395         }
0396         a[n - 1] = -xr;
0397     }
0398 }
0399 
0400 
0401 void dfct(int n, double *a)
0402 {
0403     int j, k, m, mh;
0404     double xr, xi, yr, yi, an;
0405 
0406     m = n >> 1;
0407     for (j = 0; j < m; j++) {
0408         k = n - j;
0409         xr = a[j] + a[k];
0410         a[j] -= a[k];
0411         a[k] = xr;
0412     }
0413     an = a[n];
0414     while (m >= 2) {
0415         ddct(m, 1, a);
0416         if (m > 2) {
0417             bitrv1(m, a);
0418         }
0419         mh = m >> 1;
0420         xi = a[m];
0421         a[m] = a[0];
0422         a[0] = an - xi;
0423         an += xi;
0424         for (j = 1; j < mh; j++) {
0425             k = m - j;
0426             xr = a[m + k];
0427             xi = a[m + j];
0428             yr = a[j];
0429             yi = a[k];
0430             a[m + j] = yr;
0431             a[m + k] = yi;
0432             a[j] = xr - xi;
0433             a[k] = xr + xi;
0434         }
0435         xr = a[mh];
0436         a[mh] = a[m + mh];
0437         a[m + mh] = xr;
0438         m = mh;
0439     }
0440     xi = a[1];
0441     a[1] = a[0];
0442     a[0] = an + xi;
0443     a[n] = an - xi;
0444     if (n > 2) {
0445         bitrv1(n, a);
0446     }
0447 }
0448 
0449 
0450 void dfst(int n, double *a)
0451 {
0452     int j, k, m, mh;
0453     double xr, xi, yr, yi;
0454 
0455     m = n >> 1;
0456     for (j = 1; j < m; j++) {
0457         k = n - j;
0458         xr = a[j] - a[k];
0459         a[j] += a[k];
0460         a[k] = xr;
0461     }
0462     a[0] = a[m];
0463     while (m >= 2) {
0464         ddst(m, 1, a);
0465         if (m > 2) {
0466             bitrv1(m, a);
0467         }
0468         mh = m >> 1;
0469         for (j = 1; j < mh; j++) {
0470             k = m - j;
0471             xr = a[m + k];
0472             xi = a[m + j];
0473             yr = a[j];
0474             yi = a[k];
0475             a[m + j] = yr;
0476             a[m + k] = yi;
0477             a[j] = xr + xi;
0478             a[k] = xr - xi;
0479         }
0480         a[m] = a[0];
0481         a[0] = a[m + mh];
0482         a[m + mh] = a[mh];
0483         m = mh;
0484     }
0485     a[1] = a[0];
0486     a[0] = 0;
0487     if (n > 2) {
0488         bitrv1(n, a);
0489     }
0490 }
0491 
0492 
0493 /* -------- child routines -------- */
0494 
0495 
0496 #include <math.h>
0497 #ifndef M_PI_2
0498 #define M_PI_2      1.570796326794896619231321691639751442098584699687
0499 #endif
0500 #ifndef WR5000  /* cos(M_PI_2*0.5000) */
0501 #define WR5000      0.707106781186547524400844362104849039284835937688
0502 #endif
0503 #ifndef WR2500  /* cos(M_PI_2*0.2500) */
0504 #define WR2500      0.923879532511286756128183189396788286822416625863
0505 #endif
0506 #ifndef WI2500  /* sin(M_PI_2*0.2500) */
0507 #define WI2500      0.382683432365089771728459984030398866761344562485
0508 #endif
0509 #ifndef WR1250  /* cos(M_PI_2*0.1250) */
0510 #define WR1250      0.980785280403230449126182236134239036973933730893
0511 #endif
0512 #ifndef WI1250  /* sin(M_PI_2*0.1250) */
0513 #define WI1250      0.195090322016128267848284868477022240927691617751
0514 #endif
0515 #ifndef WR3750  /* cos(M_PI_2*0.3750) */
0516 #define WR3750      0.831469612302545237078788377617905756738560811987
0517 #endif
0518 #ifndef WI3750  /* sin(M_PI_2*0.3750) */
0519 #define WI3750      0.555570233019602224742830813948532874374937190754
0520 #endif
0521 
0522 
0523 #ifdef USE_CDFT_PTHREADS
0524 #define USE_CDFT_THREADS
0525 #ifndef CDFT_THREADS_BEGIN_N
0526 #define CDFT_THREADS_BEGIN_N 8192
0527 #endif
0528 #ifndef CDFT_4THREADS_BEGIN_N
0529 #define CDFT_4THREADS_BEGIN_N 65536
0530 #endif
0531 #include <pthread.h>
0532 #include <stdio.h>
0533 #include <stdlib.h>
0534 #define cdft_thread_t pthread_t
0535 #define cdft_thread_create(thp,func,argp) { \
0536     if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \
0537         fprintf(stderr, "cdft thread error\n"); \
0538         exit(1); \
0539     } \
0540 }
0541 #define cdft_thread_wait(th) { \
0542     if (pthread_join(th, NULL) != 0) { \
0543         fprintf(stderr, "cdft thread error\n"); \
0544         exit(1); \
0545     } \
0546 }
0547 #endif /* USE_CDFT_PTHREADS */
0548 
0549 
0550 #ifdef USE_CDFT_WINTHREADS
0551 #define USE_CDFT_THREADS
0552 #ifndef CDFT_THREADS_BEGIN_N
0553 #define CDFT_THREADS_BEGIN_N 32768
0554 #endif
0555 #ifndef CDFT_4THREADS_BEGIN_N
0556 #define CDFT_4THREADS_BEGIN_N 524288
0557 #endif
0558 #include <windows.h>
0559 #include <stdio.h>
0560 #include <stdlib.h>
0561 #define cdft_thread_t HANDLE
0562 #define cdft_thread_create(thp,func,argp) { \
0563     DWORD thid; \
0564     *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \
0565     if (*(thp) == 0) { \
0566         fprintf(stderr, "cdft thread error\n"); \
0567         exit(1); \
0568     } \
0569 }
0570 #define cdft_thread_wait(th) { \
0571     WaitForSingleObject(th, INFINITE); \
0572     CloseHandle(th); \
0573 }
0574 #endif /* USE_CDFT_WINTHREADS */
0575 
0576 
0577 #ifndef CDFT_LOOP_DIV  /* control of the CDFT's speed & tolerance */
0578 #define CDFT_LOOP_DIV 32
0579 #endif
0580 
0581 #ifndef RDFT_LOOP_DIV  /* control of the RDFT's speed & tolerance */
0582 #define RDFT_LOOP_DIV 64
0583 #endif
0584 
0585 #ifndef DCST_LOOP_DIV  /* control of the DCT,DST's speed & tolerance */
0586 #define DCST_LOOP_DIV 64
0587 #endif
0588 
0589 
0590 static void cftfsub(int n, double *a)
0591 {
0592 
0593     if (n > 8) {
0594         if (n > 32) {
0595             cftmdl1(n, a);
0596 #ifdef USE_CDFT_THREADS
0597             if (n > CDFT_THREADS_BEGIN_N) {
0598                 cftrec4_th(n, a);
0599             } else 
0600 #endif /* USE_CDFT_THREADS */
0601             if (n > 512) {
0602                 cftrec4(n, a);
0603             } else if (n > 128) {
0604                 cftleaf(n, 1, a);
0605             } else {
0606                 cftfx41(n, a);
0607             }
0608             bitrv2(n, a);
0609         } else if (n == 32) {
0610             cftf161(a);
0611             bitrv216(a);
0612         } else {
0613             cftf081(a);
0614             bitrv208(a);
0615         }
0616     } else if (n == 8) {
0617         cftf040(a);
0618     } else if (n == 4) {
0619         cftx020(a);
0620     }
0621 }
0622 
0623 
0624 static void cftbsub(int n, double *a)
0625 {
0626     if (n > 8) {
0627         if (n > 32) {
0628             cftb1st(n, a);
0629 #ifdef USE_CDFT_THREADS
0630             if (n > CDFT_THREADS_BEGIN_N) {
0631                 cftrec4_th(n, a);
0632             } else 
0633 #endif /* USE_CDFT_THREADS */
0634             if (n > 512) {
0635                 cftrec4(n, a);
0636             } else if (n > 128) {
0637                 cftleaf(n, 1, a);
0638             } else {
0639                 cftfx41(n, a);
0640             }
0641             bitrv2conj(n, a);
0642         } else if (n == 32) {
0643             cftf161(a);
0644             bitrv216neg(a);
0645         } else {
0646             cftf081(a);
0647             bitrv208neg(a);
0648         }
0649     } else if (n == 8) {
0650         cftb040(a);
0651     } else if (n == 4) {
0652         cftx020(a);
0653     }
0654 }
0655 
0656 
0657 void bitrv2(int n, double *a)
0658 {
0659     int _j0, k0, _j1, k1, l, m, i, j, k, nh;
0660     double xr, xi, yr, yi;
0661 
0662     m = 4;
0663     for (l = n >> 2; l > 8; l >>= 2) {
0664         m <<= 1;
0665     }
0666     nh = n >> 1;
0667     if (l == 8) {
0668         _j0 = 0;
0669         for (k0 = 0; k0 < m; k0 += 4) {
0670             k = k0;
0671             for (j = _j0; j < _j0 + k0; j += 4) {
0672                 xr = a[j];
0673                 xi = a[j + 1];
0674                 yr = a[k];
0675                 yi = a[k + 1];
0676                 a[j] = yr;
0677                 a[j + 1] = yi;
0678                 a[k] = xr;
0679                 a[k + 1] = xi;
0680                 _j1 = j + m;
0681                 k1 = k + 2 * m;
0682                 xr = a[_j1];
0683                 xi = a[_j1 + 1];
0684                 yr = a[k1];
0685                 yi = a[k1 + 1];
0686                 a[_j1] = yr;
0687                 a[_j1 + 1] = yi;
0688                 a[k1] = xr;
0689                 a[k1 + 1] = xi;
0690                 _j1 += m;
0691                 k1 -= m;
0692                 xr = a[_j1];
0693                 xi = a[_j1 + 1];
0694                 yr = a[k1];
0695                 yi = a[k1 + 1];
0696                 a[_j1] = yr;
0697                 a[_j1 + 1] = yi;
0698                 a[k1] = xr;
0699                 a[k1 + 1] = xi;
0700                 _j1 += m;
0701                 k1 += 2 * m;
0702                 xr = a[_j1];
0703                 xi = a[_j1 + 1];
0704                 yr = a[k1];
0705                 yi = a[k1 + 1];
0706                 a[_j1] = yr;
0707                 a[_j1 + 1] = yi;
0708                 a[k1] = xr;
0709                 a[k1 + 1] = xi;
0710                 _j1 += nh;
0711                 k1 += 2;
0712                 xr = a[_j1];
0713                 xi = a[_j1 + 1];
0714                 yr = a[k1];
0715                 yi = a[k1 + 1];
0716                 a[_j1] = yr;
0717                 a[_j1 + 1] = yi;
0718                 a[k1] = xr;
0719                 a[k1 + 1] = xi;
0720                 _j1 -= m;
0721                 k1 -= 2 * m;
0722                 xr = a[_j1];
0723                 xi = a[_j1 + 1];
0724                 yr = a[k1];
0725                 yi = a[k1 + 1];
0726                 a[_j1] = yr;
0727                 a[_j1 + 1] = yi;
0728                 a[k1] = xr;
0729                 a[k1 + 1] = xi;
0730                 _j1 -= m;
0731                 k1 += m;
0732                 xr = a[_j1];
0733                 xi = a[_j1 + 1];
0734                 yr = a[k1];
0735                 yi = a[k1 + 1];
0736                 a[_j1] = yr;
0737                 a[_j1 + 1] = yi;
0738                 a[k1] = xr;
0739                 a[k1 + 1] = xi;
0740                 _j1 -= m;
0741                 k1 -= 2 * m;
0742                 xr = a[_j1];
0743                 xi = a[_j1 + 1];
0744                 yr = a[k1];
0745                 yi = a[k1 + 1];
0746                 a[_j1] = yr;
0747                 a[_j1 + 1] = yi;
0748                 a[k1] = xr;
0749                 a[k1 + 1] = xi;
0750                 _j1 += 2;
0751                 k1 += nh;
0752                 xr = a[_j1];
0753                 xi = a[_j1 + 1];
0754                 yr = a[k1];
0755                 yi = a[k1 + 1];
0756                 a[_j1] = yr;
0757                 a[_j1 + 1] = yi;
0758                 a[k1] = xr;
0759                 a[k1 + 1] = xi;
0760                 _j1 += m;
0761                 k1 += 2 * m;
0762                 xr = a[_j1];
0763                 xi = a[_j1 + 1];
0764                 yr = a[k1];
0765                 yi = a[k1 + 1];
0766                 a[_j1] = yr;
0767                 a[_j1 + 1] = yi;
0768                 a[k1] = xr;
0769                 a[k1 + 1] = xi;
0770                 _j1 += m;
0771                 k1 -= m;
0772                 xr = a[_j1];
0773                 xi = a[_j1 + 1];
0774                 yr = a[k1];
0775                 yi = a[k1 + 1];
0776                 a[_j1] = yr;
0777                 a[_j1 + 1] = yi;
0778                 a[k1] = xr;
0779                 a[k1 + 1] = xi;
0780                 _j1 += m;
0781                 k1 += 2 * m;
0782                 xr = a[_j1];
0783                 xi = a[_j1 + 1];
0784                 yr = a[k1];
0785                 yi = a[k1 + 1];
0786                 a[_j1] = yr;
0787                 a[_j1 + 1] = yi;
0788                 a[k1] = xr;
0789                 a[k1 + 1] = xi;
0790                 _j1 -= nh;
0791                 k1 -= 2;
0792                 xr = a[_j1];
0793                 xi = a[_j1 + 1];
0794                 yr = a[k1];
0795                 yi = a[k1 + 1];
0796                 a[_j1] = yr;
0797                 a[_j1 + 1] = yi;
0798                 a[k1] = xr;
0799                 a[k1 + 1] = xi;
0800                 _j1 -= m;
0801                 k1 -= 2 * m;
0802                 xr = a[_j1];
0803                 xi = a[_j1 + 1];
0804                 yr = a[k1];
0805                 yi = a[k1 + 1];
0806                 a[_j1] = yr;
0807                 a[_j1 + 1] = yi;
0808                 a[k1] = xr;
0809                 a[k1 + 1] = xi;
0810                 _j1 -= m;
0811                 k1 += m;
0812                 xr = a[_j1];
0813                 xi = a[_j1 + 1];
0814                 yr = a[k1];
0815                 yi = a[k1 + 1];
0816                 a[_j1] = yr;
0817                 a[_j1 + 1] = yi;
0818                 a[k1] = xr;
0819                 a[k1 + 1] = xi;
0820                 _j1 -= m;
0821                 k1 -= 2 * m;
0822                 xr = a[_j1];
0823                 xi = a[_j1 + 1];
0824                 yr = a[k1];
0825                 yi = a[k1 + 1];
0826                 a[_j1] = yr;
0827                 a[_j1 + 1] = yi;
0828                 a[k1] = xr;
0829                 a[k1 + 1] = xi;
0830                 for (i = nh >> 1; i > (k ^= i); i >>= 1);
0831             }
0832             k1 = _j0 + k0;
0833             _j1 = k1 + 2;
0834             k1 += nh;
0835             xr = a[_j1];
0836             xi = a[_j1 + 1];
0837             yr = a[k1];
0838             yi = a[k1 + 1];
0839             a[_j1] = yr;
0840             a[_j1 + 1] = yi;
0841             a[k1] = xr;
0842             a[k1 + 1] = xi;
0843             _j1 += m;
0844             k1 += 2 * m;
0845             xr = a[_j1];
0846             xi = a[_j1 + 1];
0847             yr = a[k1];
0848             yi = a[k1 + 1];
0849             a[_j1] = yr;
0850             a[_j1 + 1] = yi;
0851             a[k1] = xr;
0852             a[k1 + 1] = xi;
0853             _j1 += m;
0854             k1 -= m;
0855             xr = a[_j1];
0856             xi = a[_j1 + 1];
0857             yr = a[k1];
0858             yi = a[k1 + 1];
0859             a[_j1] = yr;
0860             a[_j1 + 1] = yi;
0861             a[k1] = xr;
0862             a[k1 + 1] = xi;
0863             _j1 -= 2;
0864             k1 -= nh;
0865             xr = a[_j1];
0866             xi = a[_j1 + 1];
0867             yr = a[k1];
0868             yi = a[k1 + 1];
0869             a[_j1] = yr;
0870             a[_j1 + 1] = yi;
0871             a[k1] = xr;
0872             a[k1 + 1] = xi;
0873             _j1 += nh + 2;
0874             k1 += nh + 2;
0875             xr = a[_j1];
0876             xi = a[_j1 + 1];
0877             yr = a[k1];
0878             yi = a[k1 + 1];
0879             a[_j1] = yr;
0880             a[_j1 + 1] = yi;
0881             a[k1] = xr;
0882             a[k1 + 1] = xi;
0883             _j1 -= nh - m;
0884             k1 += 2 * m - 2;
0885             xr = a[_j1];
0886             xi = a[_j1 + 1];
0887             yr = a[k1];
0888             yi = a[k1 + 1];
0889             a[_j1] = yr;
0890             a[_j1 + 1] = yi;
0891             a[k1] = xr;
0892             a[k1 + 1] = xi;
0893             for (i = nh >> 1; i > (_j0 ^= i); i >>= 1);
0894         }
0895     } else {
0896         _j0 = 0;
0897         for (k0 = 0; k0 < m; k0 += 4) {
0898             k = k0;
0899             for (j = _j0; j < _j0 + k0; j += 4) {
0900                 xr = a[j];
0901                 xi = a[j + 1];
0902                 yr = a[k];
0903                 yi = a[k + 1];
0904                 a[j] = yr;
0905                 a[j + 1] = yi;
0906                 a[k] = xr;
0907                 a[k + 1] = xi;
0908                 _j1 = j + m;
0909                 k1 = k + m;
0910                 xr = a[_j1];
0911                 xi = a[_j1 + 1];
0912                 yr = a[k1];
0913                 yi = a[k1 + 1];
0914                 a[_j1] = yr;
0915                 a[_j1 + 1] = yi;
0916                 a[k1] = xr;
0917                 a[k1 + 1] = xi;
0918                 _j1 += nh;
0919                 k1 += 2;
0920                 xr = a[_j1];
0921                 xi = a[_j1 + 1];
0922                 yr = a[k1];
0923                 yi = a[k1 + 1];
0924                 a[_j1] = yr;
0925                 a[_j1 + 1] = yi;
0926                 a[k1] = xr;
0927                 a[k1 + 1] = xi;
0928                 _j1 -= m;
0929                 k1 -= m;
0930                 xr = a[_j1];
0931                 xi = a[_j1 + 1];
0932                 yr = a[k1];
0933                 yi = a[k1 + 1];
0934                 a[_j1] = yr;
0935                 a[_j1 + 1] = yi;
0936                 a[k1] = xr;
0937                 a[k1 + 1] = xi;
0938                 _j1 += 2;
0939                 k1 += nh;
0940                 xr = a[_j1];
0941                 xi = a[_j1 + 1];
0942                 yr = a[k1];
0943                 yi = a[k1 + 1];
0944                 a[_j1] = yr;
0945                 a[_j1 + 1] = yi;
0946                 a[k1] = xr;
0947                 a[k1 + 1] = xi;
0948                 _j1 += m;
0949                 k1 += m;
0950                 xr = a[_j1];
0951                 xi = a[_j1 + 1];
0952                 yr = a[k1];
0953                 yi = a[k1 + 1];
0954                 a[_j1] = yr;
0955                 a[_j1 + 1] = yi;
0956                 a[k1] = xr;
0957                 a[k1 + 1] = xi;
0958                 _j1 -= nh;
0959                 k1 -= 2;
0960                 xr = a[_j1];
0961                 xi = a[_j1 + 1];
0962                 yr = a[k1];
0963                 yi = a[k1 + 1];
0964                 a[_j1] = yr;
0965                 a[_j1 + 1] = yi;
0966                 a[k1] = xr;
0967                 a[k1 + 1] = xi;
0968                 _j1 -= m;
0969                 k1 -= m;
0970                 xr = a[_j1];
0971                 xi = a[_j1 + 1];
0972                 yr = a[k1];
0973                 yi = a[k1 + 1];
0974                 a[_j1] = yr;
0975                 a[_j1 + 1] = yi;
0976                 a[k1] = xr;
0977                 a[k1 + 1] = xi;
0978                 for (i = nh >> 1; i > (k ^= i); i >>= 1);
0979             }
0980             k1 = _j0 + k0;
0981             _j1 = k1 + 2;
0982             k1 += nh;
0983             xr = a[_j1];
0984             xi = a[_j1 + 1];
0985             yr = a[k1];
0986             yi = a[k1 + 1];
0987             a[_j1] = yr;
0988             a[_j1 + 1] = yi;
0989             a[k1] = xr;
0990             a[k1 + 1] = xi;
0991             _j1 += m;
0992             k1 += m;
0993             xr = a[_j1];
0994             xi = a[_j1 + 1];
0995             yr = a[k1];
0996             yi = a[k1 + 1];
0997             a[_j1] = yr;
0998             a[_j1 + 1] = yi;
0999             a[k1] = xr;
1000             a[k1 + 1] = xi;
1001             for (i = nh >> 1; i > (_j0 ^= i); i >>= 1);
1002         }
1003     }
1004 }
1005 
1006 
1007 void bitrv2conj(int n, double *a)
1008 {
1009     int _j0, k0, _j1, k1, l, m, i, j, k, nh;
1010     double xr, xi, yr, yi;
1011 
1012     m = 4;
1013     for (l = n >> 2; l > 8; l >>= 2) {
1014         m <<= 1;
1015     }
1016     nh = n >> 1;
1017     if (l == 8) {
1018         _j0 = 0;
1019         for (k0 = 0; k0 < m; k0 += 4) {
1020             k = k0;
1021             for (j = _j0; j < _j0 + k0; j += 4) {
1022                 xr = a[j];
1023                 xi = -a[j + 1];
1024                 yr = a[k];
1025                 yi = -a[k + 1];
1026                 a[j] = yr;
1027                 a[j + 1] = yi;
1028                 a[k] = xr;
1029                 a[k + 1] = xi;
1030                 _j1 = j + m;
1031                 k1 = k + 2 * m;
1032                 xr = a[_j1];
1033                 xi = -a[_j1 + 1];
1034                 yr = a[k1];
1035                 yi = -a[k1 + 1];
1036                 a[_j1] = yr;
1037                 a[_j1 + 1] = yi;
1038                 a[k1] = xr;
1039                 a[k1 + 1] = xi;
1040                 _j1 += m;
1041                 k1 -= m;
1042                 xr = a[_j1];
1043                 xi = -a[_j1 + 1];
1044                 yr = a[k1];
1045                 yi = -a[k1 + 1];
1046                 a[_j1] = yr;
1047                 a[_j1 + 1] = yi;
1048                 a[k1] = xr;
1049                 a[k1 + 1] = xi;
1050                 _j1 += m;
1051                 k1 += 2 * m;
1052                 xr = a[_j1];
1053                 xi = -a[_j1 + 1];
1054                 yr = a[k1];
1055                 yi = -a[k1 + 1];
1056                 a[_j1] = yr;
1057                 a[_j1 + 1] = yi;
1058                 a[k1] = xr;
1059                 a[k1 + 1] = xi;
1060                 _j1 += nh;
1061                 k1 += 2;
1062                 xr = a[_j1];
1063                 xi = -a[_j1 + 1];
1064                 yr = a[k1];
1065                 yi = -a[k1 + 1];
1066                 a[_j1] = yr;
1067                 a[_j1 + 1] = yi;
1068                 a[k1] = xr;
1069                 a[k1 + 1] = xi;
1070                 _j1 -= m;
1071                 k1 -= 2 * m;
1072                 xr = a[_j1];
1073                 xi = -a[_j1 + 1];
1074                 yr = a[k1];
1075                 yi = -a[k1 + 1];
1076                 a[_j1] = yr;
1077                 a[_j1 + 1] = yi;
1078                 a[k1] = xr;
1079                 a[k1 + 1] = xi;
1080                 _j1 -= m;
1081                 k1 += m;
1082                 xr = a[_j1];
1083                 xi = -a[_j1 + 1];
1084                 yr = a[k1];
1085                 yi = -a[k1 + 1];
1086                 a[_j1] = yr;
1087                 a[_j1 + 1] = yi;
1088                 a[k1] = xr;
1089                 a[k1 + 1] = xi;
1090                 _j1 -= m;
1091                 k1 -= 2 * m;
1092                 xr = a[_j1];
1093                 xi = -a[_j1 + 1];
1094                 yr = a[k1];
1095                 yi = -a[k1 + 1];
1096                 a[_j1] = yr;
1097                 a[_j1 + 1] = yi;
1098                 a[k1] = xr;
1099                 a[k1 + 1] = xi;
1100                 _j1 += 2;
1101                 k1 += nh;
1102                 xr = a[_j1];
1103                 xi = -a[_j1 + 1];
1104                 yr = a[k1];
1105                 yi = -a[k1 + 1];
1106                 a[_j1] = yr;
1107                 a[_j1 + 1] = yi;
1108                 a[k1] = xr;
1109                 a[k1 + 1] = xi;
1110                 _j1 += m;
1111                 k1 += 2 * m;
1112                 xr = a[_j1];
1113                 xi = -a[_j1 + 1];
1114                 yr = a[k1];
1115                 yi = -a[k1 + 1];
1116                 a[_j1] = yr;
1117                 a[_j1 + 1] = yi;
1118                 a[k1] = xr;
1119                 a[k1 + 1] = xi;
1120                 _j1 += m;
1121                 k1 -= m;
1122                 xr = a[_j1];
1123                 xi = -a[_j1 + 1];
1124                 yr = a[k1];
1125                 yi = -a[k1 + 1];
1126                 a[_j1] = yr;
1127                 a[_j1 + 1] = yi;
1128                 a[k1] = xr;
1129                 a[k1 + 1] = xi;
1130                 _j1 += m;
1131                 k1 += 2 * m;
1132                 xr = a[_j1];
1133                 xi = -a[_j1 + 1];
1134                 yr = a[k1];
1135                 yi = -a[k1 + 1];
1136                 a[_j1] = yr;
1137                 a[_j1 + 1] = yi;
1138                 a[k1] = xr;
1139                 a[k1 + 1] = xi;
1140                 _j1 -= nh;
1141                 k1 -= 2;
1142                 xr = a[_j1];
1143                 xi = -a[_j1 + 1];
1144                 yr = a[k1];
1145                 yi = -a[k1 + 1];
1146                 a[_j1] = yr;
1147                 a[_j1 + 1] = yi;
1148                 a[k1] = xr;
1149                 a[k1 + 1] = xi;
1150                 _j1 -= m;
1151                 k1 -= 2 * m;
1152                 xr = a[_j1];
1153                 xi = -a[_j1 + 1];
1154                 yr = a[k1];
1155                 yi = -a[k1 + 1];
1156                 a[_j1] = yr;
1157                 a[_j1 + 1] = yi;
1158                 a[k1] = xr;
1159                 a[k1 + 1] = xi;
1160                 _j1 -= m;
1161                 k1 += m;
1162                 xr = a[_j1];
1163                 xi = -a[_j1 + 1];
1164                 yr = a[k1];
1165                 yi = -a[k1 + 1];
1166                 a[_j1] = yr;
1167                 a[_j1 + 1] = yi;
1168                 a[k1] = xr;
1169                 a[k1 + 1] = xi;
1170                 _j1 -= m;
1171                 k1 -= 2 * m;
1172                 xr = a[_j1];
1173                 xi = -a[_j1 + 1];
1174                 yr = a[k1];
1175                 yi = -a[k1 + 1];
1176                 a[_j1] = yr;
1177                 a[_j1 + 1] = yi;
1178                 a[k1] = xr;
1179                 a[k1 + 1] = xi;
1180                 for (i = nh >> 1; i > (k ^= i); i >>= 1);
1181             }
1182             k1 = _j0 + k0;
1183             _j1 = k1 + 2;
1184             k1 += nh;
1185             a[_j1 - 1] = -a[_j1 - 1];
1186             xr = a[_j1];
1187             xi = -a[_j1 + 1];
1188             yr = a[k1];
1189             yi = -a[k1 + 1];
1190             a[_j1] = yr;
1191             a[_j1 + 1] = yi;
1192             a[k1] = xr;
1193             a[k1 + 1] = xi;
1194             a[k1 + 3] = -a[k1 + 3];
1195             _j1 += m;
1196             k1 += 2 * m;
1197             xr = a[_j1];
1198             xi = -a[_j1 + 1];
1199             yr = a[k1];
1200             yi = -a[k1 + 1];
1201             a[_j1] = yr;
1202             a[_j1 + 1] = yi;
1203             a[k1] = xr;
1204             a[k1 + 1] = xi;
1205             _j1 += m;
1206             k1 -= m;
1207             xr = a[_j1];
1208             xi = -a[_j1 + 1];
1209             yr = a[k1];
1210             yi = -a[k1 + 1];
1211             a[_j1] = yr;
1212             a[_j1 + 1] = yi;
1213             a[k1] = xr;
1214             a[k1 + 1] = xi;
1215             _j1 -= 2;
1216             k1 -= nh;
1217             xr = a[_j1];
1218             xi = -a[_j1 + 1];
1219             yr = a[k1];
1220             yi = -a[k1 + 1];
1221             a[_j1] = yr;
1222             a[_j1 + 1] = yi;
1223             a[k1] = xr;
1224             a[k1 + 1] = xi;
1225             _j1 += nh + 2;
1226             k1 += nh + 2;
1227             xr = a[_j1];
1228             xi = -a[_j1 + 1];
1229             yr = a[k1];
1230             yi = -a[k1 + 1];
1231             a[_j1] = yr;
1232             a[_j1 + 1] = yi;
1233             a[k1] = xr;
1234             a[k1 + 1] = xi;
1235             _j1 -= nh - m;
1236             k1 += 2 * m - 2;
1237             a[_j1 - 1] = -a[_j1 - 1];
1238             xr = a[_j1];
1239             xi = -a[_j1 + 1];
1240             yr = a[k1];
1241             yi = -a[k1 + 1];
1242             a[_j1] = yr;
1243             a[_j1 + 1] = yi;
1244             a[k1] = xr;
1245             a[k1 + 1] = xi;
1246             a[k1 + 3] = -a[k1 + 3];
1247             for (i = nh >> 1; i > (_j0 ^= i); i >>= 1);
1248         }
1249     } else {
1250         _j0 = 0;
1251         for (k0 = 0; k0 < m; k0 += 4) {
1252             k = k0;
1253             for (j = _j0; j < _j0 + k0; j += 4) {
1254                 xr = a[j];
1255                 xi = -a[j + 1];
1256                 yr = a[k];
1257                 yi = -a[k + 1];
1258                 a[j] = yr;
1259                 a[j + 1] = yi;
1260                 a[k] = xr;
1261                 a[k + 1] = xi;
1262                 _j1 = j + m;
1263                 k1 = k + m;
1264                 xr = a[_j1];
1265                 xi = -a[_j1 + 1];
1266                 yr = a[k1];
1267                 yi = -a[k1 + 1];
1268                 a[_j1] = yr;
1269                 a[_j1 + 1] = yi;
1270                 a[k1] = xr;
1271                 a[k1 + 1] = xi;
1272                 _j1 += nh;
1273                 k1 += 2;
1274                 xr = a[_j1];
1275                 xi = -a[_j1 + 1];
1276                 yr = a[k1];
1277                 yi = -a[k1 + 1];
1278                 a[_j1] = yr;
1279                 a[_j1 + 1] = yi;
1280                 a[k1] = xr;
1281                 a[k1 + 1] = xi;
1282                 _j1 -= m;
1283                 k1 -= m;
1284                 xr = a[_j1];
1285                 xi = -a[_j1 + 1];
1286                 yr = a[k1];
1287                 yi = -a[k1 + 1];
1288                 a[_j1] = yr;
1289                 a[_j1 + 1] = yi;
1290                 a[k1] = xr;
1291                 a[k1 + 1] = xi;
1292                 _j1 += 2;
1293                 k1 += nh;
1294                 xr = a[_j1];
1295                 xi = -a[_j1 + 1];
1296                 yr = a[k1];
1297                 yi = -a[k1 + 1];
1298                 a[_j1] = yr;
1299                 a[_j1 + 1] = yi;
1300                 a[k1] = xr;
1301                 a[k1 + 1] = xi;
1302                 _j1 += m;
1303                 k1 += m;
1304                 xr = a[_j1];
1305                 xi = -a[_j1 + 1];
1306                 yr = a[k1];
1307                 yi = -a[k1 + 1];
1308                 a[_j1] = yr;
1309                 a[_j1 + 1] = yi;
1310                 a[k1] = xr;
1311                 a[k1 + 1] = xi;
1312                 _j1 -= nh;
1313                 k1 -= 2;
1314                 xr = a[_j1];
1315                 xi = -a[_j1 + 1];
1316                 yr = a[k1];
1317                 yi = -a[k1 + 1];
1318                 a[_j1] = yr;
1319                 a[_j1 + 1] = yi;
1320                 a[k1] = xr;
1321                 a[k1 + 1] = xi;
1322                 _j1 -= m;
1323                 k1 -= m;
1324                 xr = a[_j1];
1325                 xi = -a[_j1 + 1];
1326                 yr = a[k1];
1327                 yi = -a[k1 + 1];
1328                 a[_j1] = yr;
1329                 a[_j1 + 1] = yi;
1330                 a[k1] = xr;
1331                 a[k1 + 1] = xi;
1332                 for (i = nh >> 1; i > (k ^= i); i >>= 1);
1333             }
1334             k1 = _j0 + k0;
1335             _j1 = k1 + 2;
1336             k1 += nh;
1337             a[_j1 - 1] = -a[_j1 - 1];
1338             xr = a[_j1];
1339             xi = -a[_j1 + 1];
1340             yr = a[k1];
1341             yi = -a[k1 + 1];
1342             a[_j1] = yr;
1343             a[_j1 + 1] = yi;
1344             a[k1] = xr;
1345             a[k1 + 1] = xi;
1346             a[k1 + 3] = -a[k1 + 3];
1347             _j1 += m;
1348             k1 += m;
1349             a[_j1 - 1] = -a[_j1 - 1];
1350             xr = a[_j1];
1351             xi = -a[_j1 + 1];
1352             yr = a[k1];
1353             yi = -a[k1 + 1];
1354             a[_j1] = yr;
1355             a[_j1 + 1] = yi;
1356             a[k1] = xr;
1357             a[k1 + 1] = xi;
1358             a[k1 + 3] = -a[k1 + 3];
1359             for (i = nh >> 1; i > (_j0 ^= i); i >>= 1);
1360         }
1361     }
1362 }
1363 
1364 
1365 void bitrv216(double *a)
1366 {
1367     double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, 
1368         x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i, 
1369         x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
1370 
1371     x1r = a[2];
1372     x1i = a[3];
1373     x2r = a[4];
1374     x2i = a[5];
1375     x3r = a[6];
1376     x3i = a[7];
1377     x4r = a[8];
1378     x4i = a[9];
1379     x5r = a[10];
1380     x5i = a[11];
1381     x7r = a[14];
1382     x7i = a[15];
1383     x8r = a[16];
1384     x8i = a[17];
1385     x10r = a[20];
1386     x10i = a[21];
1387     x11r = a[22];
1388     x11i = a[23];
1389     x12r = a[24];
1390     x12i = a[25];
1391     x13r = a[26];
1392     x13i = a[27];
1393     x14r = a[28];
1394     x14i = a[29];
1395     a[2] = x8r;
1396     a[3] = x8i;
1397     a[4] = x4r;
1398     a[5] = x4i;
1399     a[6] = x12r;
1400     a[7] = x12i;
1401     a[8] = x2r;
1402     a[9] = x2i;
1403     a[10] = x10r;
1404     a[11] = x10i;
1405     a[14] = x14r;
1406     a[15] = x14i;
1407     a[16] = x1r;
1408     a[17] = x1i;
1409     a[20] = x5r;
1410     a[21] = x5i;
1411     a[22] = x13r;
1412     a[23] = x13i;
1413     a[24] = x3r;
1414     a[25] = x3i;
1415     a[26] = x11r;
1416     a[27] = x11i;
1417     a[28] = x7r;
1418     a[29] = x7i;
1419 }
1420 
1421 
1422 void bitrv216neg(double *a)
1423 {
1424     double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, 
1425         x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i, 
1426         x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, 
1427         x13r, x13i, x14r, x14i, x15r, x15i;
1428 
1429     x1r = a[2];
1430     x1i = a[3];
1431     x2r = a[4];
1432     x2i = a[5];
1433     x3r = a[6];
1434     x3i = a[7];
1435     x4r = a[8];
1436     x4i = a[9];
1437     x5r = a[10];
1438     x5i = a[11];
1439     x6r = a[12];
1440     x6i = a[13];
1441     x7r = a[14];
1442     x7i = a[15];
1443     x8r = a[16];
1444     x8i = a[17];
1445     x9r = a[18];
1446     x9i = a[19];
1447     x10r = a[20];
1448     x10i = a[21];
1449     x11r = a[22];
1450     x11i = a[23];
1451     x12r = a[24];
1452     x12i = a[25];
1453     x13r = a[26];
1454     x13i = a[27];
1455     x14r = a[28];
1456     x14i = a[29];
1457     x15r = a[30];
1458     x15i = a[31];
1459     a[2] = x15r;
1460     a[3] = x15i;
1461     a[4] = x7r;
1462     a[5] = x7i;
1463     a[6] = x11r;
1464     a[7] = x11i;
1465     a[8] = x3r;
1466     a[9] = x3i;
1467     a[10] = x13r;
1468     a[11] = x13i;
1469     a[12] = x5r;
1470     a[13] = x5i;
1471     a[14] = x9r;
1472     a[15] = x9i;
1473     a[16] = x1r;
1474     a[17] = x1i;
1475     a[18] = x14r;
1476     a[19] = x14i;
1477     a[20] = x6r;
1478     a[21] = x6i;
1479     a[22] = x10r;
1480     a[23] = x10i;
1481     a[24] = x2r;
1482     a[25] = x2i;
1483     a[26] = x12r;
1484     a[27] = x12i;
1485     a[28] = x4r;
1486     a[29] = x4i;
1487     a[30] = x8r;
1488     a[31] = x8i;
1489 }
1490 
1491 
1492 void bitrv208(double *a)
1493 {
1494     double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
1495 
1496     x1r = a[2];
1497     x1i = a[3];
1498     x3r = a[6];
1499     x3i = a[7];
1500     x4r = a[8];
1501     x4i = a[9];
1502     x6r = a[12];
1503     x6i = a[13];
1504     a[2] = x4r;
1505     a[3] = x4i;
1506     a[6] = x6r;
1507     a[7] = x6i;
1508     a[8] = x1r;
1509     a[9] = x1i;
1510     a[12] = x3r;
1511     a[13] = x3i;
1512 }
1513 
1514 
1515 void bitrv208neg(double *a)
1516 {
1517     double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, 
1518         x5r, x5i, x6r, x6i, x7r, x7i;
1519 
1520     x1r = a[2];
1521     x1i = a[3];
1522     x2r = a[4];
1523     x2i = a[5];
1524     x3r = a[6];
1525     x3i = a[7];
1526     x4r = a[8];
1527     x4i = a[9];
1528     x5r = a[10];
1529     x5i = a[11];
1530     x6r = a[12];
1531     x6i = a[13];
1532     x7r = a[14];
1533     x7i = a[15];
1534     a[2] = x7r;
1535     a[3] = x7i;
1536     a[4] = x3r;
1537     a[5] = x3i;
1538     a[6] = x5r;
1539     a[7] = x5i;
1540     a[8] = x1r;
1541     a[9] = x1i;
1542     a[10] = x6r;
1543     a[11] = x6i;
1544     a[12] = x2r;
1545     a[13] = x2i;
1546     a[14] = x4r;
1547     a[15] = x4i;
1548 }
1549 
1550 
1551 void bitrv1(int n, double *a)
1552 {
1553     int _j0, k0, _j1, k1, l, m, i, j, k, nh;
1554     double x;
1555 
1556     nh = n >> 1;
1557     x = a[1];
1558     a[1] = a[nh];
1559     a[nh] = x;
1560     m = 2;
1561     for (l = n >> 2; l > 2; l >>= 2) {
1562         m <<= 1;
1563     }
1564     if (l == 2) {
1565         _j1 = m + 1;
1566         k1 = m + nh;
1567         x = a[_j1];
1568         a[_j1] = a[k1];
1569         a[k1] = x;
1570         _j0 = 0;
1571         for (k0 = 2; k0 < m; k0 += 2) {
1572             for (i = nh >> 1; i > (_j0 ^= i); i >>= 1);
1573             k = k0;
1574             for (j = _j0; j < _j0 + k0; j += 2) {
1575                 x = a[j];
1576                 a[j] = a[k];
1577                 a[k] = x;
1578                 _j1 = j + m;
1579                 k1 = k + m;
1580                 x = a[_j1];
1581                 a[_j1] = a[k1];
1582                 a[k1] = x;
1583                 _j1 += nh;
1584                 k1++;
1585                 x = a[_j1];
1586                 a[_j1] = a[k1];
1587                 a[k1] = x;
1588                 _j1 -= m;
1589                 k1 -= m;
1590                 x = a[_j1];
1591                 a[_j1] = a[k1];
1592                 a[k1] = x;
1593                 _j1++;
1594                 k1 += nh;
1595                 x = a[_j1];
1596                 a[_j1] = a[k1];
1597                 a[k1] = x;
1598                 _j1 += m;
1599                 k1 += m;
1600                 x = a[_j1];
1601                 a[_j1] = a[k1];
1602                 a[k1] = x;
1603                 _j1 -= nh;
1604                 k1--;
1605                 x = a[_j1];
1606                 a[_j1] = a[k1];
1607                 a[k1] = x;
1608                 _j1 -= m;
1609                 k1 -= m;
1610                 x = a[_j1];
1611                 a[_j1] = a[k1];
1612                 a[k1] = x;
1613                 for (i = nh >> 1; i > (k ^= i); i >>= 1);
1614             }
1615             k1 = _j0 + k0;
1616             _j1 = k1 + 1;
1617             k1 += nh;
1618             x = a[_j1];
1619             a[_j1] = a[k1];
1620             a[k1] = x;
1621             _j1 += m;
1622             k1 += m;
1623             x = a[_j1];
1624             a[_j1] = a[k1];
1625             a[k1] = x;
1626         }
1627     } else {
1628         _j0 = 0;
1629         for (k0 = 2; k0 < m; k0 += 2) {
1630             for (i = nh >> 1; i > (_j0 ^= i); i >>= 1);
1631             k = k0;
1632             for (j = _j0; j < _j0 + k0; j += 2) {
1633                 x = a[j];
1634                 a[j] = a[k];
1635                 a[k] = x;
1636                 _j1 = j + nh;
1637                 k1 = k + 1;
1638                 x = a[_j1];
1639                 a[_j1] = a[k1];
1640                 a[k1] = x;
1641                 _j1++;
1642                 k1 += nh;
1643                 x = a[_j1];
1644                 a[_j1] = a[k1];
1645                 a[k1] = x;
1646                 _j1 -= nh;
1647                 k1--;
1648                 x = a[_j1];
1649                 a[_j1] = a[k1];
1650                 a[k1] = x;
1651                 for (i = nh >> 1; i > (k ^= i); i >>= 1);
1652             }
1653             k1 = _j0 + k0;
1654             _j1 = k1 + 1;
1655             k1 += nh;
1656             x = a[_j1];
1657             a[_j1] = a[k1];
1658             a[k1] = x;
1659         }
1660     }
1661 }
1662 
1663 
1664 void cftb1st(int n, double *a)
1665 {
1666     int i, i0, j, _j0, _j1, j2, j3, m, mh;
1667     double ew, w1r, w1i, wk1r, wk1i, wk3r, wk3i, 
1668         wd1r, wd1i, wd3r, wd3i, ss1, ss3;
1669     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1670 
1671     mh = n >> 3;
1672     m = 2 * mh;
1673     _j1 = m;
1674     j2 = _j1 + m;
1675     j3 = j2 + m;
1676     x0r = a[0] + a[j2];
1677     x0i = -a[1] - a[j2 + 1];
1678     x1r = a[0] - a[j2];
1679     x1i = -a[1] + a[j2 + 1];
1680     x2r = a[_j1] + a[j3];
1681     x2i = a[_j1 + 1] + a[j3 + 1];
1682     x3r = a[_j1] - a[j3];
1683     x3i = a[_j1 + 1] - a[j3 + 1];
1684     a[0] = x0r + x2r;
1685     a[1] = x0i - x2i;
1686     a[_j1] = x0r - x2r;
1687     a[_j1 + 1] = x0i + x2i;
1688     a[j2] = x1r + x3i;
1689     a[j2 + 1] = x1i + x3r;
1690     a[j3] = x1r - x3i;
1691     a[j3 + 1] = x1i - x3r;
1692     wd1r = 1;
1693     wd1i = 0;
1694     wd3r = 1;
1695     wd3i = 0;
1696     ew = M_PI_2 / m;
1697     w1r = cos(2 * ew);
1698     w1i = sin(2 * ew);
1699     wk1r = w1r;
1700     wk1i = w1i;
1701     ss1 = 2 * w1i;
1702     wk3i = 2 * ss1 * wk1r;
1703     wk3r = wk1r - wk3i * wk1i;
1704     wk3i = wk1i - wk3i * wk1r;
1705     ss3 = 2 * wk3i;
1706     i = 0;
1707     for (;;) {
1708         i0 = i + 4 * CDFT_LOOP_DIV;
1709         if (i0 > mh - 4) {
1710             i0 = mh - 4;
1711         }
1712         for (j = i + 2; j < i0; j += 4) {
1713             wd1r -= ss1 * wk1i;
1714             wd1i += ss1 * wk1r;
1715             wd3r -= ss3 * wk3i;
1716             wd3i += ss3 * wk3r;
1717             _j1 = j + m;
1718             j2 = _j1 + m;
1719             j3 = j2 + m;
1720             x0r = a[j] + a[j2];
1721             x0i = -a[j + 1] - a[j2 + 1];
1722             x1r = a[j] - a[j2];
1723             x1i = -a[j + 1] + a[j2 + 1];
1724             x2r = a[_j1] + a[j3];
1725             x2i = a[_j1 + 1] + a[j3 + 1];
1726             x3r = a[_j1] - a[j3];
1727             x3i = a[_j1 + 1] - a[j3 + 1];
1728             a[j] = x0r + x2r;
1729             a[j + 1] = x0i - x2i;
1730             a[_j1] = x0r - x2r;
1731             a[_j1 + 1] = x0i + x2i;
1732             x0r = x1r + x3i;
1733             x0i = x1i + x3r;
1734             a[j2] = wk1r * x0r - wk1i * x0i;
1735             a[j2 + 1] = wk1r * x0i + wk1i * x0r;
1736             x0r = x1r - x3i;
1737             x0i = x1i - x3r;
1738             a[j3] = wk3r * x0r + wk3i * x0i;
1739             a[j3 + 1] = wk3r * x0i - wk3i * x0r;
1740             x0r = a[j + 2] + a[j2 + 2];
1741             x0i = -a[j + 3] - a[j2 + 3];
1742             x1r = a[j + 2] - a[j2 + 2];
1743             x1i = -a[j + 3] + a[j2 + 3];
1744             x2r = a[_j1 + 2] + a[j3 + 2];
1745             x2i = a[_j1 + 3] + a[j3 + 3];
1746             x3r = a[_j1 + 2] - a[j3 + 2];
1747             x3i = a[_j1 + 3] - a[j3 + 3];
1748             a[j + 2] = x0r + x2r;
1749             a[j + 3] = x0i - x2i;
1750             a[_j1 + 2] = x0r - x2r;
1751             a[_j1 + 3] = x0i + x2i;
1752             x0r = x1r + x3i;
1753             x0i = x1i + x3r;
1754             a[j2 + 2] = wd1r * x0r - wd1i * x0i;
1755             a[j2 + 3] = wd1r * x0i + wd1i * x0r;
1756             x0r = x1r - x3i;
1757             x0i = x1i - x3r;
1758             a[j3 + 2] = wd3r * x0r + wd3i * x0i;
1759             a[j3 + 3] = wd3r * x0i - wd3i * x0r;
1760             _j0 = m - j;
1761             _j1 = _j0 + m;
1762             j2 = _j1 + m;
1763             j3 = j2 + m;
1764             x0r = a[_j0] + a[j2];
1765             x0i = -a[_j0 + 1] - a[j2 + 1];
1766             x1r = a[_j0] - a[j2];
1767             x1i = -a[_j0 + 1] + a[j2 + 1];
1768             x2r = a[_j1] + a[j3];
1769             x2i = a[_j1 + 1] + a[j3 + 1];
1770             x3r = a[_j1] - a[j3];
1771             x3i = a[_j1 + 1] - a[j3 + 1];
1772             a[_j0] = x0r + x2r;
1773             a[_j0 + 1] = x0i - x2i;
1774             a[_j1] = x0r - x2r;
1775             a[_j1 + 1] = x0i + x2i;
1776             x0r = x1r + x3i;
1777             x0i = x1i + x3r;
1778             a[j2] = wk1i * x0r - wk1r * x0i;
1779             a[j2 + 1] = wk1i * x0i + wk1r * x0r;
1780             x0r = x1r - x3i;
1781             x0i = x1i - x3r;
1782             a[j3] = wk3i * x0r + wk3r * x0i;
1783             a[j3 + 1] = wk3i * x0i - wk3r * x0r;
1784             x0r = a[_j0 - 2] + a[j2 - 2];
1785             x0i = -a[_j0 - 1] - a[j2 - 1];
1786             x1r = a[_j0 - 2] - a[j2 - 2];
1787             x1i = -a[_j0 - 1] + a[j2 - 1];
1788             x2r = a[_j1 - 2] + a[j3 - 2];
1789             x2i = a[_j1 - 1] + a[j3 - 1];
1790             x3r = a[_j1 - 2] - a[j3 - 2];
1791             x3i = a[_j1 - 1] - a[j3 - 1];
1792             a[_j0 - 2] = x0r + x2r;
1793             a[_j0 - 1] = x0i - x2i;
1794             a[_j1 - 2] = x0r - x2r;
1795             a[_j1 - 1] = x0i + x2i;
1796             x0r = x1r + x3i;
1797             x0i = x1i + x3r;
1798             a[j2 - 2] = wd1i * x0r - wd1r * x0i;
1799             a[j2 - 1] = wd1i * x0i + wd1r * x0r;
1800             x0r = x1r - x3i;
1801             x0i = x1i - x3r;
1802             a[j3 - 2] = wd3i * x0r + wd3r * x0i;
1803             a[j3 - 1] = wd3i * x0i - wd3r * x0r;
1804             wk1r -= ss1 * wd1i;
1805             wk1i += ss1 * wd1r;
1806             wk3r -= ss3 * wd3i;
1807             wk3i += ss3 * wd3r;
1808         }
1809         if (i0 == mh - 4) {
1810             break;
1811         }
1812         wd1r = cos(ew * i0);
1813         wd1i = sin(ew * i0);
1814         wd3i = 4 * wd1i * wd1r;
1815         wd3r = wd1r - wd3i * wd1i;
1816         wd3i = wd1i - wd3i * wd1r;
1817         wk1r = w1r * wd1r - w1i * wd1i;
1818         wk1i = w1r * wd1i + w1i * wd1r;
1819         wk3i = 4 * wk1i * wk1r;
1820         wk3r = wk1r - wk3i * wk1i;
1821         wk3i = wk1i - wk3i * wk1r;
1822         i = i0;
1823     }
1824     wd1r = WR5000;
1825     _j0 = mh;
1826     _j1 = _j0 + m;
1827     j2 = _j1 + m;
1828     j3 = j2 + m;
1829     x0r = a[_j0 - 2] + a[j2 - 2];
1830     x0i = -a[_j0 - 1] - a[j2 - 1];
1831     x1r = a[_j0 - 2] - a[j2 - 2];
1832     x1i = -a[_j0 - 1] + a[j2 - 1];
1833     x2r = a[_j1 - 2] + a[j3 - 2];
1834     x2i = a[_j1 - 1] + a[j3 - 1];
1835     x3r = a[_j1 - 2] - a[j3 - 2];
1836     x3i = a[_j1 - 1] - a[j3 - 1];
1837     a[_j0 - 2] = x0r + x2r;
1838     a[_j0 - 1] = x0i - x2i;
1839     a[_j1 - 2] = x0r - x2r;
1840     a[_j1 - 1] = x0i + x2i;
1841     x0r = x1r + x3i;
1842     x0i = x1i + x3r;
1843     a[j2 - 2] = wk1r * x0r - wk1i * x0i;
1844     a[j2 - 1] = wk1r * x0i + wk1i * x0r;
1845     x0r = x1r - x3i;
1846     x0i = x1i - x3r;
1847     a[j3 - 2] = wk3r * x0r + wk3i * x0i;
1848     a[j3 - 1] = wk3r * x0i - wk3i * x0r;
1849     x0r = a[_j0] + a[j2];
1850     x0i = -a[_j0 + 1] - a[j2 + 1];
1851     x1r = a[_j0] - a[j2];
1852     x1i = -a[_j0 + 1] + a[j2 + 1];
1853     x2r = a[_j1] + a[j3];
1854     x2i = a[_j1 + 1] + a[j3 + 1];
1855     x3r = a[_j1] - a[j3];
1856     x3i = a[_j1 + 1] - a[j3 + 1];
1857     a[_j0] = x0r + x2r;
1858     a[_j0 + 1] = x0i - x2i;
1859     a[_j1] = x0r - x2r;
1860     a[_j1 + 1] = x0i + x2i;
1861     x0r = x1r + x3i;
1862     x0i = x1i + x3r;
1863     a[j2] = wd1r * (x0r - x0i);
1864     a[j2 + 1] = wd1r * (x0i + x0r);
1865     x0r = x1r - x3i;
1866     x0i = x1i - x3r;
1867     a[j3] = -wd1r * (x0r + x0i);
1868     a[j3 + 1] = -wd1r * (x0i - x0r);
1869     x0r = a[_j0 + 2] + a[j2 + 2];
1870     x0i = -a[_j0 + 3] - a[j2 + 3];
1871     x1r = a[_j0 + 2] - a[j2 + 2];
1872     x1i = -a[_j0 + 3] + a[j2 + 3];
1873     x2r = a[_j1 + 2] + a[j3 + 2];
1874     x2i = a[_j1 + 3] + a[j3 + 3];
1875     x3r = a[_j1 + 2] - a[j3 + 2];
1876     x3i = a[_j1 + 3] - a[j3 + 3];
1877     a[_j0 + 2] = x0r + x2r;
1878     a[_j0 + 3] = x0i - x2i;
1879     a[_j1 + 2] = x0r - x2r;
1880     a[_j1 + 3] = x0i + x2i;
1881     x0r = x1r + x3i;
1882     x0i = x1i + x3r;
1883     a[j2 + 2] = wk1i * x0r - wk1r * x0i;
1884     a[j2 + 3] = wk1i * x0i + wk1r * x0r;
1885     x0r = x1r - x3i;
1886     x0i = x1i - x3r;
1887     a[j3 + 2] = wk3i * x0r + wk3r * x0i;
1888     a[j3 + 3] = wk3i * x0i - wk3r * x0r;
1889 }
1890 
1891 
1892 #ifdef USE_CDFT_THREADS
1893 struct cdft_arg_st {
1894     int n0;
1895     int n;
1896     double *a;
1897 };
1898 typedef struct cdft_arg_st cdft_arg_t;
1899 
1900 
1901 void cftrec4_th(int n, double *a)
1902 {
1903     void *cftrec1_th(void *p);
1904     void *cftrec2_th(void *p);
1905     int i, idiv4, m, nthread;
1906     cdft_thread_t th[4];
1907     cdft_arg_t ag[4];
1908 
1909     nthread = 2;
1910     idiv4 = 0;
1911     m = n >> 1;
1912     if (n > CDFT_4THREADS_BEGIN_N) {
1913         nthread = 4;
1914         idiv4 = 1;
1915         m >>= 1;
1916     }
1917     for (i = 0; i < nthread; i++) {
1918         ag[i].n0 = n;
1919         ag[i].n = m;
1920         ag[i].a = &a[i * m];
1921         if (i != idiv4) {
1922             cdft_thread_create(&th[i], cftrec1_th, &ag[i]);
1923         } else {
1924             cdft_thread_create(&th[i], cftrec2_th, &ag[i]);
1925         }
1926     }
1927     for (i = 0; i < nthread; i++) {
1928         cdft_thread_wait(th[i]);
1929     }
1930 }
1931 
1932 
1933 void *cftrec1_th(void *p)
1934 {
1935     int cfttree(int n, int j, int k, double *a);
1936     void cftleaf(int n, int isplt, double *a);
1937     void cftmdl1(int n, double *a);
1938     int isplt, j, k, m, n, n0;
1939     double *a;
1940 
1941     n0 = ((cdft_arg_t *) p)->n0;
1942     n = ((cdft_arg_t *) p)->n;
1943     a = ((cdft_arg_t *) p)->a;
1944     m = n0;
1945     while (m > 512) {
1946         m >>= 2;
1947         cftmdl1(m, &a[n - m]);
1948     }
1949     cftleaf(m, 1, &a[n - m]);
1950     k = 0;
1951     for (j = n - m; j > 0; j -= m) {
1952         k++;
1953         isplt = cfttree(m, j, k, a);
1954         cftleaf(m, isplt, &a[j - m]);
1955     }
1956     return (void *) 0;
1957 }
1958 
1959 
1960 void *cftrec2_th(void *p)
1961 {
1962     int cfttree(int n, int j, int k, double *a);
1963     void cftleaf(int n, int isplt, double *a);
1964     void cftmdl2(int n, double *a);
1965     int isplt, j, k, m, n, n0;
1966     double *a;
1967 
1968     n0 = ((cdft_arg_t *) p)->n0;
1969     n = ((cdft_arg_t *) p)->n;
1970     a = ((cdft_arg_t *) p)->a;
1971     k = 1;
1972     m = n0;
1973     while (m > 512) {
1974         m >>= 2;
1975         k <<= 2;
1976         cftmdl2(m, &a[n - m]);
1977     }
1978     cftleaf(m, 0, &a[n - m]);
1979     k >>= 1;
1980     for (j = n - m; j > 0; j -= m) {
1981         k++;
1982         isplt = cfttree(m, j, k, a);
1983         cftleaf(m, isplt, &a[j - m]);
1984     }
1985     return (void *) 0;
1986 }
1987 #endif /* USE_CDFT_THREADS */
1988 
1989 
1990 void cftrec4(int n, double *a)
1991 {
1992     int isplt, j, k, m;
1993 
1994     m = n;
1995     while (m > 512) {
1996         m >>= 2;
1997         cftmdl1(m, &a[n - m]);
1998     }
1999     cftleaf(m, 1, &a[n - m]);
2000     k = 0;
2001     for (j = n - m; j > 0; j -= m) {
2002         k++;
2003         isplt = cfttree(m, j, k, a);
2004         cftleaf(m, isplt, &a[j - m]);
2005     }
2006 }
2007 
2008 
2009 int cfttree(int n, int j, int k, double *a)
2010 {
2011     int i, isplt, m;
2012 
2013     if ((k & 3) != 0) {
2014         isplt = k & 1;
2015         if (isplt != 0) {
2016             cftmdl1(n, &a[j - n]);
2017         } else {
2018             cftmdl2(n, &a[j - n]);
2019         }
2020     } else {
2021         m = n;
2022         for (i = k; (i & 3) == 0; i >>= 2) {
2023             m <<= 2;
2024         }
2025         isplt = i & 1;
2026         if (isplt != 0) {
2027             while (m > 128) {
2028                 cftmdl1(m, &a[j - m]);
2029                 m >>= 2;
2030             }
2031         } else {
2032             while (m > 128) {
2033                 cftmdl2(m, &a[j - m]);
2034                 m >>= 2;
2035             }
2036         }
2037     }
2038     return isplt;
2039 }
2040 
2041 
2042 void cftleaf(int n, int isplt, double *a)
2043 {
2044     if (n == 512) {
2045         cftmdl1(128, a);
2046         cftf161(a);
2047         cftf162(&a[32]);
2048         cftf161(&a[64]);
2049         cftf161(&a[96]);
2050         cftmdl2(128, &a[128]);
2051         cftf161(&a[128]);
2052         cftf162(&a[160]);
2053         cftf161(&a[192]);
2054         cftf162(&a[224]);
2055         cftmdl1(128, &a[256]);
2056         cftf161(&a[256]);
2057         cftf162(&a[288]);
2058         cftf161(&a[320]);
2059         cftf161(&a[352]);
2060         if (isplt != 0) {
2061             cftmdl1(128, &a[384]);
2062             cftf161(&a[480]);
2063         } else {
2064             cftmdl2(128, &a[384]);
2065             cftf162(&a[480]);
2066         }
2067         cftf161(&a[384]);
2068         cftf162(&a[416]);
2069         cftf161(&a[448]);
2070     } else {
2071         cftmdl1(64, a);
2072         cftf081(a);
2073         cftf082(&a[16]);
2074         cftf081(&a[32]);
2075         cftf081(&a[48]);
2076         cftmdl2(64, &a[64]);
2077         cftf081(&a[64]);
2078         cftf082(&a[80]);
2079         cftf081(&a[96]);
2080         cftf082(&a[112]);
2081         cftmdl1(64, &a[128]);
2082         cftf081(&a[128]);
2083         cftf082(&a[144]);
2084         cftf081(&a[160]);
2085         cftf081(&a[176]);
2086         if (isplt != 0) {
2087             cftmdl1(64, &a[192]);
2088             cftf081(&a[240]);
2089         } else {
2090             cftmdl2(64, &a[192]);
2091             cftf082(&a[240]);
2092         }
2093         cftf081(&a[192]);
2094         cftf082(&a[208]);
2095         cftf081(&a[224]);
2096     }
2097 }
2098 
2099 
2100 void cftmdl1(int n, double *a)
2101 {
2102     int i, i0, j, _j0, _j1, j2, j3, m, mh;
2103     double ew, w1r, w1i, wk1r, wk1i, wk3r, wk3i, 
2104         wd1r, wd1i, wd3r, wd3i, ss1, ss3;
2105     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2106 
2107     mh = n >> 3;
2108     m = 2 * mh;
2109     _j1 = m;
2110     j2 = _j1 + m;
2111     j3 = j2 + m;
2112     x0r = a[0] + a[j2];
2113     x0i = a[1] + a[j2 + 1];
2114     x1r = a[0] - a[j2];
2115     x1i = a[1] - a[j2 + 1];
2116     x2r = a[_j1] + a[j3];
2117     x2i = a[_j1 + 1] + a[j3 + 1];
2118     x3r = a[_j1] - a[j3];
2119     x3i = a[_j1 + 1] - a[j3 + 1];
2120     a[0] = x0r + x2r;
2121     a[1] = x0i + x2i;
2122     a[_j1] = x0r - x2r;
2123     a[_j1 + 1] = x0i - x2i;
2124     a[j2] = x1r - x3i;
2125     a[j2 + 1] = x1i + x3r;
2126     a[j3] = x1r + x3i;
2127     a[j3 + 1] = x1i - x3r;
2128     wd1r = 1;
2129     wd1i = 0;
2130     wd3r = 1;
2131     wd3i = 0;
2132     ew = M_PI_2 / m;
2133     w1r = cos(2 * ew);
2134     w1i = sin(2 * ew);
2135     wk1r = w1r;
2136     wk1i = w1i;
2137     ss1 = 2 * w1i;
2138     wk3i = 2 * ss1 * wk1r;
2139     wk3r = wk1r - wk3i * wk1i;
2140     wk3i = wk1i - wk3i * wk1r;
2141     ss3 = 2 * wk3i;
2142     i = 0;
2143     for (;;) {
2144         i0 = i + 4 * CDFT_LOOP_DIV;
2145         if (i0 > mh - 4) {
2146             i0 = mh - 4;
2147         }
2148         for (j = i + 2; j < i0; j += 4) {
2149             wd1r -= ss1 * wk1i;
2150             wd1i += ss1 * wk1r;
2151             wd3r -= ss3 * wk3i;
2152             wd3i += ss3 * wk3r;
2153             _j1 = j + m;
2154             j2 = _j1 + m;
2155             j3 = j2 + m;
2156             x0r = a[j] + a[j2];
2157             x0i = a[j + 1] + a[j2 + 1];
2158             x1r = a[j] - a[j2];
2159             x1i = a[j + 1] - a[j2 + 1];
2160             x2r = a[_j1] + a[j3];
2161             x2i = a[_j1 + 1] + a[j3 + 1];
2162             x3r = a[_j1] - a[j3];
2163             x3i = a[_j1 + 1] - a[j3 + 1];
2164             a[j] = x0r + x2r;
2165             a[j + 1] = x0i + x2i;
2166             a[_j1] = x0r - x2r;
2167             a[_j1 + 1] = x0i - x2i;
2168             x0r = x1r - x3i;
2169             x0i = x1i + x3r;
2170             a[j2] = wk1r * x0r - wk1i * x0i;
2171             a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2172             x0r = x1r + x3i;
2173             x0i = x1i - x3r;
2174             a[j3] = wk3r * x0r + wk3i * x0i;
2175             a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2176             x0r = a[j + 2] + a[j2 + 2];
2177             x0i = a[j + 3] + a[j2 + 3];
2178             x1r = a[j + 2] - a[j2 + 2];
2179             x1i = a[j + 3] - a[j2 + 3];
2180             x2r = a[_j1 + 2] + a[j3 + 2];
2181             x2i = a[_j1 + 3] + a[j3 + 3];
2182             x3r = a[_j1 + 2] - a[j3 + 2];
2183             x3i = a[_j1 + 3] - a[j3 + 3];
2184             a[j + 2] = x0r + x2r;
2185             a[j + 3] = x0i + x2i;
2186             a[_j1 + 2] = x0r - x2r;
2187             a[_j1 + 3] = x0i - x2i;
2188             x0r = x1r - x3i;
2189             x0i = x1i + x3r;
2190             a[j2 + 2] = wd1r * x0r - wd1i * x0i;
2191             a[j2 + 3] = wd1r * x0i + wd1i * x0r;
2192             x0r = x1r + x3i;
2193             x0i = x1i - x3r;
2194             a[j3 + 2] = wd3r * x0r + wd3i * x0i;
2195             a[j3 + 3] = wd3r * x0i - wd3i * x0r;
2196             _j0 = m - j;
2197             _j1 = _j0 + m;
2198             j2 = _j1 + m;
2199             j3 = j2 + m;
2200             x0r = a[_j0] + a[j2];
2201             x0i = a[_j0 + 1] + a[j2 + 1];
2202             x1r = a[_j0] - a[j2];
2203             x1i = a[_j0 + 1] - a[j2 + 1];
2204             x2r = a[_j1] + a[j3];
2205             x2i = a[_j1 + 1] + a[j3 + 1];
2206             x3r = a[_j1] - a[j3];
2207             x3i = a[_j1 + 1] - a[j3 + 1];
2208             a[_j0] = x0r + x2r;
2209             a[_j0 + 1] = x0i + x2i;
2210             a[_j1] = x0r - x2r;
2211             a[_j1 + 1] = x0i - x2i;
2212             x0r = x1r - x3i;
2213             x0i = x1i + x3r;
2214             a[j2] = wk1i * x0r - wk1r * x0i;
2215             a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2216             x0r = x1r + x3i;
2217             x0i = x1i - x3r;
2218             a[j3] = wk3i * x0r + wk3r * x0i;
2219             a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2220             x0r = a[_j0 - 2] + a[j2 - 2];
2221             x0i = a[_j0 - 1] + a[j2 - 1];
2222             x1r = a[_j0 - 2] - a[j2 - 2];
2223             x1i = a[_j0 - 1] - a[j2 - 1];
2224             x2r = a[_j1 - 2] + a[j3 - 2];
2225             x2i = a[_j1 - 1] + a[j3 - 1];
2226             x3r = a[_j1 - 2] - a[j3 - 2];
2227             x3i = a[_j1 - 1] - a[j3 - 1];
2228             a[_j0 - 2] = x0r + x2r;
2229             a[_j0 - 1] = x0i + x2i;
2230             a[_j1 - 2] = x0r - x2r;
2231             a[_j1 - 1] = x0i - x2i;
2232             x0r = x1r - x3i;
2233             x0i = x1i + x3r;
2234             a[j2 - 2] = wd1i * x0r - wd1r * x0i;
2235             a[j2 - 1] = wd1i * x0i + wd1r * x0r;
2236             x0r = x1r + x3i;
2237             x0i = x1i - x3r;
2238             a[j3 - 2] = wd3i * x0r + wd3r * x0i;
2239             a[j3 - 1] = wd3i * x0i - wd3r * x0r;
2240             wk1r -= ss1 * wd1i;
2241             wk1i += ss1 * wd1r;
2242             wk3r -= ss3 * wd3i;
2243             wk3i += ss3 * wd3r;
2244         }
2245         if (i0 == mh - 4) {
2246             break;
2247         }
2248         wd1r = cos(ew * i0);
2249         wd1i = sin(ew * i0);
2250         wd3i = 4 * wd1i * wd1r;
2251         wd3r = wd1r - wd3i * wd1i;
2252         wd3i = wd1i - wd3i * wd1r;
2253         wk1r = w1r * wd1r - w1i * wd1i;
2254         wk1i = w1r * wd1i + w1i * wd1r;
2255         wk3i = 4 * wk1i * wk1r;
2256         wk3r = wk1r - wk3i * wk1i;
2257         wk3i = wk1i - wk3i * wk1r;
2258         i = i0;
2259     }
2260     wd1r = WR5000;
2261     _j0 = mh;
2262     _j1 = _j0 + m;
2263     j2 = _j1 + m;
2264     j3 = j2 + m;
2265     x0r = a[_j0 - 2] + a[j2 - 2];
2266     x0i = a[_j0 - 1] + a[j2 - 1];
2267     x1r = a[_j0 - 2] - a[j2 - 2];
2268     x1i = a[_j0 - 1] - a[j2 - 1];
2269     x2r = a[_j1 - 2] + a[j3 - 2];
2270     x2i = a[_j1 - 1] + a[j3 - 1];
2271     x3r = a[_j1 - 2] - a[j3 - 2];
2272     x3i = a[_j1 - 1] - a[j3 - 1];
2273     a[_j0 - 2] = x0r + x2r;
2274     a[_j0 - 1] = x0i + x2i;
2275     a[_j1 - 2] = x0r - x2r;
2276     a[_j1 - 1] = x0i - x2i;
2277     x0r = x1r - x3i;
2278     x0i = x1i + x3r;
2279     a[j2 - 2] = wk1r * x0r - wk1i * x0i;
2280     a[j2 - 1] = wk1r * x0i + wk1i * x0r;
2281     x0r = x1r + x3i;
2282     x0i = x1i - x3r;
2283     a[j3 - 2] = wk3r * x0r + wk3i * x0i;
2284     a[j3 - 1] = wk3r * x0i - wk3i * x0r;
2285     x0r = a[_j0] + a[j2];
2286     x0i = a[_j0 + 1] + a[j2 + 1];
2287     x1r = a[_j0] - a[j2];
2288     x1i = a[_j0 + 1] - a[j2 + 1];
2289     x2r = a[_j1] + a[j3];
2290     x2i = a[_j1 + 1] + a[j3 + 1];
2291     x3r = a[_j1] - a[j3];
2292     x3i = a[_j1 + 1] - a[j3 + 1];
2293     a[_j0] = x0r + x2r;
2294     a[_j0 + 1] = x0i + x2i;
2295     a[_j1] = x0r - x2r;
2296     a[_j1 + 1] = x0i - x2i;
2297     x0r = x1r - x3i;
2298     x0i = x1i + x3r;
2299     a[j2] = wd1r * (x0r - x0i);
2300     a[j2 + 1] = wd1r * (x0i + x0r);
2301     x0r = x1r + x3i;
2302     x0i = x1i - x3r;
2303     a[j3] = -wd1r * (x0r + x0i);
2304     a[j3 + 1] = -wd1r * (x0i - x0r);
2305     x0r = a[_j0 + 2] + a[j2 + 2];
2306     x0i = a[_j0 + 3] + a[j2 + 3];
2307     x1r = a[_j0 + 2] - a[j2 + 2];
2308     x1i = a[_j0 + 3] - a[j2 + 3];
2309     x2r = a[_j1 + 2] + a[j3 + 2];
2310     x2i = a[_j1 + 3] + a[j3 + 3];
2311     x3r = a[_j1 + 2] - a[j3 + 2];
2312     x3i = a[_j1 + 3] - a[j3 + 3];
2313     a[_j0 + 2] = x0r + x2r;
2314     a[_j0 + 3] = x0i + x2i;
2315     a[_j1 + 2] = x0r - x2r;
2316     a[_j1 + 3] = x0i - x2i;
2317     x0r = x1r - x3i;
2318     x0i = x1i + x3r;
2319     a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2320     a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2321     x0r = x1r + x3i;
2322     x0i = x1i - x3r;
2323     a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2324     a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2325 }
2326 
2327 
2328 void cftmdl2(int n, double *a)
2329 {
2330     int i, i0, j, _j0, _j1, j2, j3, m, mh;
2331     double ew, w1r, w1i, wn4r, wk1r, wk1i, wk3r, wk3i, 
2332         wl1r, wl1i, wl3r, wl3i, wd1r, wd1i, wd3r, wd3i, 
2333         we1r, we1i, we3r, we3i, ss1, ss3;
2334     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
2335 
2336     mh = n >> 3;
2337     m = 2 * mh;
2338     wn4r = WR5000;
2339     _j1 = m;
2340     j2 = _j1 + m;
2341     j3 = j2 + m;
2342     x0r = a[0] - a[j2 + 1];
2343     x0i = a[1] + a[j2];
2344     x1r = a[0] + a[j2 + 1];
2345     x1i = a[1] - a[j2];
2346     x2r = a[_j1] - a[j3 + 1];
2347     x2i = a[_j1 + 1] + a[j3];
2348     x3r = a[_j1] + a[j3 + 1];
2349     x3i = a[_j1 + 1] - a[j3];
2350     y0r = wn4r * (x2r - x2i);
2351     y0i = wn4r * (x2i + x2r);
2352     a[0] = x0r + y0r;
2353     a[1] = x0i + y0i;
2354     a[_j1] = x0r - y0r;
2355     a[_j1 + 1] = x0i - y0i;
2356     y0r = wn4r * (x3r - x3i);
2357     y0i = wn4r * (x3i + x3r);
2358     a[j2] = x1r - y0i;
2359     a[j2 + 1] = x1i + y0r;
2360     a[j3] = x1r + y0i;
2361     a[j3 + 1] = x1i - y0r;
2362     wl1r = 1;
2363     wl1i = 0;
2364     wl3r = 1;
2365     wl3i = 0;
2366     we1r = wn4r;
2367     we1i = wn4r;
2368     we3r = -wn4r;
2369     we3i = -wn4r;
2370     ew = M_PI_2 / (2 * m);
2371     w1r = cos(2 * ew);
2372     w1i = sin(2 * ew);
2373     wk1r = w1r;
2374     wk1i = w1i;
2375     wd1r = wn4r * (w1r - w1i);
2376     wd1i = wn4r * (w1i + w1r);
2377     ss1 = 2 * w1i;
2378     wk3i = 2 * ss1 * wk1r;
2379     wk3r = wk1r - wk3i * wk1i;
2380     wk3i = wk1i - wk3i * wk1r;
2381     ss3 = 2 * wk3i;
2382     wd3r = -wn4r * (wk3r - wk3i);
2383     wd3i = -wn4r * (wk3i + wk3r);
2384     i = 0;
2385     for (;;) {
2386         i0 = i + 4 * CDFT_LOOP_DIV;
2387         if (i0 > mh - 4) {
2388             i0 = mh - 4;
2389         }
2390         for (j = i + 2; j < i0; j += 4) {
2391             wl1r -= ss1 * wk1i;
2392             wl1i += ss1 * wk1r;
2393             wl3r -= ss3 * wk3i;
2394             wl3i += ss3 * wk3r;
2395             we1r -= ss1 * wd1i;
2396             we1i += ss1 * wd1r;
2397             we3r -= ss3 * wd3i;
2398             we3i += ss3 * wd3r;
2399             _j1 = j + m;
2400             j2 = _j1 + m;
2401             j3 = j2 + m;
2402             x0r = a[j] - a[j2 + 1];
2403             x0i = a[j + 1] + a[j2];
2404             x1r = a[j] + a[j2 + 1];
2405             x1i = a[j + 1] - a[j2];
2406             x2r = a[_j1] - a[j3 + 1];
2407             x2i = a[_j1 + 1] + a[j3];
2408             x3r = a[_j1] + a[j3 + 1];
2409             x3i = a[_j1 + 1] - a[j3];
2410             y0r = wk1r * x0r - wk1i * x0i;
2411             y0i = wk1r * x0i + wk1i * x0r;
2412             y2r = wd1r * x2r - wd1i * x2i;
2413             y2i = wd1r * x2i + wd1i * x2r;
2414             a[j] = y0r + y2r;
2415             a[j + 1] = y0i + y2i;
2416             a[_j1] = y0r - y2r;
2417             a[_j1 + 1] = y0i - y2i;
2418             y0r = wk3r * x1r + wk3i * x1i;
2419             y0i = wk3r * x1i - wk3i * x1r;
2420             y2r = wd3r * x3r + wd3i * x3i;
2421             y2i = wd3r * x3i - wd3i * x3r;
2422             a[j2] = y0r + y2r;
2423             a[j2 + 1] = y0i + y2i;
2424             a[j3] = y0r - y2r;
2425             a[j3 + 1] = y0i - y2i;
2426             x0r = a[j + 2] - a[j2 + 3];
2427             x0i = a[j + 3] + a[j2 + 2];
2428             x1r = a[j + 2] + a[j2 + 3];
2429             x1i = a[j + 3] - a[j2 + 2];
2430             x2r = a[_j1 + 2] - a[j3 + 3];
2431             x2i = a[_j1 + 3] + a[j3 + 2];
2432             x3r = a[_j1 + 2] + a[j3 + 3];
2433             x3i = a[_j1 + 3] - a[j3 + 2];
2434             y0r = wl1r * x0r - wl1i * x0i;
2435             y0i = wl1r * x0i + wl1i * x0r;
2436             y2r = we1r * x2r - we1i * x2i;
2437             y2i = we1r * x2i + we1i * x2r;
2438             a[j + 2] = y0r + y2r;
2439             a[j + 3] = y0i + y2i;
2440             a[_j1 + 2] = y0r - y2r;
2441             a[_j1 + 3] = y0i - y2i;
2442             y0r = wl3r * x1r + wl3i * x1i;
2443             y0i = wl3r * x1i - wl3i * x1r;
2444             y2r = we3r * x3r + we3i * x3i;
2445             y2i = we3r * x3i - we3i * x3r;
2446             a[j2 + 2] = y0r + y2r;
2447             a[j2 + 3] = y0i + y2i;
2448             a[j3 + 2] = y0r - y2r;
2449             a[j3 + 3] = y0i - y2i;
2450             _j0 = m - j;
2451             _j1 = _j0 + m;
2452             j2 = _j1 + m;
2453             j3 = j2 + m;
2454             x0r = a[_j0] - a[j2 + 1];
2455             x0i = a[_j0 + 1] + a[j2];
2456             x1r = a[_j0] + a[j2 + 1];
2457             x1i = a[_j0 + 1] - a[j2];
2458             x2r = a[_j1] - a[j3 + 1];
2459             x2i = a[_j1 + 1] + a[j3];
2460             x3r = a[_j1] + a[j3 + 1];
2461             x3i = a[_j1 + 1] - a[j3];
2462             y0r = wd1i * x0r - wd1r * x0i;
2463             y0i = wd1i * x0i + wd1r * x0r;
2464             y2r = wk1i * x2r - wk1r * x2i;
2465             y2i = wk1i * x2i + wk1r * x2r;
2466             a[_j0] = y0r + y2r;
2467             a[_j0 + 1] = y0i + y2i;
2468             a[_j1] = y0r - y2r;
2469             a[_j1 + 1] = y0i - y2i;
2470             y0r = wd3i * x1r + wd3r * x1i;
2471             y0i = wd3i * x1i - wd3r * x1r;
2472             y2r = wk3i * x3r + wk3r * x3i;
2473             y2i = wk3i * x3i - wk3r * x3r;
2474             a[j2] = y0r + y2r;
2475             a[j2 + 1] = y0i + y2i;
2476             a[j3] = y0r - y2r;
2477             a[j3 + 1] = y0i - y2i;
2478             x0r = a[_j0 - 2] - a[j2 - 1];
2479             x0i = a[_j0 - 1] + a[j2 - 2];
2480             x1r = a[_j0 - 2] + a[j2 - 1];
2481             x1i = a[_j0 - 1] - a[j2 - 2];
2482             x2r = a[_j1 - 2] - a[j3 - 1];
2483             x2i = a[_j1 - 1] + a[j3 - 2];
2484             x3r = a[_j1 - 2] + a[j3 - 1];
2485             x3i = a[_j1 - 1] - a[j3 - 2];
2486             y0r = we1i * x0r - we1r * x0i;
2487             y0i = we1i * x0i + we1r * x0r;
2488             y2r = wl1i * x2r - wl1r * x2i;
2489             y2i = wl1i * x2i + wl1r * x2r;
2490             a[_j0 - 2] = y0r + y2r;
2491             a[_j0 - 1] = y0i + y2i;
2492             a[_j1 - 2] = y0r - y2r;
2493             a[_j1 - 1] = y0i - y2i;
2494             y0r = we3i * x1r + we3r * x1i;
2495             y0i = we3i * x1i - we3r * x1r;
2496             y2r = wl3i * x3r + wl3r * x3i;
2497             y2i = wl3i * x3i - wl3r * x3r;
2498             a[j2 - 2] = y0r + y2r;
2499             a[j2 - 1] = y0i + y2i;
2500             a[j3 - 2] = y0r - y2r;
2501             a[j3 - 1] = y0i - y2i;
2502             wk1r -= ss1 * wl1i;
2503             wk1i += ss1 * wl1r;
2504             wk3r -= ss3 * wl3i;
2505             wk3i += ss3 * wl3r;
2506             wd1r -= ss1 * we1i;
2507             wd1i += ss1 * we1r;
2508             wd3r -= ss3 * we3i;
2509             wd3i += ss3 * we3r;
2510         }
2511         if (i0 == mh - 4) {
2512             break;
2513         }
2514         wl1r = cos(ew * i0);
2515         wl1i = sin(ew * i0);
2516         wl3i = 4 * wl1i * wl1r;
2517         wl3r = wl1r - wl3i * wl1i;
2518         wl3i = wl1i - wl3i * wl1r;
2519         we1r = wn4r * (wl1r - wl1i);
2520         we1i = wn4r * (wl1i + wl1r);
2521         we3r = -wn4r * (wl3r - wl3i);
2522         we3i = -wn4r * (wl3i + wl3r);
2523         wk1r = w1r * wl1r - w1i * wl1i;
2524         wk1i = w1r * wl1i + w1i * wl1r;
2525         wk3i = 4 * wk1i * wk1r;
2526         wk3r = wk1r - wk3i * wk1i;
2527         wk3i = wk1i - wk3i * wk1r;
2528         wd1r = wn4r * (wk1r - wk1i);
2529         wd1i = wn4r * (wk1i + wk1r);
2530         wd3r = -wn4r * (wk3r - wk3i);
2531         wd3i = -wn4r * (wk3i + wk3r);
2532         i = i0;
2533     }
2534     wl1r = WR2500;
2535     wl1i = WI2500;
2536     _j0 = mh;
2537     _j1 = _j0 + m;
2538     j2 = _j1 + m;
2539     j3 = j2 + m;
2540     x0r = a[_j0 - 2] - a[j2 - 1];
2541     x0i = a[_j0 - 1] + a[j2 - 2];
2542     x1r = a[_j0 - 2] + a[j2 - 1];
2543     x1i = a[_j0 - 1] - a[j2 - 2];
2544     x2r = a[_j1 - 2] - a[j3 - 1];
2545     x2i = a[_j1 - 1] + a[j3 - 2];
2546     x3r = a[_j1 - 2] + a[j3 - 1];
2547     x3i = a[_j1 - 1] - a[j3 - 2];
2548     y0r = wk1r * x0r - wk1i * x0i;
2549     y0i = wk1r * x0i + wk1i * x0r;
2550     y2r = wd1r * x2r - wd1i * x2i;
2551     y2i = wd1r * x2i + wd1i * x2r;
2552     a[_j0 - 2] = y0r + y2r;
2553     a[_j0 - 1] = y0i + y2i;
2554     a[_j1 - 2] = y0r - y2r;
2555     a[_j1 - 1] = y0i - y2i;
2556     y0r = wk3r * x1r + wk3i * x1i;
2557     y0i = wk3r * x1i - wk3i * x1r;
2558     y2r = wd3r * x3r + wd3i * x3i;
2559     y2i = wd3r * x3i - wd3i * x3r;
2560     a[j2 - 2] = y0r + y2r;
2561     a[j2 - 1] = y0i + y2i;
2562     a[j3 - 2] = y0r - y2r;
2563     a[j3 - 1] = y0i - y2i;
2564     x0r = a[_j0] - a[j2 + 1];
2565     x0i = a[_j0 + 1] + a[j2];
2566     x1r = a[_j0] + a[j2 + 1];
2567     x1i = a[_j0 + 1] - a[j2];
2568     x2r = a[_j1] - a[j3 + 1];
2569     x2i = a[_j1 + 1] + a[j3];
2570     x3r = a[_j1] + a[j3 + 1];
2571     x3i = a[_j1 + 1] - a[j3];
2572     y0r = wl1r * x0r - wl1i * x0i;
2573     y0i = wl1r * x0i + wl1i * x0r;
2574     y2r = wl1i * x2r - wl1r * x2i;
2575     y2i = wl1i * x2i + wl1r * x2r;
2576     a[_j0] = y0r + y2r;
2577     a[_j0 + 1] = y0i + y2i;
2578     a[_j1] = y0r - y2r;
2579     a[_j1 + 1] = y0i - y2i;
2580     y0r = wl1i * x1r - wl1r * x1i;
2581     y0i = wl1i * x1i + wl1r * x1r;
2582     y2r = wl1r * x3r - wl1i * x3i;
2583     y2i = wl1r * x3i + wl1i * x3r;
2584     a[j2] = y0r - y2r;
2585     a[j2 + 1] = y0i - y2i;
2586     a[j3] = y0r + y2r;
2587     a[j3 + 1] = y0i + y2i;
2588     x0r = a[_j0 + 2] - a[j2 + 3];
2589     x0i = a[_j0 + 3] + a[j2 + 2];
2590     x1r = a[_j0 + 2] + a[j2 + 3];
2591     x1i = a[_j0 + 3] - a[j2 + 2];
2592     x2r = a[_j1 + 2] - a[j3 + 3];
2593     x2i = a[_j1 + 3] + a[j3 + 2];
2594     x3r = a[_j1 + 2] + a[j3 + 3];
2595     x3i = a[_j1 + 3] - a[j3 + 2];
2596     y0r = wd1i * x0r - wd1r * x0i;
2597     y0i = wd1i * x0i + wd1r * x0r;
2598     y2r = wk1i * x2r - wk1r * x2i;
2599     y2i = wk1i * x2i + wk1r * x2r;
2600     a[_j0 + 2] = y0r + y2r;
2601     a[_j0 + 3] = y0i + y2i;
2602     a[_j1 + 2] = y0r - y2r;
2603     a[_j1 + 3] = y0i - y2i;
2604     y0r = wd3i * x1r + wd3r * x1i;
2605     y0i = wd3i * x1i - wd3r * x1r;
2606     y2r = wk3i * x3r + wk3r * x3i;
2607     y2i = wk3i * x3i - wk3r * x3r;
2608     a[j2 + 2] = y0r + y2r;
2609     a[j2 + 3] = y0i + y2i;
2610     a[j3 + 2] = y0r - y2r;
2611     a[j3 + 3] = y0i - y2i;
2612 }
2613 
2614 
2615 void cftfx41(int n, double *a)
2616 {
2617     if (n == 128) {
2618         cftf161(a);
2619         cftf162(&a[32]);
2620         cftf161(&a[64]);
2621         cftf161(&a[96]);
2622     } else {
2623         cftf081(a);
2624         cftf082(&a[16]);
2625         cftf081(&a[32]);
2626         cftf081(&a[48]);
2627     }
2628 }
2629 
2630 
2631 void cftf161(double *a)
2632 {
2633     double wn4r, wk1r, wk1i, 
2634         x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, 
2635         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 
2636         y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, 
2637         y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, 
2638         y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2639 
2640     wn4r = WR5000;
2641     wk1r = WR2500;
2642     wk1i = WI2500;
2643     x0r = a[0] + a[16];
2644     x0i = a[1] + a[17];
2645     x1r = a[0] - a[16];
2646     x1i = a[1] - a[17];
2647     x2r = a[8] + a[24];
2648     x2i = a[9] + a[25];
2649     x3r = a[8] - a[24];
2650     x3i = a[9] - a[25];
2651     y0r = x0r + x2r;
2652     y0i = x0i + x2i;
2653     y4r = x0r - x2r;
2654     y4i = x0i - x2i;
2655     y8r = x1r - x3i;
2656     y8i = x1i + x3r;
2657     y12r = x1r + x3i;
2658     y12i = x1i - x3r;
2659     x0r = a[2] + a[18];
2660     x0i = a[3] + a[19];
2661     x1r = a[2] - a[18];
2662     x1i = a[3] - a[19];
2663     x2r = a[10] + a[26];
2664     x2i = a[11] + a[27];
2665     x3r = a[10] - a[26];
2666     x3i = a[11] - a[27];
2667     y1r = x0r + x2r;
2668     y1i = x0i + x2i;
2669     y5r = x0r - x2r;
2670     y5i = x0i - x2i;
2671     x0r = x1r - x3i;
2672     x0i = x1i + x3r;
2673     y9r = wk1r * x0r - wk1i * x0i;
2674     y9i = wk1r * x0i + wk1i * x0r;
2675     x0r = x1r + x3i;
2676     x0i = x1i - x3r;
2677     y13r = wk1i * x0r - wk1r * x0i;
2678     y13i = wk1i * x0i + wk1r * x0r;
2679     x0r = a[4] + a[20];
2680     x0i = a[5] + a[21];
2681     x1r = a[4] - a[20];
2682     x1i = a[5] - a[21];
2683     x2r = a[12] + a[28];
2684     x2i = a[13] + a[29];
2685     x3r = a[12] - a[28];
2686     x3i = a[13] - a[29];
2687     y2r = x0r + x2r;
2688     y2i = x0i + x2i;
2689     y6r = x0r - x2r;
2690     y6i = x0i - x2i;
2691     x0r = x1r - x3i;
2692     x0i = x1i + x3r;
2693     y10r = wn4r * (x0r - x0i);
2694     y10i = wn4r * (x0i + x0r);
2695     x0r = x1r + x3i;
2696     x0i = x1i - x3r;
2697     y14r = wn4r * (x0r + x0i);
2698     y14i = wn4r * (x0i - x0r);
2699     x0r = a[6] + a[22];
2700     x0i = a[7] + a[23];
2701     x1r = a[6] - a[22];
2702     x1i = a[7] - a[23];
2703     x2r = a[14] + a[30];
2704     x2i = a[15] + a[31];
2705     x3r = a[14] - a[30];
2706     x3i = a[15] - a[31];
2707     y3r = x0r + x2r;
2708     y3i = x0i + x2i;
2709     y7r = x0r - x2r;
2710     y7i = x0i - x2i;
2711     x0r = x1r - x3i;
2712     x0i = x1i + x3r;
2713     y11r = wk1i * x0r - wk1r * x0i;
2714     y11i = wk1i * x0i + wk1r * x0r;
2715     x0r = x1r + x3i;
2716     x0i = x1i - x3r;
2717     y15r = wk1r * x0r - wk1i * x0i;
2718     y15i = wk1r * x0i + wk1i * x0r;
2719     x0r = y12r - y14r;
2720     x0i = y12i - y14i;
2721     x1r = y12r + y14r;
2722     x1i = y12i + y14i;
2723     x2r = y13r - y15r;
2724     x2i = y13i - y15i;
2725     x3r = y13r + y15r;
2726     x3i = y13i + y15i;
2727     a[24] = x0r + x2r;
2728     a[25] = x0i + x2i;
2729     a[26] = x0r - x2r;
2730     a[27] = x0i - x2i;
2731     a[28] = x1r - x3i;
2732     a[29] = x1i + x3r;
2733     a[30] = x1r + x3i;
2734     a[31] = x1i - x3r;
2735     x0r = y8r + y10r;
2736     x0i = y8i + y10i;
2737     x1r = y8r - y10r;
2738     x1i = y8i - y10i;
2739     x2r = y9r + y11r;
2740     x2i = y9i + y11i;
2741     x3r = y9r - y11r;
2742     x3i = y9i - y11i;
2743     a[16] = x0r + x2r;
2744     a[17] = x0i + x2i;
2745     a[18] = x0r - x2r;
2746     a[19] = x0i - x2i;
2747     a[20] = x1r - x3i;
2748     a[21] = x1i + x3r;
2749     a[22] = x1r + x3i;
2750     a[23] = x1i - x3r;
2751     x0r = y5r - y7i;
2752     x0i = y5i + y7r;
2753     x2r = wn4r * (x0r - x0i);
2754     x2i = wn4r * (x0i + x0r);
2755     x0r = y5r + y7i;
2756     x0i = y5i - y7r;
2757     x3r = wn4r * (x0r - x0i);
2758     x3i = wn4r * (x0i + x0r);
2759     x0r = y4r - y6i;
2760     x0i = y4i + y6r;
2761     x1r = y4r + y6i;
2762     x1i = y4i - y6r;
2763     a[8] = x0r + x2r;
2764     a[9] = x0i + x2i;
2765     a[10] = x0r - x2r;
2766     a[11] = x0i - x2i;
2767     a[12] = x1r - x3i;
2768     a[13] = x1i + x3r;
2769     a[14] = x1r + x3i;
2770     a[15] = x1i - x3r;
2771     x0r = y0r + y2r;
2772     x0i = y0i + y2i;
2773     x1r = y0r - y2r;
2774     x1i = y0i - y2i;
2775     x2r = y1r + y3r;
2776     x2i = y1i + y3i;
2777     x3r = y1r - y3r;
2778     x3i = y1i - y3i;
2779     a[0] = x0r + x2r;
2780     a[1] = x0i + x2i;
2781     a[2] = x0r - x2r;
2782     a[3] = x0i - x2i;
2783     a[4] = x1r - x3i;
2784     a[5] = x1i + x3r;
2785     a[6] = x1r + x3i;
2786     a[7] = x1i - x3r;
2787 }
2788 
2789 
2790 void cftf162(double *a)
2791 {
2792     double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, 
2793         x0r, x0i, x1r, x1i, x2r, x2i, 
2794         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 
2795         y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, 
2796         y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, 
2797         y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2798 
2799     wn4r = WR5000;
2800     wk1r = WR1250;
2801     wk1i = WI1250;
2802     wk2r = WR2500;
2803     wk2i = WI2500;
2804     wk3r = WR3750;
2805     wk3i = WI3750;
2806     x1r = a[0] - a[17];
2807     x1i = a[1] + a[16];
2808     x0r = a[8] - a[25];
2809     x0i = a[9] + a[24];
2810     x2r = wn4r * (x0r - x0i);
2811     x2i = wn4r * (x0i + x0r);
2812     y0r = x1r + x2r;
2813     y0i = x1i + x2i;
2814     y4r = x1r - x2r;
2815     y4i = x1i - x2i;
2816     x1r = a[0] + a[17];
2817     x1i = a[1] - a[16];
2818     x0r = a[8] + a[25];
2819     x0i = a[9] - a[24];
2820     x2r = wn4r * (x0r - x0i);
2821     x2i = wn4r * (x0i + x0r);
2822     y8r = x1r - x2i;
2823     y8i = x1i + x2r;
2824     y12r = x1r + x2i;
2825     y12i = x1i - x2r;
2826     x0r = a[2] - a[19];
2827     x0i = a[3] + a[18];
2828     x1r = wk1r * x0r - wk1i * x0i;
2829     x1i = wk1r * x0i + wk1i * x0r;
2830     x0r = a[10] - a[27];
2831     x0i = a[11] + a[26];
2832     x2r = wk3i * x0r - wk3r * x0i;
2833     x2i = wk3i * x0i + wk3r * x0r;
2834     y1r = x1r + x2r;
2835     y1i = x1i + x2i;
2836     y5r = x1r - x2r;
2837     y5i = x1i - x2i;
2838     x0r = a[2] + a[19];
2839     x0i = a[3] - a[18];
2840     x1r = wk3r * x0r - wk3i * x0i;
2841     x1i = wk3r * x0i + wk3i * x0r;
2842     x0r = a[10] + a[27];
2843     x0i = a[11] - a[26];
2844     x2r = wk1r * x0r + wk1i * x0i;
2845     x2i = wk1r * x0i - wk1i * x0r;
2846     y9r = x1r - x2r;
2847     y9i = x1i - x2i;
2848     y13r = x1r + x2r;
2849     y13i = x1i + x2i;
2850     x0r = a[4] - a[21];
2851     x0i = a[5] + a[20];
2852     x1r = wk2r * x0r - wk2i * x0i;
2853     x1i = wk2r * x0i + wk2i * x0r;
2854     x0r = a[12] - a[29];
2855     x0i = a[13] + a[28];
2856     x2r = wk2i * x0r - wk2r * x0i;
2857     x2i = wk2i * x0i + wk2r * x0r;
2858     y2r = x1r + x2r;
2859     y2i = x1i + x2i;
2860     y6r = x1r - x2r;
2861     y6i = x1i - x2i;
2862     x0r = a[4] + a[21];
2863     x0i = a[5] - a[20];
2864     x1r = wk2i * x0r - wk2r * x0i;
2865     x1i = wk2i * x0i + wk2r * x0r;
2866     x0r = a[12] + a[29];
2867     x0i = a[13] - a[28];
2868     x2r = wk2r * x0r - wk2i * x0i;
2869     x2i = wk2r * x0i + wk2i * x0r;
2870     y10r = x1r - x2r;
2871     y10i = x1i - x2i;
2872     y14r = x1r + x2r;
2873     y14i = x1i + x2i;
2874     x0r = a[6] - a[23];
2875     x0i = a[7] + a[22];
2876     x1r = wk3r * x0r - wk3i * x0i;
2877     x1i = wk3r * x0i + wk3i * x0r;
2878     x0r = a[14] - a[31];
2879     x0i = a[15] + a[30];
2880     x2r = wk1i * x0r - wk1r * x0i;
2881     x2i = wk1i * x0i + wk1r * x0r;
2882     y3r = x1r + x2r;
2883     y3i = x1i + x2i;
2884     y7r = x1r - x2r;
2885     y7i = x1i - x2i;
2886     x0r = a[6] + a[23];
2887     x0i = a[7] - a[22];
2888     x1r = wk1i * x0r + wk1r * x0i;
2889     x1i = wk1i * x0i - wk1r * x0r;
2890     x0r = a[14] + a[31];
2891     x0i = a[15] - a[30];
2892     x2r = wk3i * x0r - wk3r * x0i;
2893     x2i = wk3i * x0i + wk3r * x0r;
2894     y11r = x1r + x2r;
2895     y11i = x1i + x2i;
2896     y15r = x1r - x2r;
2897     y15i = x1i - x2i;
2898     x1r = y0r + y2r;
2899     x1i = y0i + y2i;
2900     x2r = y1r + y3r;
2901     x2i = y1i + y3i;
2902     a[0] = x1r + x2r;
2903     a[1] = x1i + x2i;
2904     a[2] = x1r - x2r;
2905     a[3] = x1i - x2i;
2906     x1r = y0r - y2r;
2907     x1i = y0i - y2i;
2908     x2r = y1r - y3r;
2909     x2i = y1i - y3i;
2910     a[4] = x1r - x2i;
2911     a[5] = x1i + x2r;
2912     a[6] = x1r + x2i;
2913     a[7] = x1i - x2r;
2914     x1r = y4r - y6i;
2915     x1i = y4i + y6r;
2916     x0r = y5r - y7i;
2917     x0i = y5i + y7r;
2918     x2r = wn4r * (x0r - x0i);
2919     x2i = wn4r * (x0i + x0r);
2920     a[8] = x1r + x2r;
2921     a[9] = x1i + x2i;
2922     a[10] = x1r - x2r;
2923     a[11] = x1i - x2i;
2924     x1r = y4r + y6i;
2925     x1i = y4i - y6r;
2926     x0r = y5r + y7i;
2927     x0i = y5i - y7r;
2928     x2r = wn4r * (x0r - x0i);
2929     x2i = wn4r * (x0i + x0r);
2930     a[12] = x1r - x2i;
2931     a[13] = x1i + x2r;
2932     a[14] = x1r + x2i;
2933     a[15] = x1i - x2r;
2934     x1r = y8r + y10r;
2935     x1i = y8i + y10i;
2936     x2r = y9r - y11r;
2937     x2i = y9i - y11i;
2938     a[16] = x1r + x2r;
2939     a[17] = x1i + x2i;
2940     a[18] = x1r - x2r;
2941     a[19] = x1i - x2i;
2942     x1r = y8r - y10r;
2943     x1i = y8i - y10i;
2944     x2r = y9r + y11r;
2945     x2i = y9i + y11i;
2946     a[20] = x1r - x2i;
2947     a[21] = x1i + x2r;
2948     a[22] = x1r + x2i;
2949     a[23] = x1i - x2r;
2950     x1r = y12r - y14i;
2951     x1i = y12i + y14r;
2952     x0r = y13r + y15i;
2953     x0i = y13i - y15r;
2954     x2r = wn4r * (x0r - x0i);
2955     x2i = wn4r * (x0i + x0r);
2956     a[24] = x1r + x2r;
2957     a[25] = x1i + x2i;
2958     a[26] = x1r - x2r;
2959     a[27] = x1i - x2i;
2960     x1r = y12r + y14i;
2961     x1i = y12i - y14r;
2962     x0r = y13r - y15i;
2963     x0i = y13i + y15r;
2964     x2r = wn4r * (x0r - x0i);
2965     x2i = wn4r * (x0i + x0r);
2966     a[28] = x1r - x2i;
2967     a[29] = x1i + x2r;
2968     a[30] = x1r + x2i;
2969     a[31] = x1i - x2r;
2970 }
2971 
2972 
2973 void cftf081(double *a)
2974 {
2975     double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, 
2976         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 
2977         y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
2978 
2979     wn4r = WR5000;
2980     x0r = a[0] + a[8];
2981     x0i = a[1] + a[9];
2982     x1r = a[0] - a[8];
2983     x1i = a[1] - a[9];
2984     x2r = a[4] + a[12];
2985     x2i = a[5] + a[13];
2986     x3r = a[4] - a[12];
2987     x3i = a[5] - a[13];
2988     y0r = x0r + x2r;
2989     y0i = x0i + x2i;
2990     y2r = x0r - x2r;
2991     y2i = x0i - x2i;
2992     y1r = x1r - x3i;
2993     y1i = x1i + x3r;
2994     y3r = x1r + x3i;
2995     y3i = x1i - x3r;
2996     x0r = a[2] + a[10];
2997     x0i = a[3] + a[11];
2998     x1r = a[2] - a[10];
2999     x1i = a[3] - a[11];
3000     x2r = a[6] + a[14];
3001     x2i = a[7] + a[15];
3002     x3r = a[6] - a[14];
3003     x3i = a[7] - a[15];
3004     y4r = x0r + x2r;
3005     y4i = x0i + x2i;
3006     y6r = x0r - x2r;
3007     y6i = x0i - x2i;
3008     x0r = x1r - x3i;
3009     x0i = x1i + x3r;
3010     x2r = x1r + x3i;
3011     x2i = x1i - x3r;
3012     y5r = wn4r * (x0r - x0i);
3013     y5i = wn4r * (x0r + x0i);
3014     y7r = wn4r * (x2r - x2i);
3015     y7i = wn4r * (x2r + x2i);
3016     a[8] = y1r + y5r;
3017     a[9] = y1i + y5i;
3018     a[10] = y1r - y5r;
3019     a[11] = y1i - y5i;
3020     a[12] = y3r - y7i;
3021     a[13] = y3i + y7r;
3022     a[14] = y3r + y7i;
3023     a[15] = y3i - y7r;
3024     a[0] = y0r + y4r;
3025     a[1] = y0i + y4i;
3026     a[2] = y0r - y4r;
3027     a[3] = y0i - y4i;
3028     a[4] = y2r - y6i;
3029     a[5] = y2i + y6r;
3030     a[6] = y2r + y6i;
3031     a[7] = y2i - y6r;
3032 }
3033 
3034 
3035 void cftf082(double *a)
3036 {
3037     double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, 
3038         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 
3039         y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3040 
3041     wn4r = WR5000;
3042     wk1r = WR2500;
3043     wk1i = WI2500;
3044     y0r = a[0] - a[9];
3045     y0i = a[1] + a[8];
3046     y1r = a[0] + a[9];
3047     y1i = a[1] - a[8];
3048     x0r = a[4] - a[13];
3049     x0i = a[5] + a[12];
3050     y2r = wn4r * (x0r - x0i);
3051     y2i = wn4r * (x0i + x0r);
3052     x0r = a[4] + a[13];
3053     x0i = a[5] - a[12];
3054     y3r = wn4r * (x0r - x0i);
3055     y3i = wn4r * (x0i + x0r);
3056     x0r = a[2] - a[11];
3057     x0i = a[3] + a[10];
3058     y4r = wk1r * x0r - wk1i * x0i;
3059     y4i = wk1r * x0i + wk1i * x0r;
3060     x0r = a[2] + a[11];
3061     x0i = a[3] - a[10];
3062     y5r = wk1i * x0r - wk1r * x0i;
3063     y5i = wk1i * x0i + wk1r * x0r;
3064     x0r = a[6] - a[15];
3065     x0i = a[7] + a[14];
3066     y6r = wk1i * x0r - wk1r * x0i;
3067     y6i = wk1i * x0i + wk1r * x0r;
3068     x0r = a[6] + a[15];
3069     x0i = a[7] - a[14];
3070     y7r = wk1r * x0r - wk1i * x0i;
3071     y7i = wk1r * x0i + wk1i * x0r;
3072     x0r = y0r + y2r;
3073     x0i = y0i + y2i;
3074     x1r = y4r + y6r;
3075     x1i = y4i + y6i;
3076     a[0] = x0r + x1r;
3077     a[1] = x0i + x1i;
3078     a[2] = x0r - x1r;
3079     a[3] = x0i - x1i;
3080     x0r = y0r - y2r;
3081     x0i = y0i - y2i;
3082     x1r = y4r - y6r;
3083     x1i = y4i - y6i;
3084     a[4] = x0r - x1i;
3085     a[5] = x0i + x1r;
3086     a[6] = x0r + x1i;
3087     a[7] = x0i - x1r;
3088     x0r = y1r - y3i;
3089     x0i = y1i + y3r;
3090     x1r = y5r - y7r;
3091     x1i = y5i - y7i;
3092     a[8] = x0r + x1r;
3093     a[9] = x0i + x1i;
3094     a[10] = x0r - x1r;
3095     a[11] = x0i - x1i;
3096     x0r = y1r + y3i;
3097     x0i = y1i - y3r;
3098     x1r = y5r + y7r;
3099     x1i = y5i + y7i;
3100     a[12] = x0r - x1i;
3101     a[13] = x0i + x1r;
3102     a[14] = x0r + x1i;
3103     a[15] = x0i - x1r;
3104 }
3105 
3106 
3107 void cftf040(double *a)
3108 {
3109     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3110 
3111     x0r = a[0] + a[4];
3112     x0i = a[1] + a[5];
3113     x1r = a[0] - a[4];
3114     x1i = a[1] - a[5];
3115     x2r = a[2] + a[6];
3116     x2i = a[3] + a[7];
3117     x3r = a[2] - a[6];
3118     x3i = a[3] - a[7];
3119     a[0] = x0r + x2r;
3120     a[1] = x0i + x2i;
3121     a[2] = x1r - x3i;
3122     a[3] = x1i + x3r;
3123     a[4] = x0r - x2r;
3124     a[5] = x0i - x2i;
3125     a[6] = x1r + x3i;
3126     a[7] = x1i - x3r;
3127 }
3128 
3129 
3130 void cftb040(double *a)
3131 {
3132     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3133 
3134     x0r = a[0] + a[4];
3135     x0i = a[1] + a[5];
3136     x1r = a[0] - a[4];
3137     x1i = a[1] - a[5];
3138     x2r = a[2] + a[6];
3139     x2i = a[3] + a[7];
3140     x3r = a[2] - a[6];
3141     x3i = a[3] - a[7];
3142     a[0] = x0r + x2r;
3143     a[1] = x0i + x2i;
3144     a[2] = x1r + x3i;
3145     a[3] = x1i - x3r;
3146     a[4] = x0r - x2r;
3147     a[5] = x0i - x2i;
3148     a[6] = x1r - x3i;
3149     a[7] = x1i + x3r;
3150 }
3151 
3152 
3153 void cftx020(double *a)
3154 {
3155     double x0r, x0i;
3156 
3157     x0r = a[0] - a[2];
3158     x0i = a[1] - a[3];
3159     a[0] += a[2];
3160     a[1] += a[3];
3161     a[2] = x0r;
3162     a[3] = x0i;
3163 }
3164 
3165 
3166 void rftfsub(int n, double *a)
3167 {
3168     int i, i0, j, k;
3169     double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3170 
3171     ec = 2 * M_PI_2 / n;
3172     wkr = 0;
3173     wki = 0;
3174     wdi = cos(ec);
3175     wdr = sin(ec);
3176     wdi *= wdr;
3177     wdr *= wdr;
3178     w1r = 1 - 2 * wdr;
3179     w1i = 2 * wdi;
3180     ss = 2 * w1i;
3181     i = n >> 1;
3182     for (;;) {
3183         i0 = i - 4 * RDFT_LOOP_DIV;
3184         if (i0 < 4) {
3185             i0 = 4;
3186         }
3187         for (j = i - 4; j >= i0; j -= 4) {
3188             k = n - j;
3189             xr = a[j + 2] - a[k - 2];
3190             xi = a[j + 3] + a[k - 1];
3191             yr = wdr * xr - wdi * xi;
3192             yi = wdr * xi + wdi * xr;
3193             a[j + 2] -= yr;
3194             a[j + 3] -= yi;
3195             a[k - 2] += yr;
3196             a[k - 1] -= yi;
3197             wkr += ss * wdi;
3198             wki += ss * (0.5 - wdr);
3199             xr = a[j] - a[k];
3200             xi = a[j + 1] + a[k + 1];
3201             yr = wkr * xr - wki * xi;
3202             yi = wkr * xi + wki * xr;
3203             a[j] -= yr;
3204             a[j + 1] -= yi;
3205             a[k] += yr;
3206             a[k + 1] -= yi;
3207             wdr += ss * wki;
3208             wdi += ss * (0.5 - wkr);
3209         }
3210         if (i0 == 4) {
3211             break;
3212         }
3213         wkr = 0.5 * sin(ec * i0);
3214         wki = 0.5 * cos(ec * i0);
3215         wdr = 0.5 - (wkr * w1r - wki * w1i);
3216         wdi = wkr * w1i + wki * w1r;
3217         wkr = 0.5 - wkr;
3218         i = i0;
3219     }
3220     xr = a[2] - a[n - 2];
3221     xi = a[3] + a[n - 1];
3222     yr = wdr * xr - wdi * xi;
3223     yi = wdr * xi + wdi * xr;
3224     a[2] -= yr;
3225     a[3] -= yi;
3226     a[n - 2] += yr;
3227     a[n - 1] -= yi;
3228 }
3229 
3230 
3231 void rftbsub(int n, double *a)
3232 {
3233     int i, i0, j, k;
3234     double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3235 
3236     ec = 2 * M_PI_2 / n;
3237     wkr = 0;
3238     wki = 0;
3239     wdi = cos(ec);
3240     wdr = sin(ec);
3241     wdi *= wdr;
3242     wdr *= wdr;
3243     w1r = 1 - 2 * wdr;
3244     w1i = 2 * wdi;
3245     ss = 2 * w1i;
3246     i = n >> 1;
3247     for (;;) {
3248         i0 = i - 4 * RDFT_LOOP_DIV;
3249         if (i0 < 4) {
3250             i0 = 4;
3251         }
3252         for (j = i - 4; j >= i0; j -= 4) {
3253             k = n - j;
3254             xr = a[j + 2] - a[k - 2];
3255             xi = a[j + 3] + a[k - 1];
3256             yr = wdr * xr + wdi * xi;
3257             yi = wdr * xi - wdi * xr;
3258             a[j + 2] -= yr;
3259             a[j + 3] -= yi;
3260             a[k - 2] += yr;
3261             a[k - 1] -= yi;
3262             wkr += ss * wdi;
3263             wki += ss * (0.5 - wdr);
3264             xr = a[j] - a[k];
3265             xi = a[j + 1] + a[k + 1];
3266             yr = wkr * xr + wki * xi;
3267             yi = wkr * xi - wki * xr;
3268             a[j] -= yr;
3269             a[j + 1] -= yi;
3270             a[k] += yr;
3271             a[k + 1] -= yi;
3272             wdr += ss * wki;
3273             wdi += ss * (0.5 - wkr);
3274         }
3275         if (i0 == 4) {
3276             break;
3277         }
3278         wkr = 0.5 * sin(ec * i0);
3279         wki = 0.5 * cos(ec * i0);
3280         wdr = 0.5 - (wkr * w1r - wki * w1i);
3281         wdi = wkr * w1i + wki * w1r;
3282         wkr = 0.5 - wkr;
3283         i = i0;
3284     }
3285     xr = a[2] - a[n - 2];
3286     xi = a[3] + a[n - 1];
3287     yr = wdr * xr + wdi * xi;
3288     yi = wdr * xi - wdi * xr;
3289     a[2] -= yr;
3290     a[3] -= yi;
3291     a[n - 2] += yr;
3292     a[n - 1] -= yi;
3293 }
3294 
3295 
3296 void dctsub(int n, double *a)
3297 {
3298     int i, i0, j, k, m;
3299     double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3300 
3301     ec = M_PI_2 / n;
3302     wkr = 0.5;
3303     wki = 0.5;
3304     w1r = cos(ec);
3305     w1i = sin(ec);
3306     wdr = 0.5 * (w1r - w1i);
3307     wdi = 0.5 * (w1r + w1i);
3308     ss = 2 * w1i;
3309     m = n >> 1;
3310     i = 0;
3311     for (;;) {
3312         i0 = i + 2 * DCST_LOOP_DIV;
3313         if (i0 > m - 2) {
3314             i0 = m - 2;
3315         }
3316         for (j = i + 2; j <= i0; j += 2) {
3317             k = n - j;
3318             xr = wdi * a[j - 1] - wdr * a[k + 1];
3319             xi = wdr * a[j - 1] + wdi * a[k + 1];
3320             wkr -= ss * wdi;
3321             wki += ss * wdr;
3322             yr = wki * a[j] - wkr * a[k];
3323             yi = wkr * a[j] + wki * a[k];
3324             wdr -= ss * wki;
3325             wdi += ss * wkr;
3326             a[k + 1] = xr;
3327             a[k] = yr;
3328             a[j - 1] = xi;
3329             a[j] = yi;
3330         }
3331         if (i0 == m - 2) {
3332             break;
3333         }
3334         wdr = cos(ec * i0);
3335         wdi = sin(ec * i0);
3336         wkr = 0.5 * (wdr - wdi);
3337         wki = 0.5 * (wdr + wdi);
3338         wdr = wkr * w1r - wki * w1i;
3339         wdi = wkr * w1i + wki * w1r;
3340         i = i0;
3341     }
3342     xr = wdi * a[m - 1] - wdr * a[m + 1];
3343     a[m - 1] = wdr * a[m - 1] + wdi * a[m + 1];
3344     a[m + 1] = xr;
3345     a[m] *= WR5000;
3346 }
3347 
3348 
3349 void dstsub(int n, double *a)
3350 {
3351     int i, i0, j, k, m;
3352     double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3353 
3354     ec = M_PI_2 / n;
3355     wkr = 0.5;
3356     wki = 0.5;
3357     w1r = cos(ec);
3358     w1i = sin(ec);
3359     wdr = 0.5 * (w1r - w1i);
3360     wdi = 0.5 * (w1r + w1i);
3361     ss = 2 * w1i;
3362     m = n >> 1;
3363     i = 0;
3364     for (;;) {
3365         i0 = i + 2 * DCST_LOOP_DIV;
3366         if (i0 > m - 2) {
3367             i0 = m - 2;
3368         }
3369         for (j = i + 2; j <= i0; j += 2) {
3370             k = n - j;
3371             xr = wdi * a[k + 1] - wdr * a[j - 1];
3372             xi = wdr * a[k + 1] + wdi * a[j - 1];
3373             wkr -= ss * wdi;
3374             wki += ss * wdr;
3375             yr = wki * a[k] - wkr * a[j];
3376             yi = wkr * a[k] + wki * a[j];
3377             wdr -= ss * wki;
3378             wdi += ss * wkr;
3379             a[j - 1] = xr;
3380             a[j] = yr;
3381             a[k + 1] = xi;
3382             a[k] = yi;
3383         }
3384         if (i0 == m - 2) {
3385             break;
3386         }
3387         wdr = cos(ec * i0);
3388         wdi = sin(ec * i0);
3389         wkr = 0.5 * (wdr - wdi);
3390         wki = 0.5 * (wdr + wdi);
3391         wdr = wkr * w1r - wki * w1i;
3392         wdi = wkr * w1i + wki * w1r;
3393         i = i0;
3394     }
3395     xr = wdi * a[m + 1] - wdr * a[m - 1];
3396     a[m + 1] = wdr * a[m + 1] + wdi * a[m - 1];
3397     a[m - 1] = xr;
3398     a[m] *= WR5000;
3399 }
3400 
3401 
3402 void dctsub4(int n, double *a)
3403 {
3404     int m;
3405     double wki, wdr, wdi, xr;
3406 
3407     wki = WR5000;
3408     m = n >> 1;
3409     if (m == 2) {
3410         wdr = wki * WI2500;
3411         wdi = wki * WR2500;
3412         xr = wdi * a[1] - wdr * a[3];
3413         a[1] = wdr * a[1] + wdi * a[3];
3414         a[3] = xr;
3415     }
3416     a[m] *= wki;
3417 }
3418 
3419 
3420 void dstsub4(int n, double *a)
3421 {
3422     int m;
3423     double wki, wdr, wdi, xr;
3424 
3425     wki = WR5000;
3426     m = n >> 1;
3427     if (m == 2) {
3428         wdr = wki * WI2500;
3429         wdi = wki * WR2500;
3430         xr = wdi * a[3] - wdr * a[1];
3431         a[3] = wdr * a[3] + wdi * a[1];
3432         a[1] = xr;
3433     }
3434     a[m] *= wki;
3435 }
3436