File indexing completed on 2024-05-12 04:53:39
0001 /* 0002 SPDX-FileCopyrightText: 2003-2010 Mark Borgerding <Mark@Borgerding.net> 0003 SPDX-License-Identifier: BSD-3-Clause 0004 */ 0005 0006 #include "_kiss_fft_guts.h" 0007 /* The guts header contains all the multiplication and addition macros that are defined for 0008 fixed or floating point complex numbers. It also delares the kf_ internal functions. 0009 */ 0010 0011 static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m) 0012 { 0013 kiss_fft_cpx *Fout2; 0014 kiss_fft_cpx *tw1 = st->twiddles; 0015 kiss_fft_cpx t; 0016 Fout2 = Fout + m; 0017 do { 0018 C_FIXDIV(*Fout, 2); 0019 C_FIXDIV(*Fout2, 2); 0020 0021 C_MUL(t, *Fout2, *tw1); 0022 tw1 += fstride; 0023 C_SUB(*Fout2, *Fout, t); 0024 C_ADDTO(*Fout, t); 0025 ++Fout2; 0026 ++Fout; 0027 } while (--m); 0028 } 0029 0030 static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, const size_t m) 0031 { 0032 kiss_fft_cpx *tw1, *tw2, *tw3; 0033 kiss_fft_cpx scratch[6]; 0034 size_t k = m; 0035 const size_t m2 = 2 * m; 0036 const size_t m3 = 3 * m; 0037 0038 tw3 = tw2 = tw1 = st->twiddles; 0039 0040 do { 0041 C_FIXDIV(*Fout, 4); 0042 C_FIXDIV(Fout[m], 4); 0043 C_FIXDIV(Fout[m2], 4); 0044 C_FIXDIV(Fout[m3], 4); 0045 0046 C_MUL(scratch[0], Fout[m], *tw1); 0047 C_MUL(scratch[1], Fout[m2], *tw2); 0048 C_MUL(scratch[2], Fout[m3], *tw3); 0049 0050 C_SUB(scratch[5], *Fout, scratch[1]); 0051 C_ADDTO(*Fout, scratch[1]); 0052 C_ADD(scratch[3], scratch[0], scratch[2]); 0053 C_SUB(scratch[4], scratch[0], scratch[2]); 0054 C_SUB(Fout[m2], *Fout, scratch[3]); 0055 tw1 += fstride; 0056 tw2 += fstride * 2; 0057 tw3 += fstride * 3; 0058 C_ADDTO(*Fout, scratch[3]); 0059 0060 if (st->inverse) { 0061 Fout[m].r = scratch[5].r - scratch[4].i; 0062 Fout[m].i = scratch[5].i + scratch[4].r; 0063 Fout[m3].r = scratch[5].r + scratch[4].i; 0064 Fout[m3].i = scratch[5].i - scratch[4].r; 0065 } else { 0066 Fout[m].r = scratch[5].r + scratch[4].i; 0067 Fout[m].i = scratch[5].i - scratch[4].r; 0068 Fout[m3].r = scratch[5].r - scratch[4].i; 0069 Fout[m3].i = scratch[5].i + scratch[4].r; 0070 } 0071 ++Fout; 0072 } while (--k); 0073 } 0074 0075 static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, size_t m) 0076 { 0077 size_t k = m; 0078 const size_t m2 = 2 * m; 0079 kiss_fft_cpx *tw1, *tw2; 0080 kiss_fft_cpx scratch[5]; 0081 kiss_fft_cpx epi3; 0082 epi3 = st->twiddles[fstride * m]; 0083 0084 tw1 = tw2 = st->twiddles; 0085 0086 do { 0087 C_FIXDIV(*Fout, 3); 0088 C_FIXDIV(Fout[m], 3); 0089 C_FIXDIV(Fout[m2], 3); 0090 0091 C_MUL(scratch[1], Fout[m], *tw1); 0092 C_MUL(scratch[2], Fout[m2], *tw2); 0093 0094 C_ADD(scratch[3], scratch[1], scratch[2]); 0095 C_SUB(scratch[0], scratch[1], scratch[2]); 0096 tw1 += fstride; 0097 tw2 += fstride * 2; 0098 0099 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); 0100 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); 0101 0102 C_MULBYSCALAR(scratch[0], epi3.i); 0103 0104 C_ADDTO(*Fout, scratch[3]); 0105 0106 Fout[m2].r = Fout[m].r + scratch[0].i; 0107 Fout[m2].i = Fout[m].i - scratch[0].r; 0108 0109 Fout[m].r -= scratch[0].i; 0110 Fout[m].i += scratch[0].r; 0111 0112 ++Fout; 0113 } while (--k); 0114 } 0115 0116 static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m) 0117 { 0118 kiss_fft_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4; 0119 int u; 0120 kiss_fft_cpx scratch[13]; 0121 kiss_fft_cpx *twiddles = st->twiddles; 0122 kiss_fft_cpx *tw; 0123 kiss_fft_cpx ya, yb; 0124 ya = twiddles[fstride * m]; 0125 yb = twiddles[fstride * 2 * m]; 0126 0127 Fout0 = Fout; 0128 Fout1 = Fout0 + m; 0129 Fout2 = Fout0 + 2 * m; 0130 Fout3 = Fout0 + 3 * m; 0131 Fout4 = Fout0 + 4 * m; 0132 0133 tw = st->twiddles; 0134 for (u = 0; u < m; ++u) { 0135 C_FIXDIV(*Fout0, 5); 0136 C_FIXDIV(*Fout1, 5); 0137 C_FIXDIV(*Fout2, 5); 0138 C_FIXDIV(*Fout3, 5); 0139 C_FIXDIV(*Fout4, 5); 0140 scratch[0] = *Fout0; 0141 0142 C_MUL(scratch[1], *Fout1, tw[u * fstride]); 0143 C_MUL(scratch[2], *Fout2, tw[2 * u * fstride]); 0144 C_MUL(scratch[3], *Fout3, tw[3 * u * fstride]); 0145 C_MUL(scratch[4], *Fout4, tw[4 * u * fstride]); 0146 0147 C_ADD(scratch[7], scratch[1], scratch[4]); 0148 C_SUB(scratch[10], scratch[1], scratch[4]); 0149 C_ADD(scratch[8], scratch[2], scratch[3]); 0150 C_SUB(scratch[9], scratch[2], scratch[3]); 0151 0152 Fout0->r += scratch[7].r + scratch[8].r; 0153 Fout0->i += scratch[7].i + scratch[8].i; 0154 0155 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r, ya.r) + S_MUL(scratch[8].r, yb.r); 0156 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i, ya.r) + S_MUL(scratch[8].i, yb.r); 0157 0158 scratch[6].r = S_MUL(scratch[10].i, ya.i) + S_MUL(scratch[9].i, yb.i); 0159 scratch[6].i = -S_MUL(scratch[10].r, ya.i) - S_MUL(scratch[9].r, yb.i); 0160 0161 C_SUB(*Fout1, scratch[5], scratch[6]); 0162 C_ADD(*Fout4, scratch[5], scratch[6]); 0163 0164 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r, yb.r) + S_MUL(scratch[8].r, ya.r); 0165 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i, yb.r) + S_MUL(scratch[8].i, ya.r); 0166 scratch[12].r = -S_MUL(scratch[10].i, yb.i) + S_MUL(scratch[9].i, ya.i); 0167 scratch[12].i = S_MUL(scratch[10].r, yb.i) - S_MUL(scratch[9].r, ya.i); 0168 0169 C_ADD(*Fout2, scratch[11], scratch[12]); 0170 C_SUB(*Fout3, scratch[11], scratch[12]); 0171 0172 ++Fout0; 0173 ++Fout1; 0174 ++Fout2; 0175 ++Fout3; 0176 ++Fout4; 0177 } 0178 } 0179 0180 /* perform the butterfly for one stage of a mixed radix FFT */ 0181 static void kf_bfly_generic(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m, int p) 0182 { 0183 int u, q1, q; 0184 kiss_fft_cpx *twiddles = st->twiddles; 0185 kiss_fft_cpx t; 0186 int Norig = st->nfft; 0187 0188 kiss_fft_cpx scratch[p]; 0189 0190 for (u = 0; u < m; ++u) { 0191 int k = u; 0192 for (q1 = 0; q1 < p; ++q1) { 0193 scratch[q1] = Fout[k]; 0194 C_FIXDIV(scratch[q1], p); 0195 k += m; 0196 } 0197 0198 k = u; 0199 for (q1 = 0; q1 < p; ++q1) { 0200 int twidx = 0; 0201 Fout[k] = scratch[0]; 0202 for (q = 1; q < p; ++q) { 0203 twidx += fstride * k; 0204 if (twidx >= Norig) twidx -= Norig; 0205 C_MUL(t, scratch[q], twiddles[twidx]); 0206 C_ADDTO(Fout[k], t); 0207 } 0208 k += m; 0209 } 0210 } 0211 } 0212 0213 static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f, const size_t fstride, int in_stride, int *factors, const kiss_fft_cfg st) 0214 { 0215 kiss_fft_cpx *Fout_beg = Fout; 0216 const int p = *factors++; /* the radix */ 0217 const int m = *factors++; /* stage's fft length/p */ 0218 const kiss_fft_cpx *Fout_end = Fout + p * m; 0219 0220 #ifdef _OPENMP 0221 // use openmp extensions at the 0222 // top-level (not recursive) 0223 if (fstride == 1 && p <= 5) { 0224 int k; 0225 0226 // execute the p different work units in different threads 0227 #pragma omp parallel for 0228 for (k = 0; k < p; ++k) 0229 kf_work(Fout + k * m, f + fstride * in_stride * k, fstride * p, in_stride, factors, st); 0230 // all threads have joined by this point 0231 0232 switch (p) { 0233 case 2: 0234 kf_bfly2(Fout, fstride, st, m); 0235 break; 0236 case 3: 0237 kf_bfly3(Fout, fstride, st, m); 0238 break; 0239 case 4: 0240 kf_bfly4(Fout, fstride, st, m); 0241 break; 0242 case 5: 0243 kf_bfly5(Fout, fstride, st, m); 0244 break; 0245 default: 0246 kf_bfly_generic(Fout, fstride, st, m, p); 0247 break; 0248 } 0249 return; 0250 } 0251 #endif 0252 0253 if (m == 1) { 0254 do { 0255 *Fout = *f; 0256 f += fstride * in_stride; 0257 } while (++Fout != Fout_end); 0258 } else { 0259 do { 0260 // recursive call: 0261 // DFT of size m*p performed by doing 0262 // p instances of smaller DFTs of size m, 0263 // each one takes a decimated version of the input 0264 kf_work(Fout, f, fstride * p, in_stride, factors, st); 0265 f += fstride * in_stride; 0266 } while ((Fout += m) != Fout_end); 0267 } 0268 0269 Fout = Fout_beg; 0270 0271 // recombine the p smaller DFTs 0272 switch (p) { 0273 case 2: 0274 kf_bfly2(Fout, fstride, st, m); 0275 break; 0276 case 3: 0277 kf_bfly3(Fout, fstride, st, m); 0278 break; 0279 case 4: 0280 kf_bfly4(Fout, fstride, st, m); 0281 break; 0282 case 5: 0283 kf_bfly5(Fout, fstride, st, m); 0284 break; 0285 default: 0286 kf_bfly_generic(Fout, fstride, st, m, p); 0287 break; 0288 } 0289 } 0290 0291 /* facbuf is populated by p1,m1,p2,m2, ... 0292 where 0293 p[i] * m[i] = m[i-1] 0294 m0 = n */ 0295 static void kf_factor(int n, int *facbuf) 0296 { 0297 int p = 4; 0298 double floor_sqrt; 0299 floor_sqrt = floor(sqrt((double)n)); 0300 0301 /*factor out powers of 4, powers of 2, then any remaining primes */ 0302 do { 0303 while (n % p) { 0304 switch (p) { 0305 case 4: 0306 p = 2; 0307 break; 0308 case 2: 0309 p = 3; 0310 break; 0311 default: 0312 p += 2; 0313 break; 0314 } 0315 if (p > floor_sqrt) p = n; /* no more factors, skip to end */ 0316 } 0317 n /= p; 0318 *facbuf++ = p; 0319 *facbuf++ = n; 0320 } while (n > 1); 0321 } 0322 0323 /* 0324 * 0325 * User-callable function to allocate all necessary storage space for the fft. 0326 * 0327 * The return value is a contiguous block of memory, allocated with malloc. As such, 0328 * It can be freed with free(), rather than a kiss_fft-specific function. 0329 * */ 0330 kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem) 0331 { 0332 kiss_fft_cfg st = NULL; 0333 size_t memneeded = sizeof(struct kiss_fft_state) + sizeof(kiss_fft_cpx) * (nfft - 1); /* twiddle factors*/ 0334 0335 if (lenmem == NULL) { 0336 st = (kiss_fft_cfg)KISS_FFT_MALLOC(memneeded); 0337 } else { 0338 if (mem != NULL && *lenmem >= memneeded) st = (kiss_fft_cfg)mem; 0339 *lenmem = memneeded; 0340 } 0341 if (st) { 0342 int i; 0343 st->nfft = nfft; 0344 st->inverse = inverse_fft; 0345 0346 for (i = 0; i < nfft; ++i) { 0347 const double pi = 3.141592653589793238462643383279502884197169399375105820974944; 0348 double phase = -2 * pi * i / nfft; 0349 if (st->inverse) phase *= -1; 0350 kf_cexp(st->twiddles + i, phase); 0351 } 0352 0353 kf_factor(nfft, st->factors); 0354 } 0355 return st; 0356 } 0357 0358 void kiss_fft_stride(kiss_fft_cfg st, const kiss_fft_cpx *fin, kiss_fft_cpx *fout, int in_stride) 0359 { 0360 if (fin == fout) { 0361 // NOTE: this is not really an in-place FFT algorithm. 0362 // It just performs an out-of-place FFT into a temp buffer 0363 kiss_fft_cpx tmpbuf[st->nfft]; 0364 kf_work(tmpbuf, fin, 1, in_stride, st->factors, st); 0365 memcpy(fout, tmpbuf, sizeof(kiss_fft_cpx) * st->nfft); 0366 } else { 0367 kf_work(fout, fin, 1, in_stride, st->factors, st); 0368 } 0369 } 0370 0371 void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout) 0372 { 0373 kiss_fft_stride(cfg, fin, fout, 1); 0374 }