lib/audio/fftCorrelation.cpp

0001 /*
0002 SPDX-FileCopyrightText: 2012 Simon A. Eugster (Granjow)  <simon.eu@gmail.com>
0003 SPDX-License-Identifier: GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
0004 */
0005
0006 #include "fftCorrelation.h"
0007 #include <QElapsedTimer>
0008 extern "C" {
0009 #include "../external/kiss_fft/tools/kiss_fftr.h"
0010 }
0011
0012 #include "kdenlive_debug.h"
0013 #include <algorithm>
0014 #include <vector>
0015
0016 void FFTCorrelation::correlate(const qint64 *left, const size_t leftSize, const qint64 *right, const size_t rightSize, qint64 *out_correlated)
0017 {
0018     auto *correlatedFloat = new float[leftSize + rightSize + 1];
0019     correlate(left, leftSize, right, rightSize, correlatedFloat);
0020
0021     // The correlation vector will have entries up to N (number of entries
0022     // of the vector), so converting to integers will not lose that much
0023     // of precision.
0024     for (size_t i = 0; i < leftSize + rightSize + 1; ++i) {
0025         out_correlated[i] = qint64(correlatedFloat[i]);
0026     }
0027     delete[] correlatedFloat;
0028 }
0029
0030 void FFTCorrelation::correlate(const qint64 *left, const size_t leftSize, const qint64 *right, const size_t rightSize, float *out_correlated)
0031 {
0032     QElapsedTimer t;
0033     t.start();
0034
0035     auto *leftF = new float[leftSize];
0036     auto *rightF = new float[rightSize];
0037
0038     // First the qint64 values need to be normalized to floats
0039     // Dividing by the max value is maybe not the best solution, but the
0040     // maximum value after correlation should not be larger than the longest
0041     // vector since each value should be at most 1
0042     qint64 maxLeft = 1;
0043     qint64 maxRight = 1;
0044     for (size_t i = 0; i < leftSize; ++i) {
0045         if (qAbs(left[i]) > maxLeft) {
0046             maxLeft = qAbs(left[i]);
0047         }
0048     }
0049     for (size_t i = 0; i < rightSize; ++i) {
0050         if (qAbs(right[i]) > maxRight) {
0051             maxRight = qAbs(right[i]);
0052         }
0053     }
0054
0055     // One side needs to be reversed, since multiplication in frequency domain (fourier space)
0056     // calculates the convolution: \sum l[x]r[N-x] and not the correlation: \sum l[x]r[x]
0057     for (size_t i = 0; i < leftSize; ++i) {
0058         leftF[i] = float(left[i]) / maxLeft;
0059     }
0060     for (size_t i = 0; i < rightSize; ++i) {
0061         rightF[rightSize - 1 - i] = float(right[i]) / maxRight;
0062     }
0063
0064     // Now we can convolve to get the correlation
0065     convolve(leftF, leftSize, rightF, rightSize, out_correlated);
0066
0067     qCDebug(KDENLIVE_LOG) << "Correlation (FFT based) computed in " << t.elapsed() << " ms.";
0068     delete[] leftF;
0069     delete[] rightF;
0070 }
0071
0072 void FFTCorrelation::convolve(const float *left, const size_t leftSize, const float *right, const size_t rightSize, float *out_convolved)
0073 {
0074     QElapsedTimer time;
0075     time.start();
0076
0077     // To avoid issues with repetition (we are dealing with cosine waves
0078     // in the fourier domain) we need to pad the vectors to at least twice their size,
0079     // otherwise convolution would convolve with the repeated pattern as well
0080     size_t largestSize = std::max(leftSize, rightSize);
0081
0082     // The vectors must have the same size (same frequency resolution!) and should
0083     // be a power of 2 (for FFT).
0084     size_t size = 64;
0085     while (size / 2 < largestSize) {
0086         size = size << 1;
0087     }
0088
0089     const size_t fft_size = size / 2 + 1;
0090     kiss_fftr_cfg fftConfig = kiss_fftr_alloc(int(size), 0, nullptr, nullptr);
0091     kiss_fftr_cfg ifftConfig = kiss_fftr_alloc(int(size), 1, nullptr, nullptr);
0092     std::vector<kiss_fft_cpx> leftFFT(fft_size);
0093     std::vector<kiss_fft_cpx> rightFFT(fft_size);
0094     std::vector<kiss_fft_cpx> correlatedFFT(fft_size);
0095
0096     // Fill in the data into our new vectors with padding
0097     std::vector<float> leftData(size, 0);
0098     std::vector<float> rightData(size, 0);
0099     std::vector<float> convolved(size);
0100
0101     std::copy(left, left + leftSize, leftData.begin());
0102     std::copy(right, right + rightSize, rightData.begin());
0103
0104     // Fourier transformation of the vectors
0105     kiss_fftr(fftConfig, &leftData[0], &leftFFT[0]);
0106     kiss_fftr(fftConfig, &rightData[0], &rightFFT[0]);
0107
0108     // Convolution in spacial domain is a multiplication in fourier domain. O(n).
0109     for (size_t i = 0; i < correlatedFFT.size(); ++i) {
0110         correlatedFFT[i].r = leftFFT[i].r * rightFFT[i].r - leftFFT[i].i * rightFFT[i].i;
0111         correlatedFFT[i].i = leftFFT[i].r * rightFFT[i].i + leftFFT[i].i * rightFFT[i].r;
0112     }
0113
0114     // Inverse fourier transformation to get the convolved data.
0115     // Insert one element at the beginning to obtain the same result
0116     // that we also get with the nested for loop correlation.
0117     *out_convolved = 0;
0118     size_t out_size = leftSize + rightSize + 1;
0119
0120     kiss_fftri(ifftConfig, &correlatedFFT[0], &convolved[0]);
0121     std::copy(convolved.begin(), convolved.begin() + int(out_size) - 1, out_convolved + 1);
0122
0123     // Finally some cleanup.
0124     kiss_fftr_free(fftConfig);
0125     kiss_fftr_free(ifftConfig);
0126
0127     qCDebug(KDENLIVE_LOG) << "FFT convolution computed. Time taken: " << time.elapsed() << " ms";
0128 }