lib/audio/fftTools.cpp

0001 /*
0002     SPDX-FileCopyrightText: 2010 Simon Andreas Eugster <simon.eu@gmail.com>
0003     This file is part of kdenlive. See www.kdenlive.org.
0004
0005 SPDX-License-Identifier: GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
0006 */
0007
0008 #include "fftTools.h"
0009
0010 #include <cmath>
0011 #include <iostream>
0012
0013 #include <QString>
0014
0015 // Uncomment for debugging, like writing a GNU Octave .m file to /tmp
0016 //#define DEBUG_FFTTOOLS
0017
0018 #ifdef DEBUG_FFTTOOLS
0019 #include "kdenlive_debug.h"
0020 #include <QTime>
0021 #include <fstream>
0022 #endif
0023
0024 FFTTools::FFTTools()
0025     : m_fftCfgs()
0026     , m_windowFunctions()
0027 {
0028 }
0029 FFTTools::~FFTTools()
0030 {
0031     QHash<QString, kiss_fftr_cfg>::iterator i;
0032     for (i = m_fftCfgs.begin(); i != m_fftCfgs.end(); ++i) {
0033         free(*i);
0034     }
0035 }
0036
0037 const QString FFTTools::windowSignature(const WindowType windowType, const int size, const float param)
0038 {
0039     return QStringLiteral("s%1_t%2_p%3").arg(size).arg(windowType).arg(param, 0, 'f', 3);
0040 }
0041 const QString FFTTools::cfgSignature(const int size)
0042 {
0043     return QStringLiteral("s%1").arg(size);
0044 }
0045
0046 // https://cplusplus.syntaxerrors.info/index.php?title=Cannot_declare_member_function_%E2%80%98static_int_Foo::bar%28%29%E2%80%99_to_have_static_linkage
0047 const QVector<float> FFTTools::window(const WindowType windowType, const int size, const float param)
0048 {
0049     Q_ASSERT(size > 0);
0050
0051     // Deliberately avoid converting size to a float
0052     // to keep mid an integer.
0053     int mid = (int(size - 1) / 2);
0054     int max = (size - 1);
0055     QVector<float> window;
0056
0057     switch (windowType) {
0058     case Window_Rect:
0059         return QVector<float>(size + 1, 1);
0060     case Window_Triangle:
0061         window = QVector<float>(size + 1);
0062
0063         for (int x = 0; x < mid; ++x) {
0064             window[x] = float(x) / mid + float(mid - x) / mid * param;
0065         }
0066         for (int x = mid; x < size; ++x) {
0067             window[x] = float(x - mid) / (max - mid) * param + float(max - x) / (max - mid);
0068         }
0069         window[size] = .5f + param / 2;
0070
0071 #ifdef DEBUG_FFTTOOLS
0072         qCDebug(KDENLIVE_LOG) << "Triangle window (factor " << window[size] << "):";
0073         for (int i = 0; i < size; ++i) {
0074             qCDebug(KDENLIVE_LOG) << window[i];
0075         }
0076         qCDebug(KDENLIVE_LOG) << "Triangle window end.";
0077 #endif
0078
0079         return window;
0080     case Window_Hamming:
0081         // Use a quick version of the Hamming window here: Instead of
0082         // interpolating values between (-max/2) and (max/2)
0083         // we use integer values instead, ranging from -mid to (max-mid).
0084         window = QVector<float>(size + 1);
0085
0086         for (int x = 0; x < size; ++x) {
0087             window[x] = .54f + .46f * cosf(2.f * float(M_PI) * float(x - mid) / size);
0088         }
0089
0090         // Integrating the cosine over the window function results in
0091         // an area of 0; So only the constant factor 0.54 counts.
0092         window[size] = .54f;
0093
0094 #ifdef DEBUG_FFTTOOLS
0095         qCDebug(KDENLIVE_LOG) << "Hanning window (factor " << window[size] << "):";
0096         for (int i = 0; i < size; ++i) {
0097             qCDebug(KDENLIVE_LOG) << window[i];
0098         }
0099         qCDebug(KDENLIVE_LOG) << "Hanning window end.";
0100 #endif
0101
0102         return window;
0103     }
0104     Q_ASSERT(false);
0105     return QVector<float>();
0106 }
0107
0108 void FFTTools::fftNormalized(const audioShortVector &audioFrame, const uint channel, const uint numChannels, float *freqSpectrum, const WindowType windowType,
0109                              const uint windowSize, const float param)
0110 {
0111 #ifdef DEBUG_FFTTOOLS
0112     QTime start = QTime::currentTime();
0113 #endif
0114
0115     const uint numSamples = uint(audioFrame.size()) / numChannels;
0116
0117     if (((windowSize & 1) != 0u) || windowSize < 2) {
0118         return;
0119     }
0120
0121     const QString cfgSig = cfgSignature(int(windowSize));
0122     const QString winSig = windowSignature(windowType, int(windowSize), param);
0123
0124     // Get the kiss_fft configuration from the config cache
0125     // or build a new configuration if the requested one is not available.
0126     kiss_fftr_cfg myCfg;
0127     if (m_fftCfgs.contains(cfgSig)) {
0128 #ifdef DEBUG_FFTTOOLS
0129         qCDebug(KDENLIVE_LOG) << "Re-using FFT configuration with size " << windowSize;
0130 #endif
0131         myCfg = m_fftCfgs.value(cfgSig);
0132     } else {
0133 #ifdef DEBUG_FFTTOOLS
0134         qCDebug(KDENLIVE_LOG) << "Creating FFT configuration with size " << windowSize;
0135 #endif
0136         myCfg = kiss_fftr_alloc(int(windowSize), 0, nullptr, nullptr);
0137         m_fftCfgs.insert(cfgSig, myCfg);
0138     }
0139
0140     // Get the window function from the cache
0141     // (except for a rectangular window; nothing to do there).
0142     QVector<float> window;
0143     float windowScaleFactor = 1;
0144     if (windowType != FFTTools::Window_Rect) {
0145
0146         if (m_windowFunctions.contains(winSig)) {
0147 #ifdef DEBUG_FFTTOOLS
0148             qCDebug(KDENLIVE_LOG) << "Re-using window function with signature " << winSig;
0149 #endif
0150             window = m_windowFunctions.value(winSig);
0151         } else {
0152 #ifdef DEBUG_FFTTOOLS
0153             qCDebug(KDENLIVE_LOG) << "Building new window function with signature " << winSig;
0154 #endif
0155             window = FFTTools::window(windowType, int(windowSize), 0);
0156             m_windowFunctions.insert(winSig, window);
0157         }
0158         windowScaleFactor = 1.0f / window[int(windowSize)];
0159     }
0160
0161     // Prepare frequency space vector. The resulting FFT vector is only half as long.
0162     auto *freqData = new kiss_fft_cpx[size_t(windowSize) / 2];
0163     auto *data = new float[size_t(windowSize)];
0164
0165     // Copy the first channel's audio into a vector for the FFT display;
0166     // Fill the data vector indices that cannot be covered with sample data with 0
0167     if (numSamples < windowSize) {
0168         std::fill(&data[numSamples], &data[windowSize - 1], 0);
0169     }
0170     // Normalize signals to [0,1] to get correct dB values later on
0171     for (uint i = 0; i < numSamples && i < windowSize; ++i) {
0172         // Performance note: Benchmarking has shown that using the if/else inside the loop
0173         // does not do noticeable worse than keeping it outside (perhaps the branch predictor
0174         // is good enough), so it remains in there for better readability.
0175         if (windowType != FFTTools::Window_Rect) {
0176             data[i] = float(audioFrame.data()[i * numChannels + channel]) / 32767.0f * window[int(i)];
0177         } else {
0178             data[i] = float(audioFrame.data()[i * numChannels + channel]) / 32767.0f;
0179         }
0180     }
0181
0182     // Calculate the Fast Fourier Transform for the input data
0183     kiss_fftr(myCfg, data, freqData);
0184
0185     // Logarithmic scale: 20 * log ( 2 * magnitude / N ) with magnitude = sqrt(r² + i²)
0186     // with N = FFT size (after FFT, 1/2 window size)
0187     for (uint i = 0; i < windowSize / 2; ++i) {
0188         // Logarithmic scale: 20 * log ( 2 * magnitude / N ) with magnitude = sqrt(r² + i²)
0189         // with N = FFT size (after FFT, 1/2 window size)
0190         freqSpectrum[i] =
0191             20 *
0192             logf(powf(powf(fabs(freqData[i].r * windowScaleFactor), 2) + powf(fabs(freqData[i].i * windowScaleFactor), 2), .5) / (float(windowSize) / 2.0f)) /
0193             logf(10);
0194         ;
0195     }
0196
0197 #ifdef DEBUG_FFTTOOLS
0198     std::ofstream mFile;
0199     mFile.open("/tmp/freq.m");
0200     if (!mFile) {
0201         qCDebug(KDENLIVE_LOG) << "Opening file failed.";
0202     } else {
0203         mFile << "val = [ ";
0204
0205         for (int sample = 0; sample < 256; ++sample) {
0206             mFile << data[sample] << ' ';
0207         }
0208         mFile << " ];\n";
0209
0210         mFile << "freq = [ ";
0211         for (int sample = 0; sample < 256; ++sample) {
0212             mFile << freqData[sample].r << '+' << freqData[sample].i << "*i ";
0213         }
0214         mFile << " ];\n";
0215
0216         mFile.close();
0217         qCDebug(KDENLIVE_LOG) << "File written.";
0218     }
0219 #endif
0220
0221 #ifdef DEBUG_FFTTOOLS
0222     qCDebug(KDENLIVE_LOG) << "Calculated FFT in " << start.elapsed() << " ms.";
0223 #endif
0224     delete[] freqData;
0225     delete[] data;
0226 }
0227
0228 const QVector<float> FFTTools::interpolatePeakPreserving(const QVector<float> &in, const uint targetSize, uint left, uint right, float fill)
0229 {
0230 #ifdef DEBUG_FFTTOOLS
0231     QTime start = QTime::currentTime();
0232 #endif
0233
0234     if (right == 0) {
0235         Q_ASSERT(in.size() > 0);
0236         right = uint(in.size()) - 1;
0237     }
0238     Q_ASSERT(targetSize > 0);
0239     Q_ASSERT(left < right);
0240
0241     QVector<float> out(static_cast<int>(targetSize));
0242
0243     float x;
0244     int xi;
0245     int i;
0246     if ((float(right - left)) / targetSize < 2.f) {
0247         float x_prev = 0;
0248         for (i = 0; i < int(targetSize); ++i) {
0249
0250             // i:  Target index
0251             // x:  Interpolated source index (float!)
0252             // xi: floor(x)
0253
0254             // Transform [0,targetSize-1] to [left,right]
0255             x = float(i) / (targetSize - 1) * (right - left) + left;
0256             xi = int(floor(x));
0257
0258             if (x > float(in.size() - 1)) {
0259                 // This may happen if right > in.size()-1; Fill the rest of the vector
0260                 // with the default value now.
0261                 break;
0262             }
0263
0264             // Use linear interpolation in order to get smoother display
0265             if (xi == 0 || xi == in.size() - 1) {
0266                 // ... except if we are at the left or right border of the input signal.
0267                 // Special case here since we consider previous and future values as well for
0268                 // the actual interpolation (not possible here).
0269                 out[i] = in[xi];
0270             } else {
0271                 if (in[xi] > in[xi + 1] && x_prev < float(xi)) {
0272                     // This is a hack to preserve peaks.
0273                     // Consider f = {0, 100, 0}
0274                     //          x = {0.5,  1.5}
0275                     // Then x is 50 both times, and the 100 peak is lost.
0276                     // Get it back here for the first x after the peak (which is at xi).
0277                     // (x is the first after the peak if the previous x was smaller than floor(x).)
0278                     out[i] = in[xi];
0279                 } else {
0280                     out[i] = (xi + 1.f - x) * in[xi] + (x - xi) * in[xi + 1];
0281                 }
0282             }
0283             x_prev = x;
0284         }
0285     } else {
0286         // If there are more than 2 samples per pixel in average, then use the maximum of them
0287         // since by only looking at the left sample we might miss some maxima.
0288         int src = int(left);
0289         for (i = 0; i < int(targetSize); ++i) {
0290
0291             // x:  right bound
0292             // xi: floor(x)
0293             x = float(i + 1) / (targetSize - 1) * (right - left) + left;
0294             xi = int(floor(x));
0295             out[i] = fill;
0296
0297             for (; src < xi && src < in.size(); ++src) {
0298                 if (out[i] < in[src]) {
0299                     out[i] = in[src];
0300                 }
0301             }
0302
0303             //             xi_prev = xi;
0304         }
0305     }
0306     // Fill the rest of the vector if the right border exceeds the input vector.
0307     for (; i < int(targetSize); ++i) {
0308         out[i] = fill;
0309     }
0310
0311 #ifdef DEBUG_FFTTOOLS
0312     qCDebug(KDENLIVE_LOG) << "Interpolated " << targetSize << " nodes from " << in.size() << " input points in " << start.elapsed() << " ms";
0313 #endif
0314
0315     return out;
0316 }
0317
0318 #ifdef DEBUG_FFTTOOLS
0319 #undef DEBUG_FFTTOOLS
0320 #endif