plugins/acoustidimport/ffmpegfingerprintdecoder.cpp

0001 /**
0002  * \file ffmpegfingerprintdecoder.cpp
0003  * Chromaprint fingerprint decoder using FFmpeg.
0004  *
0005  * \b Project: Kid3
0006  * \author Urs Fleisch
0007  * \date 15 Feb 2013
0008  *
0009  * Copyright (C) 2013-2024  Urs Fleisch
0010  *
0011  * This file is part of Kid3.
0012  *
0013  * Kid3 is free software; you can redistribute it and/or modify
0014  * it under the terms of the GNU General Public License as published by
0015  * the Free Software Foundation; either version 2 of the License, or
0016  * (at your option) any later version.
0017  *
0018  * Kid3 is distributed in the hope that it will be useful,
0019  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0020  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0021  * GNU General Public License for more details.
0022  *
0023  * You should have received a copy of the GNU General Public License
0024  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0025  */
0026
0027 /** Needed for UINT64_C, INT64_C macros used by libav includes. */
0028 #define __STDC_CONSTANT_MACROS
0029 #include "ffmpegfingerprintdecoder.h"
0030 #include "acoustidconfig.h"
0031
0032 #include <cstdint>
0033 #include <cstdio>
0034 extern "C" {
0035 #include <libavcodec/avcodec.h>
0036 #include <libavformat/avformat.h>
0037
0038 #ifdef HAVE_AVRESAMPLE
0039 #include <libavresample/avresample.h>
0040 #include <libavutil/opt.h>
0041 #elif defined HAVE_SWRESAMPLE
0042 #include <libswresample/swresample.h>
0043 #elif defined HAVE_AV_AUDIO_CONVERT
0044 #include <libavutil/audioconvert.h>
0045 #include <libavutil/samplefmt.h>
0046
0047 /*
0048  * Declarations taken from "ffmpeg/audioconvert.h", "ffmpeg/samplefmt.h".
0049  */
0050 //! @cond
0051 struct AVAudioConvert;
0052 typedef struct AVAudioConvert AVAudioConvert;
0053
0054 AVAudioConvert *av_audio_convert_alloc(enum AVSampleFormat out_fmt, int out_channels,
0055                                        enum AVSampleFormat in_fmt, int in_channels,
0056                                        const float *matrix, int flags);
0057 void av_audio_convert_free(AVAudioConvert *ctx);
0058 int av_audio_convert(AVAudioConvert *ctx,
0059                            void * const out[6], const int out_stride[6],
0060                      const void * const  in[6], const int  in_stride[6], int len);
0061 //! @endcond
0062 #endif
0063 }
0064 #include <QFile>
0065 #include "fingerprintcalculator.h"
0066
0067 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(52, 94, 1)
0068 #define AV_SAMPLE_FMT_S16 SAMPLE_FMT_S16
0069 #define AVMEDIA_TYPE_AUDIO CODEC_TYPE_AUDIO
0070 #endif
0071
0072 /** Bytes needed for 1 second of 48khz 32bit audio. */
0073 #ifdef AVCODEC_MAX_AUDIO_FRAME_SIZE
0074 #define MAX_AUDIO_FRAME_SIZE AVCODEC_MAX_AUDIO_FRAME_SIZE
0075 #else
0076 #define MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio
0077 #endif
0078
0079 namespace {
0080   constexpr int BUFFER_SIZE = MAX_AUDIO_FRAME_SIZE * 2;
0081
0082 /*
0083  * The following classes are used to benefit from the C++
0084  * "Resource Acquisition Is Initialization" (RAII) idiom when dealing with
0085  * AV resources.
0086  */
0087
0088 class Packet {
0089 public:
0090   explicit Packet(AVPacket* packet) : m_ptr(packet) {
0091   }
0092
0093   AVPacket* data() { return m_ptr; }
0094
0095   ~Packet() {
0096     if (m_ptr && m_ptr->data) {
0097 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(57, 8, 0)
0098       ::av_free_packet(m_ptr);
0099 #else
0100       ::av_packet_unref(m_ptr);
0101 #endif
0102     }
0103   }
0104
0105   Packet(const Packet&) = delete;
0106   Packet& operator=(const Packet&) = delete;
0107
0108   int streamIndex() const { return m_ptr ? m_ptr->stream_index : -1; }
0109
0110 private:
0111   AVPacket* m_ptr;
0112 };
0113
0114 class Codec {
0115 public:
0116   Codec() : m_ptr(nullptr), m_impl(nullptr), m_frame(nullptr), m_opened(false) {
0117   }
0118
0119   ~Codec() {
0120     if (m_frame)
0121 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(54, 28, 0)
0122       ::av_freep(&m_frame);
0123 #elif LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55, 28, 1)
0124       ::avcodec_free_frame(&m_frame);
0125 #else
0126       ::av_frame_free(&m_frame);
0127 #endif
0128     if (m_opened)
0129       ::avcodec_close(m_ptr);
0130 #if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 33, 100)
0131     if (m_ptr)
0132       ::avcodec_free_context(&m_ptr);
0133 #endif
0134   }
0135
0136   Codec(const Codec&) = delete;
0137   Codec& operator=(const Codec&) = delete;
0138
0139   bool open() {
0140     m_opened = false;
0141     if (m_ptr && m_impl) {
0142       m_opened =
0143 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(53, 5, 0)
0144         ::avcodec_open(m_ptr, m_impl) >= 0
0145 #else
0146         ::avcodec_open2(m_ptr, m_impl, nullptr) >= 0
0147 #endif
0148           ;
0149     }
0150     return m_opened;
0151   }
0152
0153   int channels() const {
0154 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 37, 100)
0155     return m_ptr->channels;
0156 #else
0157     return m_ptr->ch_layout.nb_channels;
0158 #endif
0159   }
0160
0161   AVSampleFormat sampleFormat() const { return m_ptr->sample_fmt; }
0162
0163   int sampleRate() const { return m_ptr->sample_rate; }
0164
0165 #if defined HAVE_AVRESAMPLE || defined HAVE_SWRESAMPLE
0166 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 37, 100)
0167   uint64_t channelLayout() const { return m_ptr->channel_layout; }
0168 #else
0169   const AVChannelLayout* channelLayout() const { return &m_ptr->ch_layout; }
0170 #endif
0171 #endif
0172
0173   int decode(int16_t* samples, int* frameSize, AVPacket* pkt) {
0174 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(52, 23, 0)
0175     return ::avcodec_decode_audio2(m_ptr,
0176       samples, frameSize, pkt->data, pkt->size);
0177 #elif LIBAVCODEC_VERSION_INT < AV_VERSION_INT(53, 25, 0)
0178     return ::avcodec_decode_audio3(m_ptr,
0179       samples, frameSize, pkt);
0180 #else
0181 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55, 28, 1)
0182     if (!m_frame)
0183       m_frame = ::avcodec_alloc_frame();
0184     ::avcodec_get_frame_defaults(m_frame);
0185 #else
0186     if (!m_frame)
0187       m_frame = ::av_frame_alloc();
0188     ::av_frame_unref(m_frame);
0189 #endif
0190 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(57, 37, 100)
0191     int decoded = 0;
0192     int len = ::avcodec_decode_audio4(m_ptr, m_frame, &decoded, pkt);
0193     if (len >= 0 && decoded) {
0194       int planar = ::av_sample_fmt_is_planar(m_ptr->sample_fmt);
0195       int planeSize;
0196       int dataSize = ::av_samples_get_buffer_size(&planeSize, m_ptr->channels,
0197                          m_frame->nb_samples, m_ptr->sample_fmt, 1);
0198       if (*frameSize < dataSize)
0199         return -1;
0200       ::memcpy(samples, m_frame->extended_data[0], planeSize);
0201       if (planar && m_ptr->channels > 1) {
0202         uint8_t* out = reinterpret_cast<uint8_t*>(samples) + planeSize;
0203         for (int ch = 1; ch < m_ptr->channels; ++ch) {
0204           ::memcpy(out, m_frame->extended_data[ch], planeSize);
0205           out += planeSize;
0206         }
0207       }
0208       *frameSize = dataSize;
0209     } else {
0210       *frameSize = 0;
0211     }
0212     return len;
0213 #else
0214     if (::avcodec_send_packet(m_ptr, pkt) == 0 &&
0215         ::avcodec_receive_frame(m_ptr, m_frame) == 0) {
0216       int planar = ::av_sample_fmt_is_planar(m_ptr->sample_fmt);
0217       int planeSize;
0218       int dataSize = ::av_samples_get_buffer_size(&planeSize, channels(),
0219                          m_frame->nb_samples, m_ptr->sample_fmt, 1);
0220       if (*frameSize < dataSize)
0221         return -1;
0222       ::memcpy(samples, m_frame->extended_data[0], planeSize);
0223       if (planar && channels() > 1) {
0224         uint8_t* out = reinterpret_cast<uint8_t*>(samples) + planeSize;
0225         for (int ch = 1; ch < channels(); ++ch) {
0226           ::memcpy(out, m_frame->extended_data[ch], planeSize);
0227           out += planeSize;
0228         }
0229       }
0230       *frameSize = dataSize;
0231       return pkt->size;
0232     }
0233     *frameSize = 0;
0234     return -1;
0235 #endif
0236 #endif
0237   }
0238
0239 private:
0240   friend class Format;
0241   friend class Converter;
0242   AVCodecContext* m_ptr;
0243 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 0, 0)
0244   AVCodec* m_impl;
0245 #else
0246   const AVCodec* m_impl;
0247 #endif
0248   AVFrame* m_frame;
0249   bool m_opened;
0250 };
0251
0252 class Format {
0253 public:
0254   explicit Format(const char* fileName) : m_ptr(nullptr), m_streamIndex(-1), m_hasError(false) {
0255     if (
0256 #if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(53, 2, 0)
0257         ::av_open_input_file(&m_ptr, fileName, 0, 0, 0) != 0
0258 #else
0259         ::avformat_open_input(&m_ptr, fileName, nullptr, nullptr) != 0
0260 #endif
0261         ||
0262 #if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(53, 5, 0)
0263         ::av_find_stream_info(m_ptr) < 0
0264 #else
0265         ::avformat_find_stream_info(m_ptr, nullptr) < 0
0266 #endif
0267       )
0268       m_hasError = true;
0269   }
0270
0271   ~Format() {
0272     if (m_ptr)
0273 #if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(53, 21, 0)
0274       ::av_close_input_file(m_ptr);
0275 #else
0276       ::avformat_close_input(&m_ptr);
0277 #endif
0278   }
0279
0280   Format(const Format&) = delete;
0281   Format& operator=(const Format&) = delete;
0282
0283   bool hasError() const { return m_hasError; }
0284
0285   AVStream* findAudioStream(Codec* codec) {
0286     AVStream* stream = nullptr;
0287 #if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(52, 91, 0)
0288     for (unsigned i = 0; i < m_ptr->nb_streams; ++i) {
0289       codec->m_ptr = m_ptr->streams[i]->codec;
0290       if (codec->m_ptr && codec->m_ptr->codec_type == AVMEDIA_TYPE_AUDIO) {
0291         stream = m_ptr->streams[i];
0292         m_streamIndex = i;
0293         break;
0294       }
0295     }
0296     codec->m_impl = ::avcodec_find_decoder(codec->m_ptr->codec_id);
0297 #else
0298     m_streamIndex = ::av_find_best_stream(m_ptr, AVMEDIA_TYPE_AUDIO, -1, -1,
0299                                           &codec->m_impl, 0);
0300     if (m_streamIndex >= 0) {
0301       stream = m_ptr->streams[m_streamIndex];
0302     }
0303     if (stream) {
0304 #if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(57, 33, 100)
0305       codec->m_ptr = stream->codec;
0306 #else
0307       codec->m_ptr = ::avcodec_alloc_context3(codec->m_impl);
0308       if (codec->m_ptr) {
0309         if (::avcodec_parameters_to_context(codec->m_ptr, stream->codecpar) < 0)
0310           ::avcodec_free_context(&codec->m_ptr);
0311       }
0312 #endif
0313     }
0314 #endif
0315     if (codec->m_ptr) {
0316       codec->m_ptr->request_sample_fmt = AV_SAMPLE_FMT_S16;
0317     }
0318     return stream;
0319   }
0320
0321   int64_t duration() const { return m_ptr ? m_ptr->duration : AV_NOPTS_VALUE; }
0322
0323   int streamIndex() const { return m_streamIndex; }
0324
0325   bool readFrame(Packet& packet) {
0326     return ::av_read_frame(m_ptr, packet.data()) >= 0;
0327   }
0328
0329 private:
0330   AVFormatContext* m_ptr;
0331   int m_streamIndex;
0332   bool m_hasError;
0333 };
0334
0335 #if defined HAVE_AVRESAMPLE || defined HAVE_SWRESAMPLE
0336 class Converter {
0337 public:
0338   Converter() : m_ptr(nullptr), m_maxDstNumSamples(0), m_isOpen(false) {
0339     m_dstData[0] = nullptr;
0340   }
0341
0342   ~Converter() {
0343     if (m_dstData[0]) {
0344       ::av_freep(&m_dstData[0]);
0345     }
0346     if (m_ptr) {
0347 #ifdef HAVE_AVRESAMPLE
0348       if (m_isOpen) {
0349         ::avresample_close(m_ptr);
0350       }
0351       ::avresample_free(&m_ptr);
0352 #elif defined HAVE_SWRESAMPLE
0353       ::swr_free(&m_ptr);
0354 #endif
0355     }
0356   }
0357
0358   Converter(const Converter&) = delete;
0359   Converter& operator=(const Converter&) = delete;
0360
0361 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 37, 100)
0362   bool createForCodec(const Codec& codecCtx) {
0363     int64_t channelLayout = codecCtx.channelLayout();
0364     if (!channelLayout) {
0365       channelLayout = ::av_get_default_channel_layout(codecCtx.channels());
0366     }
0367 #ifdef HAVE_AVRESAMPLE
0368     if ((m_ptr = ::avresample_alloc_context()) != 0) {
0369       ::av_opt_set_int(m_ptr, "in_channel_layout",  channelLayout, 0);
0370       ::av_opt_set_int(m_ptr, "in_sample_fmt",      codecCtx.sampleFormat(), 0);
0371       ::av_opt_set_int(m_ptr, "in_sample_rate",     codecCtx.sampleRate(), 0);
0372       ::av_opt_set_int(m_ptr, "out_channel_layout", channelLayout, 0);
0373       ::av_opt_set_int(m_ptr, "out_sample_fmt",     AV_SAMPLE_FMT_S16, 0);
0374       ::av_opt_set_int(m_ptr, "out_sample_rate",    codecCtx.sampleRate(), 0);
0375       m_isOpen = ::avresample_open(m_ptr) >= 0;
0376       return m_isOpen;
0377     }
0378 #elif defined HAVE_SWRESAMPLE
0379     if ((m_ptr = ::swr_alloc_set_opts(
0380            nullptr, channelLayout, AV_SAMPLE_FMT_S16, codecCtx.sampleRate(),
0381            channelLayout, codecCtx.sampleFormat(), codecCtx.sampleRate(),
0382            0, nullptr)) != nullptr) {
0383       m_isOpen = ::swr_init(m_ptr) >= 0;
0384       return m_isOpen;
0385     }
0386 #endif
0387     return false;
0388   }
0389 #else
0390   bool createForCodec(const Codec& codecCtx) {
0391     AVChannelLayout channelLayout;
0392     if (const AVChannelLayout* codecChannelLayout = codecCtx.channelLayout();
0393         ::av_channel_layout_check(codecChannelLayout)) {
0394       ::av_channel_layout_copy(&channelLayout, codecChannelLayout);
0395     } else {
0396       ::av_channel_layout_default(&channelLayout, codecCtx.channels());
0397     }
0398     m_ptr = nullptr;
0399     if (::swr_alloc_set_opts2(
0400              &m_ptr, &channelLayout, AV_SAMPLE_FMT_S16, codecCtx.sampleRate(),
0401              &channelLayout, codecCtx.sampleFormat(), codecCtx.sampleRate(),
0402              0, nullptr) == 0) {
0403       m_isOpen = ::swr_init(m_ptr) >= 0;
0404       ::av_channel_layout_uninit(&channelLayout);
0405       return m_isOpen;
0406     }
0407     ::av_channel_layout_uninit(&channelLayout);
0408     return false;
0409   }
0410 #endif
0411
0412   int16_t* convert(const Codec& codecCtx,
0413                    int16_t* buffer1, int16_t* buffer2,
0414                    int& bufferSize) {
0415     if (m_ptr) {
0416       int numSamplesOut;
0417       int16_t* result;
0418       if (codecCtx.m_frame) {
0419         if (codecCtx.m_frame->nb_samples > m_maxDstNumSamples) {
0420           ::av_freep(&m_dstData[0]);
0421           int dstLinesize = 0;
0422           if (::av_samples_alloc(m_dstData, &dstLinesize, codecCtx.channels(),
0423                       codecCtx.m_frame->nb_samples, AV_SAMPLE_FMT_S16, 1) < 0) {
0424             return nullptr;
0425           }
0426           m_maxDstNumSamples = codecCtx.m_frame->nb_samples;
0427         }
0428 #ifdef HAVE_AVRESAMPLE
0429 #if LIBAVRESAMPLE_VERSION_INT < AV_VERSION_INT(1, 0, 0)
0430         numSamplesOut = ::avresample_convert(
0431               m_ptr, reinterpret_cast<void**>(m_dstData), 0,
0432               codecCtx.m_frame->nb_samples,
0433               reinterpret_cast<void**>(codecCtx.m_frame->data), 0,
0434               codecCtx.m_frame->nb_samples);
0435 #else
0436         numSamplesOut = ::avresample_convert(
0437               m_ptr, m_dstData, 0, codecCtx.m_frame->nb_samples,
0438               reinterpret_cast<uint8_t**>(codecCtx.m_frame->data), 0,
0439               codecCtx.m_frame->nb_samples);
0440 #endif
0441 #elif defined HAVE_SWRESAMPLE
0442         numSamplesOut = ::swr_convert(
0443               m_ptr, m_dstData, codecCtx.m_frame->nb_samples,
0444               const_cast<const uint8_t**>(reinterpret_cast<uint8_t**>(
0445                                             codecCtx.m_frame->data)),
0446               codecCtx.m_frame->nb_samples);
0447 #endif
0448         result = reinterpret_cast<int16_t*>(m_dstData[0]);
0449       } else {
0450         int bytesPerSample = ::av_get_bytes_per_sample(codecCtx.sampleFormat());
0451         int numSamplesIn = bytesPerSample != 0 ? bufferSize / bytesPerSample : 0;
0452         int linesizeIn;
0453         int numChannels = codecCtx.channels();
0454         ::av_samples_get_buffer_size(&linesizeIn, numChannels,
0455             numChannels != 0 ? numSamplesIn / numChannels : 0,
0456             codecCtx.sampleFormat(), 0);
0457 #ifdef HAVE_AVRESAMPLE
0458 #if LIBAVRESAMPLE_VERSION_INT < AV_VERSION_INT(1, 0, 0)
0459         numSamplesOut = ::avresample_convert(
0460               m_ptr, reinterpret_cast<void**>(&buffer2), 0, BUFFER_SIZE,
0461               reinterpret_cast<void**>(&buffer1), linesizeIn, numSamplesIn);
0462 #else
0463         numSamplesOut = ::avresample_convert(
0464               m_ptr, reinterpret_cast<uint8_t**>(&buffer2), 0, BUFFER_SIZE,
0465               reinterpret_cast<uint8_t**>(&buffer1), linesizeIn, numSamplesIn);
0466 #endif
0467 #elif defined HAVE_SWRESAMPLE
0468         numSamplesOut = ::swr_convert(
0469               m_ptr, reinterpret_cast<uint8_t**>(&buffer2), BUFFER_SIZE,
0470               const_cast<const uint8_t**>(reinterpret_cast<uint8_t**>(&buffer1)),
0471               numSamplesIn);
0472 #endif
0473         result = buffer2;
0474       }
0475       if (numSamplesOut < 0) {
0476         return nullptr;
0477       }
0478       bufferSize = ::av_samples_get_buffer_size(nullptr, codecCtx.channels(),
0479                    numSamplesOut, AV_SAMPLE_FMT_S16, 1);
0480       return result;
0481     }
0482     return buffer1;
0483   }
0484
0485 private:
0486 #ifdef HAVE_AVRESAMPLE
0487   AVAudioResampleContext* m_ptr;
0488 #elif defined HAVE_SWRESAMPLE
0489   SwrContext* m_ptr;
0490 #endif
0491   uint8_t* m_dstData[1];
0492   int m_maxDstNumSamples;
0493   bool m_isOpen;
0494 };
0495 #elif defined HAVE_AV_AUDIO_CONVERT
0496 class Converter {
0497 public:
0498   Converter() : m_ptr(0) {}
0499
0500   ~Converter() {
0501     if (m_ptr)
0502       ::av_audio_convert_free(m_ptr);
0503   }
0504
0505   bool createForCodec(const Codec& codecCtx) {
0506     m_ptr = ::av_audio_convert_alloc(AV_SAMPLE_FMT_S16, codecCtx.channels(),
0507                  codecCtx.sampleFormat(), codecCtx.channels(), 0, 0);
0508     return m_ptr != 0;
0509   }
0510
0511   int16_t* convert(const Codec& codecCtx,
0512                    int16_t* buffer1, int16_t* buffer2,
0513                    int& bufferSize) {
0514     if (m_ptr) {
0515       const void *ibuf[6] = { buffer1 };
0516       void *obuf[6] = { buffer2 };
0517 #if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(51, 4, 0)
0518       int istride[6] = { ::av_get_bits_per_sample_format(codecCtx.sampleFormat()) / 8 };
0519 #else
0520       int istride[6] = { ::av_get_bytes_per_sample(codecCtx.sampleFormat()) };
0521 #endif
0522       int ostride[6] = { 2 };
0523       int len = istride[0] != 0 ? bufferSize / istride[0] : 0;
0524       if (::av_audio_convert(m_ptr, obuf, ostride, ibuf, istride, len) < 0) {
0525         return 0;
0526       }
0527       bufferSize = len * ostride[0];
0528       return buffer2;
0529     } else {
0530       return buffer1;
0531     }
0532   }
0533
0534 private:
0535   AVAudioConvert* m_ptr;
0536 };
0537 #else
0538 class Converter {
0539 public:
0540   bool createForCodec(const Codec&) { return false; }
0541   int16_t* convert(const Codec&, int16_t* buffer1, int16_t*, int&) {
0542     return buffer1;
0543   }
0544 };
0545 #endif
0546
0547 }
0548
0549
0550 FFmpegFingerprintDecoder::FFmpegFingerprintDecoder(QObject* parent)
0551   : AbstractFingerprintDecoder(parent)
0552 {
0553 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 9, 100)
0554   ::av_register_all();
0555 #endif
0556   ::av_log_set_level(AV_LOG_ERROR);
0557
0558   m_buffer1 = static_cast<qint16*>(::av_malloc(BUFFER_SIZE + 16));
0559   m_buffer2 = static_cast<qint16*>(::av_malloc(BUFFER_SIZE + 16));
0560 }
0561
0562 FFmpegFingerprintDecoder::~FFmpegFingerprintDecoder()
0563 {
0564   ::av_free(m_buffer1);
0565   ::av_free(m_buffer2);
0566 }
0567
0568 void FFmpegFingerprintDecoder::start(const QString& filePath)
0569 {
0570   AbstractFingerprintDecoder::start(filePath);
0571   /*
0572    * The code here is based on fpcalc.c from chromaprint-0.6/examples.
0573    */
0574   FingerprintCalculator::Error err = FingerprintCalculator::Ok;
0575   int duration = 0;
0576   QByteArray fileName(QFile::encodeName(filePath));
0577   Format format(fileName.constData());
0578   if (format.hasError()) {
0579     err = FingerprintCalculator::NoStreamFound;
0580     emit error(err);
0581     return;
0582   }
0583
0584   Codec codec;
0585   AVStream* stream = format.findAudioStream(&codec);
0586   if (!stream) {
0587     err = FingerprintCalculator::NoStreamFound;
0588     emit error(err);
0589     return;
0590   }
0591
0592   if (!codec.open() || codec.channels() <= 0) {
0593     err = FingerprintCalculator::NoCodecFound;
0594     emit error(err);
0595     return;
0596   }
0597
0598   Converter converter;
0599   if (codec.sampleFormat() != AV_SAMPLE_FMT_S16) {
0600     if (!converter.createForCodec(codec)) {
0601       err = FingerprintCalculator::NoConverterFound;
0602       emit error(err);
0603       return;
0604     }
0605   }
0606
0607   if (stream->duration != AV_NOPTS_VALUE) {
0608     duration = stream->time_base.den != 0
0609         ? stream->time_base.num * stream->duration / stream->time_base.den
0610         : 0;
0611   } else if (format.duration() != AV_NOPTS_VALUE) {
0612     duration = format.duration() / AV_TIME_BASE;
0613   } else {
0614     err = FingerprintCalculator::NoStreamFound;
0615     emit error(err);
0616     return;
0617   }
0618
0619 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 133, 100)
0620   AVPacket avpacket, avpacketTemp;
0621   AVPacket* packet = &avpacket;
0622   AVPacket* packetTemp = &avpacketTemp;
0623   ::av_init_packet(packet);
0624   ::av_init_packet(packetTemp);
0625 #else
0626   AVPacket* packet = ::av_packet_alloc();
0627   AVPacket* packetTemp = ::av_packet_alloc();
0628 #endif
0629
0630   constexpr int MAX_LENGTH = 120;
0631   int remaining = MAX_LENGTH * codec.channels() * codec.sampleRate();
0632   emit started(codec.sampleRate(), codec.channels());
0633
0634   while (remaining > 0 && err == FingerprintCalculator::Ok) {
0635     Packet pkt(packet);
0636     if (!format.readFrame(pkt))
0637       break;
0638
0639     if (pkt.streamIndex() == format.streamIndex()) {
0640       packetTemp->data = packet->data;
0641       packetTemp->size = packet->size;
0642
0643       while (packetTemp->size > 0) {
0644         int bufferSize = BUFFER_SIZE;
0645         int consumed = codec.decode(m_buffer1, &bufferSize, packetTemp);
0646
0647         if (consumed < 0) {
0648           break;
0649         }
0650
0651         packetTemp->data += consumed;
0652         packetTemp->size -= consumed;
0653
0654         if (bufferSize <= 0 || bufferSize > BUFFER_SIZE) {
0655           continue;
0656         }
0657
0658         int16_t *buffer = converter.convert(codec, m_buffer1, m_buffer2, bufferSize);
0659         if (!buffer)
0660           break;
0661
0662         int length = qMin(remaining, bufferSize / 2);
0663         emit bufferReady(QByteArray(reinterpret_cast<char*>(buffer), length * 2));
0664         if (isStopped()) {
0665           err = FingerprintCalculator::FingerprintCalculationFailed;
0666           break;
0667         }
0668
0669         remaining -= length;
0670         if (remaining <= 0) {
0671           break;
0672         }
0673       }
0674     }
0675   }
0676 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(58, 133, 100)
0677   ::av_packet_free(&packet);
0678   ::av_packet_free(&packetTemp);
0679 #endif
0680   if (err != FingerprintCalculator::Ok) {
0681     emit error(err);
0682   } else {
0683     emit finished(duration);
0684   }
0685 }
0686
0687
0688 /**
0689  * Create concrete fingerprint decoder.
0690  * @param parent parent object
0691  * @return fingerprint decoder instance.
0692  * @remarks This static method will be implemented by the concrete
0693  * fingerprint decoder which is used.
0694  */
0695 AbstractFingerprintDecoder*
0696 AbstractFingerprintDecoder::createFingerprintDecoder(QObject* parent) {
0697   return new FFmpegFingerprintDecoder(parent);
0698 }