File indexing completed on 2024-05-19 04:56:24
0001 /** 0002 * \file ffmpegfingerprintdecoder.cpp 0003 * Chromaprint fingerprint decoder using FFmpeg. 0004 * 0005 * \b Project: Kid3 0006 * \author Urs Fleisch 0007 * \date 15 Feb 2013 0008 * 0009 * Copyright (C) 2013-2024 Urs Fleisch 0010 * 0011 * This file is part of Kid3. 0012 * 0013 * Kid3 is free software; you can redistribute it and/or modify 0014 * it under the terms of the GNU General Public License as published by 0015 * the Free Software Foundation; either version 2 of the License, or 0016 * (at your option) any later version. 0017 * 0018 * Kid3 is distributed in the hope that it will be useful, 0019 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0020 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 0021 * GNU General Public License for more details. 0022 * 0023 * You should have received a copy of the GNU General Public License 0024 * along with this program. If not, see <http://www.gnu.org/licenses/>. 0025 */ 0026 0027 /** Needed for UINT64_C, INT64_C macros used by libav includes. */ 0028 #define __STDC_CONSTANT_MACROS 0029 #include "ffmpegfingerprintdecoder.h" 0030 #include "acoustidconfig.h" 0031 0032 #include <cstdint> 0033 #include <cstdio> 0034 extern "C" { 0035 #include <libavcodec/avcodec.h> 0036 #include <libavformat/avformat.h> 0037 0038 #ifdef HAVE_AVRESAMPLE 0039 #include <libavresample/avresample.h> 0040 #include <libavutil/opt.h> 0041 #elif defined HAVE_SWRESAMPLE 0042 #include <libswresample/swresample.h> 0043 #elif defined HAVE_AV_AUDIO_CONVERT 0044 #include <libavutil/audioconvert.h> 0045 #include <libavutil/samplefmt.h> 0046 0047 /* 0048 * Declarations taken from "ffmpeg/audioconvert.h", "ffmpeg/samplefmt.h". 0049 */ 0050 //! @cond 0051 struct AVAudioConvert; 0052 typedef struct AVAudioConvert AVAudioConvert; 0053 0054 AVAudioConvert *av_audio_convert_alloc(enum AVSampleFormat out_fmt, int out_channels, 0055 enum AVSampleFormat in_fmt, int in_channels, 0056 const float *matrix, int flags); 0057 void av_audio_convert_free(AVAudioConvert *ctx); 0058 int av_audio_convert(AVAudioConvert *ctx, 0059 void * const out[6], const int out_stride[6], 0060 const void * const in[6], const int in_stride[6], int len); 0061 //! @endcond 0062 #endif 0063 } 0064 #include <QFile> 0065 #include "fingerprintcalculator.h" 0066 0067 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(52, 94, 1) 0068 #define AV_SAMPLE_FMT_S16 SAMPLE_FMT_S16 0069 #define AVMEDIA_TYPE_AUDIO CODEC_TYPE_AUDIO 0070 #endif 0071 0072 /** Bytes needed for 1 second of 48khz 32bit audio. */ 0073 #ifdef AVCODEC_MAX_AUDIO_FRAME_SIZE 0074 #define MAX_AUDIO_FRAME_SIZE AVCODEC_MAX_AUDIO_FRAME_SIZE 0075 #else 0076 #define MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio 0077 #endif 0078 0079 namespace { 0080 constexpr int BUFFER_SIZE = MAX_AUDIO_FRAME_SIZE * 2; 0081 0082 /* 0083 * The following classes are used to benefit from the C++ 0084 * "Resource Acquisition Is Initialization" (RAII) idiom when dealing with 0085 * AV resources. 0086 */ 0087 0088 class Packet { 0089 public: 0090 explicit Packet(AVPacket* packet) : m_ptr(packet) { 0091 } 0092 0093 AVPacket* data() { return m_ptr; } 0094 0095 ~Packet() { 0096 if (m_ptr && m_ptr->data) { 0097 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(57, 8, 0) 0098 ::av_free_packet(m_ptr); 0099 #else 0100 ::av_packet_unref(m_ptr); 0101 #endif 0102 } 0103 } 0104 0105 Packet(const Packet&) = delete; 0106 Packet& operator=(const Packet&) = delete; 0107 0108 int streamIndex() const { return m_ptr ? m_ptr->stream_index : -1; } 0109 0110 private: 0111 AVPacket* m_ptr; 0112 }; 0113 0114 class Codec { 0115 public: 0116 Codec() : m_ptr(nullptr), m_impl(nullptr), m_frame(nullptr), m_opened(false) { 0117 } 0118 0119 ~Codec() { 0120 if (m_frame) 0121 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(54, 28, 0) 0122 ::av_freep(&m_frame); 0123 #elif LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55, 28, 1) 0124 ::avcodec_free_frame(&m_frame); 0125 #else 0126 ::av_frame_free(&m_frame); 0127 #endif 0128 if (m_opened) 0129 ::avcodec_close(m_ptr); 0130 #if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 33, 100) 0131 if (m_ptr) 0132 ::avcodec_free_context(&m_ptr); 0133 #endif 0134 } 0135 0136 Codec(const Codec&) = delete; 0137 Codec& operator=(const Codec&) = delete; 0138 0139 bool open() { 0140 m_opened = false; 0141 if (m_ptr && m_impl) { 0142 m_opened = 0143 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(53, 5, 0) 0144 ::avcodec_open(m_ptr, m_impl) >= 0 0145 #else 0146 ::avcodec_open2(m_ptr, m_impl, nullptr) >= 0 0147 #endif 0148 ; 0149 } 0150 return m_opened; 0151 } 0152 0153 int channels() const { 0154 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 37, 100) 0155 return m_ptr->channels; 0156 #else 0157 return m_ptr->ch_layout.nb_channels; 0158 #endif 0159 } 0160 0161 AVSampleFormat sampleFormat() const { return m_ptr->sample_fmt; } 0162 0163 int sampleRate() const { return m_ptr->sample_rate; } 0164 0165 #if defined HAVE_AVRESAMPLE || defined HAVE_SWRESAMPLE 0166 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 37, 100) 0167 uint64_t channelLayout() const { return m_ptr->channel_layout; } 0168 #else 0169 const AVChannelLayout* channelLayout() const { return &m_ptr->ch_layout; } 0170 #endif 0171 #endif 0172 0173 int decode(int16_t* samples, int* frameSize, AVPacket* pkt) { 0174 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(52, 23, 0) 0175 return ::avcodec_decode_audio2(m_ptr, 0176 samples, frameSize, pkt->data, pkt->size); 0177 #elif LIBAVCODEC_VERSION_INT < AV_VERSION_INT(53, 25, 0) 0178 return ::avcodec_decode_audio3(m_ptr, 0179 samples, frameSize, pkt); 0180 #else 0181 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55, 28, 1) 0182 if (!m_frame) 0183 m_frame = ::avcodec_alloc_frame(); 0184 ::avcodec_get_frame_defaults(m_frame); 0185 #else 0186 if (!m_frame) 0187 m_frame = ::av_frame_alloc(); 0188 ::av_frame_unref(m_frame); 0189 #endif 0190 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(57, 37, 100) 0191 int decoded = 0; 0192 int len = ::avcodec_decode_audio4(m_ptr, m_frame, &decoded, pkt); 0193 if (len >= 0 && decoded) { 0194 int planar = ::av_sample_fmt_is_planar(m_ptr->sample_fmt); 0195 int planeSize; 0196 int dataSize = ::av_samples_get_buffer_size(&planeSize, m_ptr->channels, 0197 m_frame->nb_samples, m_ptr->sample_fmt, 1); 0198 if (*frameSize < dataSize) 0199 return -1; 0200 ::memcpy(samples, m_frame->extended_data[0], planeSize); 0201 if (planar && m_ptr->channels > 1) { 0202 uint8_t* out = reinterpret_cast<uint8_t*>(samples) + planeSize; 0203 for (int ch = 1; ch < m_ptr->channels; ++ch) { 0204 ::memcpy(out, m_frame->extended_data[ch], planeSize); 0205 out += planeSize; 0206 } 0207 } 0208 *frameSize = dataSize; 0209 } else { 0210 *frameSize = 0; 0211 } 0212 return len; 0213 #else 0214 if (::avcodec_send_packet(m_ptr, pkt) == 0 && 0215 ::avcodec_receive_frame(m_ptr, m_frame) == 0) { 0216 int planar = ::av_sample_fmt_is_planar(m_ptr->sample_fmt); 0217 int planeSize; 0218 int dataSize = ::av_samples_get_buffer_size(&planeSize, channels(), 0219 m_frame->nb_samples, m_ptr->sample_fmt, 1); 0220 if (*frameSize < dataSize) 0221 return -1; 0222 ::memcpy(samples, m_frame->extended_data[0], planeSize); 0223 if (planar && channels() > 1) { 0224 uint8_t* out = reinterpret_cast<uint8_t*>(samples) + planeSize; 0225 for (int ch = 1; ch < channels(); ++ch) { 0226 ::memcpy(out, m_frame->extended_data[ch], planeSize); 0227 out += planeSize; 0228 } 0229 } 0230 *frameSize = dataSize; 0231 return pkt->size; 0232 } 0233 *frameSize = 0; 0234 return -1; 0235 #endif 0236 #endif 0237 } 0238 0239 private: 0240 friend class Format; 0241 friend class Converter; 0242 AVCodecContext* m_ptr; 0243 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 0, 0) 0244 AVCodec* m_impl; 0245 #else 0246 const AVCodec* m_impl; 0247 #endif 0248 AVFrame* m_frame; 0249 bool m_opened; 0250 }; 0251 0252 class Format { 0253 public: 0254 explicit Format(const char* fileName) : m_ptr(nullptr), m_streamIndex(-1), m_hasError(false) { 0255 if ( 0256 #if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(53, 2, 0) 0257 ::av_open_input_file(&m_ptr, fileName, 0, 0, 0) != 0 0258 #else 0259 ::avformat_open_input(&m_ptr, fileName, nullptr, nullptr) != 0 0260 #endif 0261 || 0262 #if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(53, 5, 0) 0263 ::av_find_stream_info(m_ptr) < 0 0264 #else 0265 ::avformat_find_stream_info(m_ptr, nullptr) < 0 0266 #endif 0267 ) 0268 m_hasError = true; 0269 } 0270 0271 ~Format() { 0272 if (m_ptr) 0273 #if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(53, 21, 0) 0274 ::av_close_input_file(m_ptr); 0275 #else 0276 ::avformat_close_input(&m_ptr); 0277 #endif 0278 } 0279 0280 Format(const Format&) = delete; 0281 Format& operator=(const Format&) = delete; 0282 0283 bool hasError() const { return m_hasError; } 0284 0285 AVStream* findAudioStream(Codec* codec) { 0286 AVStream* stream = nullptr; 0287 #if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(52, 91, 0) 0288 for (unsigned i = 0; i < m_ptr->nb_streams; ++i) { 0289 codec->m_ptr = m_ptr->streams[i]->codec; 0290 if (codec->m_ptr && codec->m_ptr->codec_type == AVMEDIA_TYPE_AUDIO) { 0291 stream = m_ptr->streams[i]; 0292 m_streamIndex = i; 0293 break; 0294 } 0295 } 0296 codec->m_impl = ::avcodec_find_decoder(codec->m_ptr->codec_id); 0297 #else 0298 m_streamIndex = ::av_find_best_stream(m_ptr, AVMEDIA_TYPE_AUDIO, -1, -1, 0299 &codec->m_impl, 0); 0300 if (m_streamIndex >= 0) { 0301 stream = m_ptr->streams[m_streamIndex]; 0302 } 0303 if (stream) { 0304 #if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(57, 33, 100) 0305 codec->m_ptr = stream->codec; 0306 #else 0307 codec->m_ptr = ::avcodec_alloc_context3(codec->m_impl); 0308 if (codec->m_ptr) { 0309 if (::avcodec_parameters_to_context(codec->m_ptr, stream->codecpar) < 0) 0310 ::avcodec_free_context(&codec->m_ptr); 0311 } 0312 #endif 0313 } 0314 #endif 0315 if (codec->m_ptr) { 0316 codec->m_ptr->request_sample_fmt = AV_SAMPLE_FMT_S16; 0317 } 0318 return stream; 0319 } 0320 0321 int64_t duration() const { return m_ptr ? m_ptr->duration : AV_NOPTS_VALUE; } 0322 0323 int streamIndex() const { return m_streamIndex; } 0324 0325 bool readFrame(Packet& packet) { 0326 return ::av_read_frame(m_ptr, packet.data()) >= 0; 0327 } 0328 0329 private: 0330 AVFormatContext* m_ptr; 0331 int m_streamIndex; 0332 bool m_hasError; 0333 }; 0334 0335 #if defined HAVE_AVRESAMPLE || defined HAVE_SWRESAMPLE 0336 class Converter { 0337 public: 0338 Converter() : m_ptr(nullptr), m_maxDstNumSamples(0), m_isOpen(false) { 0339 m_dstData[0] = nullptr; 0340 } 0341 0342 ~Converter() { 0343 if (m_dstData[0]) { 0344 ::av_freep(&m_dstData[0]); 0345 } 0346 if (m_ptr) { 0347 #ifdef HAVE_AVRESAMPLE 0348 if (m_isOpen) { 0349 ::avresample_close(m_ptr); 0350 } 0351 ::avresample_free(&m_ptr); 0352 #elif defined HAVE_SWRESAMPLE 0353 ::swr_free(&m_ptr); 0354 #endif 0355 } 0356 } 0357 0358 Converter(const Converter&) = delete; 0359 Converter& operator=(const Converter&) = delete; 0360 0361 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 37, 100) 0362 bool createForCodec(const Codec& codecCtx) { 0363 int64_t channelLayout = codecCtx.channelLayout(); 0364 if (!channelLayout) { 0365 channelLayout = ::av_get_default_channel_layout(codecCtx.channels()); 0366 } 0367 #ifdef HAVE_AVRESAMPLE 0368 if ((m_ptr = ::avresample_alloc_context()) != 0) { 0369 ::av_opt_set_int(m_ptr, "in_channel_layout", channelLayout, 0); 0370 ::av_opt_set_int(m_ptr, "in_sample_fmt", codecCtx.sampleFormat(), 0); 0371 ::av_opt_set_int(m_ptr, "in_sample_rate", codecCtx.sampleRate(), 0); 0372 ::av_opt_set_int(m_ptr, "out_channel_layout", channelLayout, 0); 0373 ::av_opt_set_int(m_ptr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0); 0374 ::av_opt_set_int(m_ptr, "out_sample_rate", codecCtx.sampleRate(), 0); 0375 m_isOpen = ::avresample_open(m_ptr) >= 0; 0376 return m_isOpen; 0377 } 0378 #elif defined HAVE_SWRESAMPLE 0379 if ((m_ptr = ::swr_alloc_set_opts( 0380 nullptr, channelLayout, AV_SAMPLE_FMT_S16, codecCtx.sampleRate(), 0381 channelLayout, codecCtx.sampleFormat(), codecCtx.sampleRate(), 0382 0, nullptr)) != nullptr) { 0383 m_isOpen = ::swr_init(m_ptr) >= 0; 0384 return m_isOpen; 0385 } 0386 #endif 0387 return false; 0388 } 0389 #else 0390 bool createForCodec(const Codec& codecCtx) { 0391 AVChannelLayout channelLayout; 0392 if (const AVChannelLayout* codecChannelLayout = codecCtx.channelLayout(); 0393 ::av_channel_layout_check(codecChannelLayout)) { 0394 ::av_channel_layout_copy(&channelLayout, codecChannelLayout); 0395 } else { 0396 ::av_channel_layout_default(&channelLayout, codecCtx.channels()); 0397 } 0398 m_ptr = nullptr; 0399 if (::swr_alloc_set_opts2( 0400 &m_ptr, &channelLayout, AV_SAMPLE_FMT_S16, codecCtx.sampleRate(), 0401 &channelLayout, codecCtx.sampleFormat(), codecCtx.sampleRate(), 0402 0, nullptr) == 0) { 0403 m_isOpen = ::swr_init(m_ptr) >= 0; 0404 ::av_channel_layout_uninit(&channelLayout); 0405 return m_isOpen; 0406 } 0407 ::av_channel_layout_uninit(&channelLayout); 0408 return false; 0409 } 0410 #endif 0411 0412 int16_t* convert(const Codec& codecCtx, 0413 int16_t* buffer1, int16_t* buffer2, 0414 int& bufferSize) { 0415 if (m_ptr) { 0416 int numSamplesOut; 0417 int16_t* result; 0418 if (codecCtx.m_frame) { 0419 if (codecCtx.m_frame->nb_samples > m_maxDstNumSamples) { 0420 ::av_freep(&m_dstData[0]); 0421 int dstLinesize = 0; 0422 if (::av_samples_alloc(m_dstData, &dstLinesize, codecCtx.channels(), 0423 codecCtx.m_frame->nb_samples, AV_SAMPLE_FMT_S16, 1) < 0) { 0424 return nullptr; 0425 } 0426 m_maxDstNumSamples = codecCtx.m_frame->nb_samples; 0427 } 0428 #ifdef HAVE_AVRESAMPLE 0429 #if LIBAVRESAMPLE_VERSION_INT < AV_VERSION_INT(1, 0, 0) 0430 numSamplesOut = ::avresample_convert( 0431 m_ptr, reinterpret_cast<void**>(m_dstData), 0, 0432 codecCtx.m_frame->nb_samples, 0433 reinterpret_cast<void**>(codecCtx.m_frame->data), 0, 0434 codecCtx.m_frame->nb_samples); 0435 #else 0436 numSamplesOut = ::avresample_convert( 0437 m_ptr, m_dstData, 0, codecCtx.m_frame->nb_samples, 0438 reinterpret_cast<uint8_t**>(codecCtx.m_frame->data), 0, 0439 codecCtx.m_frame->nb_samples); 0440 #endif 0441 #elif defined HAVE_SWRESAMPLE 0442 numSamplesOut = ::swr_convert( 0443 m_ptr, m_dstData, codecCtx.m_frame->nb_samples, 0444 const_cast<const uint8_t**>(reinterpret_cast<uint8_t**>( 0445 codecCtx.m_frame->data)), 0446 codecCtx.m_frame->nb_samples); 0447 #endif 0448 result = reinterpret_cast<int16_t*>(m_dstData[0]); 0449 } else { 0450 int bytesPerSample = ::av_get_bytes_per_sample(codecCtx.sampleFormat()); 0451 int numSamplesIn = bytesPerSample != 0 ? bufferSize / bytesPerSample : 0; 0452 int linesizeIn; 0453 int numChannels = codecCtx.channels(); 0454 ::av_samples_get_buffer_size(&linesizeIn, numChannels, 0455 numChannels != 0 ? numSamplesIn / numChannels : 0, 0456 codecCtx.sampleFormat(), 0); 0457 #ifdef HAVE_AVRESAMPLE 0458 #if LIBAVRESAMPLE_VERSION_INT < AV_VERSION_INT(1, 0, 0) 0459 numSamplesOut = ::avresample_convert( 0460 m_ptr, reinterpret_cast<void**>(&buffer2), 0, BUFFER_SIZE, 0461 reinterpret_cast<void**>(&buffer1), linesizeIn, numSamplesIn); 0462 #else 0463 numSamplesOut = ::avresample_convert( 0464 m_ptr, reinterpret_cast<uint8_t**>(&buffer2), 0, BUFFER_SIZE, 0465 reinterpret_cast<uint8_t**>(&buffer1), linesizeIn, numSamplesIn); 0466 #endif 0467 #elif defined HAVE_SWRESAMPLE 0468 numSamplesOut = ::swr_convert( 0469 m_ptr, reinterpret_cast<uint8_t**>(&buffer2), BUFFER_SIZE, 0470 const_cast<const uint8_t**>(reinterpret_cast<uint8_t**>(&buffer1)), 0471 numSamplesIn); 0472 #endif 0473 result = buffer2; 0474 } 0475 if (numSamplesOut < 0) { 0476 return nullptr; 0477 } 0478 bufferSize = ::av_samples_get_buffer_size(nullptr, codecCtx.channels(), 0479 numSamplesOut, AV_SAMPLE_FMT_S16, 1); 0480 return result; 0481 } 0482 return buffer1; 0483 } 0484 0485 private: 0486 #ifdef HAVE_AVRESAMPLE 0487 AVAudioResampleContext* m_ptr; 0488 #elif defined HAVE_SWRESAMPLE 0489 SwrContext* m_ptr; 0490 #endif 0491 uint8_t* m_dstData[1]; 0492 int m_maxDstNumSamples; 0493 bool m_isOpen; 0494 }; 0495 #elif defined HAVE_AV_AUDIO_CONVERT 0496 class Converter { 0497 public: 0498 Converter() : m_ptr(0) {} 0499 0500 ~Converter() { 0501 if (m_ptr) 0502 ::av_audio_convert_free(m_ptr); 0503 } 0504 0505 bool createForCodec(const Codec& codecCtx) { 0506 m_ptr = ::av_audio_convert_alloc(AV_SAMPLE_FMT_S16, codecCtx.channels(), 0507 codecCtx.sampleFormat(), codecCtx.channels(), 0, 0); 0508 return m_ptr != 0; 0509 } 0510 0511 int16_t* convert(const Codec& codecCtx, 0512 int16_t* buffer1, int16_t* buffer2, 0513 int& bufferSize) { 0514 if (m_ptr) { 0515 const void *ibuf[6] = { buffer1 }; 0516 void *obuf[6] = { buffer2 }; 0517 #if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(51, 4, 0) 0518 int istride[6] = { ::av_get_bits_per_sample_format(codecCtx.sampleFormat()) / 8 }; 0519 #else 0520 int istride[6] = { ::av_get_bytes_per_sample(codecCtx.sampleFormat()) }; 0521 #endif 0522 int ostride[6] = { 2 }; 0523 int len = istride[0] != 0 ? bufferSize / istride[0] : 0; 0524 if (::av_audio_convert(m_ptr, obuf, ostride, ibuf, istride, len) < 0) { 0525 return 0; 0526 } 0527 bufferSize = len * ostride[0]; 0528 return buffer2; 0529 } else { 0530 return buffer1; 0531 } 0532 } 0533 0534 private: 0535 AVAudioConvert* m_ptr; 0536 }; 0537 #else 0538 class Converter { 0539 public: 0540 bool createForCodec(const Codec&) { return false; } 0541 int16_t* convert(const Codec&, int16_t* buffer1, int16_t*, int&) { 0542 return buffer1; 0543 } 0544 }; 0545 #endif 0546 0547 } 0548 0549 0550 FFmpegFingerprintDecoder::FFmpegFingerprintDecoder(QObject* parent) 0551 : AbstractFingerprintDecoder(parent) 0552 { 0553 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 9, 100) 0554 ::av_register_all(); 0555 #endif 0556 ::av_log_set_level(AV_LOG_ERROR); 0557 0558 m_buffer1 = static_cast<qint16*>(::av_malloc(BUFFER_SIZE + 16)); 0559 m_buffer2 = static_cast<qint16*>(::av_malloc(BUFFER_SIZE + 16)); 0560 } 0561 0562 FFmpegFingerprintDecoder::~FFmpegFingerprintDecoder() 0563 { 0564 ::av_free(m_buffer1); 0565 ::av_free(m_buffer2); 0566 } 0567 0568 void FFmpegFingerprintDecoder::start(const QString& filePath) 0569 { 0570 AbstractFingerprintDecoder::start(filePath); 0571 /* 0572 * The code here is based on fpcalc.c from chromaprint-0.6/examples. 0573 */ 0574 FingerprintCalculator::Error err = FingerprintCalculator::Ok; 0575 int duration = 0; 0576 QByteArray fileName(QFile::encodeName(filePath)); 0577 Format format(fileName.constData()); 0578 if (format.hasError()) { 0579 err = FingerprintCalculator::NoStreamFound; 0580 emit error(err); 0581 return; 0582 } 0583 0584 Codec codec; 0585 AVStream* stream = format.findAudioStream(&codec); 0586 if (!stream) { 0587 err = FingerprintCalculator::NoStreamFound; 0588 emit error(err); 0589 return; 0590 } 0591 0592 if (!codec.open() || codec.channels() <= 0) { 0593 err = FingerprintCalculator::NoCodecFound; 0594 emit error(err); 0595 return; 0596 } 0597 0598 Converter converter; 0599 if (codec.sampleFormat() != AV_SAMPLE_FMT_S16) { 0600 if (!converter.createForCodec(codec)) { 0601 err = FingerprintCalculator::NoConverterFound; 0602 emit error(err); 0603 return; 0604 } 0605 } 0606 0607 if (stream->duration != AV_NOPTS_VALUE) { 0608 duration = stream->time_base.den != 0 0609 ? stream->time_base.num * stream->duration / stream->time_base.den 0610 : 0; 0611 } else if (format.duration() != AV_NOPTS_VALUE) { 0612 duration = format.duration() / AV_TIME_BASE; 0613 } else { 0614 err = FingerprintCalculator::NoStreamFound; 0615 emit error(err); 0616 return; 0617 } 0618 0619 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 133, 100) 0620 AVPacket avpacket, avpacketTemp; 0621 AVPacket* packet = &avpacket; 0622 AVPacket* packetTemp = &avpacketTemp; 0623 ::av_init_packet(packet); 0624 ::av_init_packet(packetTemp); 0625 #else 0626 AVPacket* packet = ::av_packet_alloc(); 0627 AVPacket* packetTemp = ::av_packet_alloc(); 0628 #endif 0629 0630 constexpr int MAX_LENGTH = 120; 0631 int remaining = MAX_LENGTH * codec.channels() * codec.sampleRate(); 0632 emit started(codec.sampleRate(), codec.channels()); 0633 0634 while (remaining > 0 && err == FingerprintCalculator::Ok) { 0635 Packet pkt(packet); 0636 if (!format.readFrame(pkt)) 0637 break; 0638 0639 if (pkt.streamIndex() == format.streamIndex()) { 0640 packetTemp->data = packet->data; 0641 packetTemp->size = packet->size; 0642 0643 while (packetTemp->size > 0) { 0644 int bufferSize = BUFFER_SIZE; 0645 int consumed = codec.decode(m_buffer1, &bufferSize, packetTemp); 0646 0647 if (consumed < 0) { 0648 break; 0649 } 0650 0651 packetTemp->data += consumed; 0652 packetTemp->size -= consumed; 0653 0654 if (bufferSize <= 0 || bufferSize > BUFFER_SIZE) { 0655 continue; 0656 } 0657 0658 int16_t *buffer = converter.convert(codec, m_buffer1, m_buffer2, bufferSize); 0659 if (!buffer) 0660 break; 0661 0662 int length = qMin(remaining, bufferSize / 2); 0663 emit bufferReady(QByteArray(reinterpret_cast<char*>(buffer), length * 2)); 0664 if (isStopped()) { 0665 err = FingerprintCalculator::FingerprintCalculationFailed; 0666 break; 0667 } 0668 0669 remaining -= length; 0670 if (remaining <= 0) { 0671 break; 0672 } 0673 } 0674 } 0675 } 0676 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(58, 133, 100) 0677 ::av_packet_free(&packet); 0678 ::av_packet_free(&packetTemp); 0679 #endif 0680 if (err != FingerprintCalculator::Ok) { 0681 emit error(err); 0682 } else { 0683 emit finished(duration); 0684 } 0685 } 0686 0687 0688 /** 0689 * Create concrete fingerprint decoder. 0690 * @param parent parent object 0691 * @return fingerprint decoder instance. 0692 * @remarks This static method will be implemented by the concrete 0693 * fingerprint decoder which is used. 0694 */ 0695 AbstractFingerprintDecoder* 0696 AbstractFingerprintDecoder::createFingerprintDecoder(QObject* parent) { 0697 return new FFmpegFingerprintDecoder(parent); 0698 }