threadimageio/video/videothumbdecoder_p.cpp

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam project
0004  * https://www.digikam.org
0005  *
0006  * Date        : 2016-04-21
0007  * Description : video thumbnails extraction based on ffmpeg
0008  *
0009  * SPDX-FileCopyrightText: 2010      by Dirk Vanden Boer <dirk dot vdb at gmail dot com>
0010  * SPDX-FileCopyrightText: 2016-2018 by Maik Qualmann <metzpinguin at gmail dot com>
0011  * SPDX-FileCopyrightText: 2016-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0012  *
0013  * SPDX-License-Identifier: GPL-2.0-or-later
0014  *
0015  * ============================================================ */
0016
0017 #include "videothumbdecoder_p.h"
0018
0019 // Local includes
0020
0021 #include "digikam_debug.h"
0022
0023 namespace Digikam
0024 {
0025
0026 VideoThumbDecoder::Private::Private()
0027     : videoStream           (-1),
0028       pFormatContext        (nullptr),
0029       pVideoCodecContext    (nullptr),
0030       pVideoCodecParameters (nullptr),
0031       pVideoCodec           (nullptr),
0032       pVideoStream          (nullptr),
0033       pFrame                (nullptr),
0034       pFrameBuffer          (nullptr),
0035       pPacket               (nullptr),
0036       allowSeek             (true),
0037       initialized           (false),
0038       bufferSinkContext     (nullptr),
0039       bufferSourceContext   (nullptr),
0040       filterGraph           (nullptr),
0041       filterFrame           (nullptr),
0042       lastWidth             (0),
0043       lastHeight            (0),
0044       lastPixfmt            (AV_PIX_FMT_NONE)
0045 {
0046 }
0047
0048 VideoThumbDecoder::Private::~Private()
0049 {
0050 }
0051
0052 void VideoThumbDecoder::Private::createAVFrame(AVFrame** const avFrame,
0053                                           quint8** const frameBuffer,
0054                                           int width,
0055                                           int height,
0056                                           AVPixelFormat format)
0057 {
0058     *avFrame     = av_frame_alloc();
0059     int numBytes = av_image_get_buffer_size(format, width, height, 1);
0060     *frameBuffer = reinterpret_cast<quint8*>(av_malloc(numBytes));
0061
0062     av_image_fill_arrays((*avFrame)->data, (*avFrame)->linesize, *frameBuffer, format, width, height, 1);
0063 }
0064
0065 bool VideoThumbDecoder::Private::initializeVideo()
0066 {
0067     for (unsigned int i = 0 ; i < pFormatContext->nb_streams ; ++i)
0068     {
0069         if (pFormatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
0070         {
0071             pVideoStream = pFormatContext->streams[i];
0072             videoStream  = i;
0073             break;
0074         }
0075     }
0076
0077     if (videoStream == -1)
0078     {
0079         qDebug(DIGIKAM_GENERAL_LOG) << "Could not find video stream";
0080
0081         return false;
0082     }
0083
0084     pVideoCodecParameters = pFormatContext->streams[videoStream]->codecpar;
0085     pVideoCodec           = avcodec_find_decoder(pVideoCodecParameters->codec_id);
0086
0087     if (pVideoCodec == nullptr)
0088     {
0089         // set to 0, otherwise avcodec_close(d->pVideoCodecContext) crashes
0090
0091         pVideoCodecContext = nullptr;
0092         qDebug(DIGIKAM_GENERAL_LOG) << "Video Codec not found";
0093
0094         return false;
0095     }
0096
0097     pVideoCodecContext = avcodec_alloc_context3(pVideoCodec);
0098     avcodec_parameters_to_context(pVideoCodecContext, pVideoCodecParameters);
0099
0100     if (avcodec_open2(pVideoCodecContext, pVideoCodec, nullptr) < 0)
0101     {
0102         qDebug(DIGIKAM_GENERAL_LOG) << "Could not open video codec";
0103
0104         return false;
0105     }
0106
0107     return true;
0108 }
0109
0110 bool VideoThumbDecoder::Private::decodeVideoPacket() const
0111 {
0112     if (pPacket->stream_index != videoStream)
0113     {
0114         return false;
0115     }
0116
0117     av_frame_unref(pFrame);
0118
0119     int frameFinished = 0;
0120
0121 #if LIBAVCODEC_VERSION_MAJOR < 53
0122
0123     int bytesDecoded = avcodec_decode_video(pVideoCodecContext,
0124                                             pFrame,
0125                                             &frameFinished,
0126                                             pPacket->data,
0127                                             pPacket->size);
0128 #else
0129
0130     int bytesDecoded = decodeVideoNew(pVideoCodecContext,
0131                                       pFrame,
0132                                       &frameFinished,
0133                                       pPacket);
0134
0135 #endif
0136
0137     if (bytesDecoded < 0)
0138     {
0139         qDebug(DIGIKAM_GENERAL_LOG) << "Failed to decode video frame: bytesDecoded < 0";
0140     }
0141
0142     return (frameFinished > 0);
0143 }
0144
0145 int VideoThumbDecoder::Private::decodeVideoNew(AVCodecContext* const avContext,
0146                                           AVFrame* const avFrame,
0147                                           int* gotFrame,
0148                                           AVPacket* const avPacket) const
0149 {
0150     int ret   = 0;
0151     *gotFrame = 0;
0152
0153     if (avPacket)
0154     {
0155         ret = avcodec_send_packet(avContext, avPacket);
0156
0157         // In particular, we don't expect AVERROR(EAGAIN), because we read all
0158         // decoded frames with avcodec_receive_frame() until done.
0159
0160         if (ret < 0)
0161         {
0162             return (ret == AVERROR_EOF ? 0 : ret);
0163         }
0164     }
0165
0166     ret = avcodec_receive_frame(avContext, avFrame);
0167
0168     if ((ret < 0) && (ret != AVERROR(EAGAIN)) && (ret != AVERROR_EOF))
0169     {
0170         return ret;
0171     }
0172
0173     if (ret >= 0)
0174     {
0175         *gotFrame = 1;
0176     }
0177
0178     return 0;
0179 }
0180
0181 bool VideoThumbDecoder::Private::getVideoPacket()
0182 {
0183     bool framesAvailable = true;
0184     bool frameDecoded    = false;
0185     int  attempts        = 0;
0186
0187     if (pPacket)
0188     {
0189         av_packet_unref(pPacket);
0190         delete pPacket;
0191     }
0192
0193     pPacket = new AVPacket();
0194
0195     while (framesAvailable &&
0196            !frameDecoded   &&
0197            (attempts++ < 1000))
0198     {
0199         framesAvailable = (av_read_frame(pFormatContext, pPacket) >= 0);
0200
0201         if (framesAvailable)
0202         {
0203             frameDecoded = (pPacket->stream_index == videoStream);
0204
0205             if (!frameDecoded)
0206             {
0207                 av_packet_unref(pPacket);
0208             }
0209         }
0210     }
0211
0212     return frameDecoded;
0213 }
0214
0215 void VideoThumbDecoder::Private::deleteFilterGraph()
0216 {
0217     if (filterGraph)
0218     {
0219         av_frame_free(&filterFrame);
0220         avfilter_graph_free(&filterGraph);
0221         filterGraph = nullptr;
0222     }
0223 }
0224
0225 bool VideoThumbDecoder::Private::initFilterGraph(enum AVPixelFormat pixfmt,
0226                                             int width, int height)
0227 {
0228     AVFilterInOut* inputs  = nullptr;
0229     AVFilterInOut* outputs = nullptr;
0230
0231     deleteFilterGraph();
0232     filterGraph            = avfilter_graph_alloc();
0233
0234     QByteArray arguments("buffer=");
0235     arguments             += "video_size=" + QByteArray::number(width)  + 'x' + QByteArray::number(height) + ':';
0236     arguments             += "pix_fmt="    + QByteArray::number(pixfmt) + ':';
0237     arguments             += "time_base=1/1:pixel_aspect=0/1[in];";
0238     arguments             += "[in]yadif[out];";
0239     arguments             += "[out]buffersink";
0240
0241     int ret = avfilter_graph_parse2(filterGraph, arguments.constData(), &inputs, &outputs);
0242
0243     if (ret < 0)
0244     {
0245         qWarning(DIGIKAM_GENERAL_LOG) << "Unable to parse filter graph";
0246
0247         return false;
0248     }
0249
0250     if (inputs || outputs)
0251     {
0252         return false;
0253     }
0254
0255     ret = avfilter_graph_config(filterGraph, nullptr);
0256
0257     if (ret < 0)
0258     {
0259         qWarning(DIGIKAM_GENERAL_LOG) << "Unable to validate filter graph";
0260
0261         return false;
0262     }
0263
0264     bufferSourceContext = avfilter_graph_get_filter(filterGraph, "Parsed_buffer_0");
0265     bufferSinkContext   = avfilter_graph_get_filter(filterGraph, "Parsed_buffersink_2");
0266
0267     if (!bufferSourceContext || !bufferSinkContext)
0268     {
0269         qWarning(DIGIKAM_GENERAL_LOG) << "Unable to get source or sink";
0270
0271         return false;
0272     }
0273
0274     filterFrame = av_frame_alloc();
0275     lastWidth   = width;
0276     lastHeight  = height;
0277     lastPixfmt  = pixfmt;
0278
0279     return true;
0280 }
0281
0282 bool VideoThumbDecoder::Private::processFilterGraph(AVFrame* const dst,
0283                                                const AVFrame* const src,
0284                                                enum AVPixelFormat pixfmt,
0285                                                int width, int height)
0286 {
0287     if (!filterGraph           ||
0288         (width  != lastWidth)  ||
0289         (height != lastHeight) ||
0290         (pixfmt != lastPixfmt))
0291     {
0292
0293         if (!initFilterGraph(pixfmt, width, height))
0294         {
0295             return false;
0296         }
0297     }
0298
0299     memcpy(filterFrame->data,     src->data,     sizeof(src->data));
0300     memcpy(filterFrame->linesize, src->linesize, sizeof(src->linesize));
0301
0302     filterFrame->width  = width;
0303     filterFrame->height = height;
0304     filterFrame->format = pixfmt;
0305     int ret             = av_buffersrc_add_frame(bufferSourceContext, filterFrame);
0306
0307     if (ret < 0)
0308     {
0309         return false;
0310     }
0311
0312     ret = av_buffersink_get_frame(bufferSinkContext, filterFrame);
0313
0314     if (ret < 0)
0315     {
0316         return false;
0317     }
0318
0319     av_image_copy(dst->data, dst->linesize, (const uint8_t**)filterFrame->data, filterFrame->linesize, pixfmt, width, height);
0320     av_frame_unref(filterFrame);
0321
0322     return true;
0323 }
0324
0325 void VideoThumbDecoder::Private::convertAndScaleFrame(AVPixelFormat format,
0326                                                  int scaledSize,
0327                                                  bool maintainAspectRatio,
0328                                                  int& scaledWidth,
0329                                                  int& scaledHeight)
0330 {
0331     AVPixelFormat pVideoCodecContextPixFormat;
0332     pVideoCodecContextPixFormat = pVideoCodecContext->pix_fmt;
0333
0334 #if LIBAVUTIL_VERSION_MAJOR > 55
0335
0336     switch (pVideoCodecContextPixFormat)
0337     {
0338         case AV_PIX_FMT_YUVJ420P:
0339         {
0340             pVideoCodecContextPixFormat = AV_PIX_FMT_YUV420P;
0341             break;
0342         }
0343
0344         case AV_PIX_FMT_YUVJ422P:
0345         {
0346             pVideoCodecContextPixFormat = AV_PIX_FMT_YUV422P;
0347             break;
0348         }
0349
0350         case AV_PIX_FMT_YUVJ444P:
0351         {
0352             pVideoCodecContextPixFormat = AV_PIX_FMT_YUV444P;
0353             break;
0354         }
0355
0356         case AV_PIX_FMT_YUVJ440P:
0357         {
0358             pVideoCodecContextPixFormat = AV_PIX_FMT_YUV440P;
0359             break;
0360         }
0361
0362         default:
0363         {
0364             break;
0365         }
0366     }
0367
0368 #endif
0369
0370     calculateDimensions(scaledSize, maintainAspectRatio, scaledWidth, scaledHeight);
0371
0372     SwsContext* const scaleContext = sws_getContext(pVideoCodecContext->width,
0373                                                     pVideoCodecContext->height,
0374                                                     pVideoCodecContextPixFormat,
0375                                                     scaledWidth,
0376                                                     scaledHeight,
0377                                                     format,
0378                                                     SWS_BICUBIC,
0379                                                     nullptr,
0380                                                     nullptr,
0381                                                     nullptr);
0382
0383     if (!scaleContext)
0384     {
0385         qDebug(DIGIKAM_GENERAL_LOG) << "Failed to create resize context";
0386         return;
0387     }
0388
0389     AVFrame* convertedFrame       = nullptr;
0390     uint8_t* convertedFrameBuffer = nullptr;
0391
0392     createAVFrame(&convertedFrame,
0393                   &convertedFrameBuffer,
0394                   scaledWidth,
0395                   scaledHeight,
0396                   format);
0397
0398     sws_scale(scaleContext,
0399               pFrame->data,
0400               pFrame->linesize,
0401               0,
0402               pVideoCodecContext->height,
0403               convertedFrame->data,
0404               convertedFrame->linesize);
0405
0406     sws_freeContext(scaleContext);
0407
0408     av_frame_free(&pFrame);
0409     av_free(pFrameBuffer);
0410
0411     pFrame       = convertedFrame;
0412     pFrameBuffer = convertedFrameBuffer;
0413 }
0414
0415 void VideoThumbDecoder::Private::calculateDimensions(int squareSize,
0416                                                 bool maintainAspectRatio,
0417                                                 int& destWidth,
0418                                                 int& destHeight)
0419 {
0420     if (!maintainAspectRatio)
0421     {
0422         destWidth  = squareSize;
0423         destHeight = squareSize;
0424     }
0425     else
0426     {
0427         int srcWidth            = pVideoCodecContext->width;
0428         int srcHeight           = pVideoCodecContext->height;
0429         int ascpectNominator    = pVideoCodecContext->sample_aspect_ratio.num;
0430         int ascpectDenominator  = pVideoCodecContext->sample_aspect_ratio.den;
0431
0432         if ((ascpectNominator != 0) && (ascpectDenominator != 0))
0433         {
0434             srcWidth = srcWidth * ascpectNominator / ascpectDenominator;
0435         }
0436
0437         if (srcWidth > srcHeight)
0438         {
0439             destWidth  = squareSize;
0440             destHeight = static_cast<int>(static_cast<float>(squareSize) / srcWidth * srcHeight);
0441         }
0442         else
0443         {
0444             destWidth  = static_cast<int>(static_cast<float>(squareSize) / srcHeight * srcWidth);
0445             destHeight = squareSize;
0446         }
0447     }
0448 }
0449
0450 } // namespace Digikam