File indexing completed on 2025-04-27 03:58:10

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam project
0004  * https://www.digikam.org
0005  *
0006  * Date        : 2016-04-21
0007  * Description : video thumbnails extraction based on ffmpeg
0008  *
0009  * SPDX-FileCopyrightText: 2010      by Dirk Vanden Boer <dirk dot vdb at gmail dot com>
0010  * SPDX-FileCopyrightText: 2016-2018 by Maik Qualmann <metzpinguin at gmail dot com>
0011  * SPDX-FileCopyrightText: 2016-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0012  *
0013  * SPDX-License-Identifier: GPL-2.0-or-later
0014  *
0015  * ============================================================ */
0016 
0017 #include "videothumbdecoder_p.h"
0018 
0019 // Local includes
0020 
0021 #include "digikam_debug.h"
0022 
0023 namespace Digikam
0024 {
0025 
0026 VideoThumbDecoder::Private::Private()
0027     : videoStream           (-1),
0028       pFormatContext        (nullptr),
0029       pVideoCodecContext    (nullptr),
0030       pVideoCodecParameters (nullptr),
0031       pVideoCodec           (nullptr),
0032       pVideoStream          (nullptr),
0033       pFrame                (nullptr),
0034       pFrameBuffer          (nullptr),
0035       pPacket               (nullptr),
0036       allowSeek             (true),
0037       initialized           (false),
0038       bufferSinkContext     (nullptr),
0039       bufferSourceContext   (nullptr),
0040       filterGraph           (nullptr),
0041       filterFrame           (nullptr),
0042       lastWidth             (0),
0043       lastHeight            (0),
0044       lastPixfmt            (AV_PIX_FMT_NONE)
0045 {
0046 }
0047 
0048 VideoThumbDecoder::Private::~Private()
0049 {
0050 }
0051 
0052 void VideoThumbDecoder::Private::createAVFrame(AVFrame** const avFrame,
0053                                           quint8** const frameBuffer,
0054                                           int width,
0055                                           int height,
0056                                           AVPixelFormat format)
0057 {
0058     *avFrame     = av_frame_alloc();
0059     int numBytes = av_image_get_buffer_size(format, width, height, 1);
0060     *frameBuffer = reinterpret_cast<quint8*>(av_malloc(numBytes));
0061 
0062     av_image_fill_arrays((*avFrame)->data, (*avFrame)->linesize, *frameBuffer, format, width, height, 1);
0063 }
0064 
0065 bool VideoThumbDecoder::Private::initializeVideo()
0066 {
0067     for (unsigned int i = 0 ; i < pFormatContext->nb_streams ; ++i)
0068     {
0069         if (pFormatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
0070         {
0071             pVideoStream = pFormatContext->streams[i];
0072             videoStream  = i;
0073             break;
0074         }
0075     }
0076 
0077     if (videoStream == -1)
0078     {
0079         qDebug(DIGIKAM_GENERAL_LOG) << "Could not find video stream";
0080 
0081         return false;
0082     }
0083 
0084     pVideoCodecParameters = pFormatContext->streams[videoStream]->codecpar;
0085     pVideoCodec           = avcodec_find_decoder(pVideoCodecParameters->codec_id);
0086 
0087     if (pVideoCodec == nullptr)
0088     {
0089         // set to 0, otherwise avcodec_close(d->pVideoCodecContext) crashes
0090 
0091         pVideoCodecContext = nullptr;
0092         qDebug(DIGIKAM_GENERAL_LOG) << "Video Codec not found";
0093 
0094         return false;
0095     }
0096 
0097     pVideoCodecContext = avcodec_alloc_context3(pVideoCodec);
0098     avcodec_parameters_to_context(pVideoCodecContext, pVideoCodecParameters);
0099 
0100     if (avcodec_open2(pVideoCodecContext, pVideoCodec, nullptr) < 0)
0101     {
0102         qDebug(DIGIKAM_GENERAL_LOG) << "Could not open video codec";
0103 
0104         return false;
0105     }
0106 
0107     return true;
0108 }
0109 
0110 bool VideoThumbDecoder::Private::decodeVideoPacket() const
0111 {
0112     if (pPacket->stream_index != videoStream)
0113     {
0114         return false;
0115     }
0116 
0117     av_frame_unref(pFrame);
0118 
0119     int frameFinished = 0;
0120 
0121 #if LIBAVCODEC_VERSION_MAJOR < 53
0122 
0123     int bytesDecoded = avcodec_decode_video(pVideoCodecContext,
0124                                             pFrame,
0125                                             &frameFinished,
0126                                             pPacket->data,
0127                                             pPacket->size);
0128 #else
0129 
0130     int bytesDecoded = decodeVideoNew(pVideoCodecContext,
0131                                       pFrame,
0132                                       &frameFinished,
0133                                       pPacket);
0134 
0135 #endif
0136 
0137     if (bytesDecoded < 0)
0138     {
0139         qDebug(DIGIKAM_GENERAL_LOG) << "Failed to decode video frame: bytesDecoded < 0";
0140     }
0141 
0142     return (frameFinished > 0);
0143 }
0144 
0145 int VideoThumbDecoder::Private::decodeVideoNew(AVCodecContext* const avContext,
0146                                           AVFrame* const avFrame,
0147                                           int* gotFrame,
0148                                           AVPacket* const avPacket) const
0149 {
0150     int ret   = 0;
0151     *gotFrame = 0;
0152 
0153     if (avPacket)
0154     {
0155         ret = avcodec_send_packet(avContext, avPacket);
0156 
0157         // In particular, we don't expect AVERROR(EAGAIN), because we read all
0158         // decoded frames with avcodec_receive_frame() until done.
0159 
0160         if (ret < 0)
0161         {
0162             return (ret == AVERROR_EOF ? 0 : ret);
0163         }
0164     }
0165 
0166     ret = avcodec_receive_frame(avContext, avFrame);
0167 
0168     if ((ret < 0) && (ret != AVERROR(EAGAIN)) && (ret != AVERROR_EOF))
0169     {
0170         return ret;
0171     }
0172 
0173     if (ret >= 0)
0174     {
0175         *gotFrame = 1;
0176     }
0177 
0178     return 0;
0179 }
0180 
0181 bool VideoThumbDecoder::Private::getVideoPacket()
0182 {
0183     bool framesAvailable = true;
0184     bool frameDecoded    = false;
0185     int  attempts        = 0;
0186 
0187     if (pPacket)
0188     {
0189         av_packet_unref(pPacket);
0190         delete pPacket;
0191     }
0192 
0193     pPacket = new AVPacket();
0194 
0195     while (framesAvailable &&
0196            !frameDecoded   &&
0197            (attempts++ < 1000))
0198     {
0199         framesAvailable = (av_read_frame(pFormatContext, pPacket) >= 0);
0200 
0201         if (framesAvailable)
0202         {
0203             frameDecoded = (pPacket->stream_index == videoStream);
0204 
0205             if (!frameDecoded)
0206             {
0207                 av_packet_unref(pPacket);
0208             }
0209         }
0210     }
0211 
0212     return frameDecoded;
0213 }
0214 
0215 void VideoThumbDecoder::Private::deleteFilterGraph()
0216 {
0217     if (filterGraph)
0218     {
0219         av_frame_free(&filterFrame);
0220         avfilter_graph_free(&filterGraph);
0221         filterGraph = nullptr;
0222     }
0223 }
0224 
0225 bool VideoThumbDecoder::Private::initFilterGraph(enum AVPixelFormat pixfmt,
0226                                             int width, int height)
0227 {
0228     AVFilterInOut* inputs  = nullptr;
0229     AVFilterInOut* outputs = nullptr;
0230 
0231     deleteFilterGraph();
0232     filterGraph            = avfilter_graph_alloc();
0233 
0234     QByteArray arguments("buffer=");
0235     arguments             += "video_size=" + QByteArray::number(width)  + 'x' + QByteArray::number(height) + ':';
0236     arguments             += "pix_fmt="    + QByteArray::number(pixfmt) + ':';
0237     arguments             += "time_base=1/1:pixel_aspect=0/1[in];";
0238     arguments             += "[in]yadif[out];";
0239     arguments             += "[out]buffersink";
0240 
0241     int ret = avfilter_graph_parse2(filterGraph, arguments.constData(), &inputs, &outputs);
0242 
0243     if (ret < 0)
0244     {
0245         qWarning(DIGIKAM_GENERAL_LOG) << "Unable to parse filter graph";
0246 
0247         return false;
0248     }
0249 
0250     if (inputs || outputs)
0251     {
0252         return false;
0253     }
0254 
0255     ret = avfilter_graph_config(filterGraph, nullptr);
0256 
0257     if (ret < 0)
0258     {
0259         qWarning(DIGIKAM_GENERAL_LOG) << "Unable to validate filter graph";
0260 
0261         return false;
0262     }
0263 
0264     bufferSourceContext = avfilter_graph_get_filter(filterGraph, "Parsed_buffer_0");
0265     bufferSinkContext   = avfilter_graph_get_filter(filterGraph, "Parsed_buffersink_2");
0266 
0267     if (!bufferSourceContext || !bufferSinkContext)
0268     {
0269         qWarning(DIGIKAM_GENERAL_LOG) << "Unable to get source or sink";
0270 
0271         return false;
0272     }
0273 
0274     filterFrame = av_frame_alloc();
0275     lastWidth   = width;
0276     lastHeight  = height;
0277     lastPixfmt  = pixfmt;
0278 
0279     return true;
0280 }
0281 
0282 bool VideoThumbDecoder::Private::processFilterGraph(AVFrame* const dst,
0283                                                const AVFrame* const src,
0284                                                enum AVPixelFormat pixfmt,
0285                                                int width, int height)
0286 {
0287     if (!filterGraph           ||
0288         (width  != lastWidth)  ||
0289         (height != lastHeight) ||
0290         (pixfmt != lastPixfmt))
0291     {
0292 
0293         if (!initFilterGraph(pixfmt, width, height))
0294         {
0295             return false;
0296         }
0297     }
0298 
0299     memcpy(filterFrame->data,     src->data,     sizeof(src->data));
0300     memcpy(filterFrame->linesize, src->linesize, sizeof(src->linesize));
0301 
0302     filterFrame->width  = width;
0303     filterFrame->height = height;
0304     filterFrame->format = pixfmt;
0305     int ret             = av_buffersrc_add_frame(bufferSourceContext, filterFrame);
0306 
0307     if (ret < 0)
0308     {
0309         return false;
0310     }
0311 
0312     ret = av_buffersink_get_frame(bufferSinkContext, filterFrame);
0313 
0314     if (ret < 0)
0315     {
0316         return false;
0317     }
0318 
0319     av_image_copy(dst->data, dst->linesize, (const uint8_t**)filterFrame->data, filterFrame->linesize, pixfmt, width, height);
0320     av_frame_unref(filterFrame);
0321 
0322     return true;
0323 }
0324 
0325 void VideoThumbDecoder::Private::convertAndScaleFrame(AVPixelFormat format,
0326                                                  int scaledSize,
0327                                                  bool maintainAspectRatio,
0328                                                  int& scaledWidth,
0329                                                  int& scaledHeight)
0330 {
0331     AVPixelFormat pVideoCodecContextPixFormat;
0332     pVideoCodecContextPixFormat = pVideoCodecContext->pix_fmt;
0333 
0334 #if LIBAVUTIL_VERSION_MAJOR > 55
0335 
0336     switch (pVideoCodecContextPixFormat)
0337     {
0338         case AV_PIX_FMT_YUVJ420P:
0339         {
0340             pVideoCodecContextPixFormat = AV_PIX_FMT_YUV420P;
0341             break;
0342         }
0343 
0344         case AV_PIX_FMT_YUVJ422P:
0345         {
0346             pVideoCodecContextPixFormat = AV_PIX_FMT_YUV422P;
0347             break;
0348         }
0349 
0350         case AV_PIX_FMT_YUVJ444P:
0351         {
0352             pVideoCodecContextPixFormat = AV_PIX_FMT_YUV444P;
0353             break;
0354         }
0355 
0356         case AV_PIX_FMT_YUVJ440P:
0357         {
0358             pVideoCodecContextPixFormat = AV_PIX_FMT_YUV440P;
0359             break;
0360         }
0361 
0362         default:
0363         {
0364             break;
0365         }
0366     }
0367 
0368 #endif
0369 
0370     calculateDimensions(scaledSize, maintainAspectRatio, scaledWidth, scaledHeight);
0371 
0372     SwsContext* const scaleContext = sws_getContext(pVideoCodecContext->width,
0373                                                     pVideoCodecContext->height,
0374                                                     pVideoCodecContextPixFormat,
0375                                                     scaledWidth,
0376                                                     scaledHeight,
0377                                                     format,
0378                                                     SWS_BICUBIC,
0379                                                     nullptr,
0380                                                     nullptr,
0381                                                     nullptr);
0382 
0383     if (!scaleContext)
0384     {
0385         qDebug(DIGIKAM_GENERAL_LOG) << "Failed to create resize context";
0386         return;
0387     }
0388 
0389     AVFrame* convertedFrame       = nullptr;
0390     uint8_t* convertedFrameBuffer = nullptr;
0391 
0392     createAVFrame(&convertedFrame,
0393                   &convertedFrameBuffer,
0394                   scaledWidth,
0395                   scaledHeight,
0396                   format);
0397 
0398     sws_scale(scaleContext,
0399               pFrame->data,
0400               pFrame->linesize,
0401               0,
0402               pVideoCodecContext->height,
0403               convertedFrame->data,
0404               convertedFrame->linesize);
0405 
0406     sws_freeContext(scaleContext);
0407 
0408     av_frame_free(&pFrame);
0409     av_free(pFrameBuffer);
0410 
0411     pFrame       = convertedFrame;
0412     pFrameBuffer = convertedFrameBuffer;
0413 }
0414 
0415 void VideoThumbDecoder::Private::calculateDimensions(int squareSize,
0416                                                 bool maintainAspectRatio,
0417                                                 int& destWidth,
0418                                                 int& destHeight)
0419 {
0420     if (!maintainAspectRatio)
0421     {
0422         destWidth  = squareSize;
0423         destHeight = squareSize;
0424     }
0425     else
0426     {
0427         int srcWidth            = pVideoCodecContext->width;
0428         int srcHeight           = pVideoCodecContext->height;
0429         int ascpectNominator    = pVideoCodecContext->sample_aspect_ratio.num;
0430         int ascpectDenominator  = pVideoCodecContext->sample_aspect_ratio.den;
0431 
0432         if ((ascpectNominator != 0) && (ascpectDenominator != 0))
0433         {
0434             srcWidth = srcWidth * ascpectNominator / ascpectDenominator;
0435         }
0436 
0437         if (srcWidth > srcHeight)
0438         {
0439             destWidth  = squareSize;
0440             destHeight = static_cast<int>(static_cast<float>(squareSize) / srcWidth * srcHeight);
0441         }
0442         else
0443         {
0444             destWidth  = static_cast<int>(static_cast<float>(squareSize) / srcHeight * srcWidth);
0445             destHeight = squareSize;
0446         }
0447     }
0448 }
0449 
0450 } // namespace Digikam