File indexing completed on 2025-03-09 03:54:58

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam
0004  *
0005  * Date        : 2019-07-22
0006  * Description : Class to perform faces detection using OpenCV DNN module
0007  *
0008  * SPDX-FileCopyrightText: 2019 by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com>
0009  * SPDX-FileCopyrightText: 2020-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0010  *
0011  * SPDX-License-Identifier: GPL-2.0-or-later
0012  *
0013  * ============================================================ */
0014 
0015 #include "opencvdnnfacedetector.h"
0016 
0017 // C++ includes
0018 
0019 #include <vector>
0020 
0021 // Qt includes
0022 
0023 #include <QtGlobal>
0024 #include <QStandardPaths>
0025 #include <qmath.h>
0026 
0027 // Local includes
0028 
0029 #include "digikam_debug.h"
0030 #include "dnnfacedetectorssd.h"
0031 #include "dnnfacedetectoryolo.h"
0032 
0033 namespace Digikam
0034 {
0035 
0036 OpenCVDNNFaceDetector::OpenCVDNNFaceDetector(DetectorNNModel model)
0037     : m_modelType(model)
0038 {
0039     switch (m_modelType)
0040     {
0041         case DetectorNNModel::SSDMOBILENET:
0042         {
0043             m_inferenceEngine = new DNNFaceDetectorSSD;
0044             break;
0045         }
0046 
0047         case DetectorNNModel::YOLO:
0048         {
0049             m_inferenceEngine = new DNNFaceDetectorYOLO;
0050             break;
0051         }
0052 
0053         default:
0054         {
0055             qFatal("UNKNOWN neural network model");
0056         }
0057     }
0058 }
0059 
OpenCVDNNFaceDetector::~OpenCVDNNFaceDetector()
{
    // Engine is owned by this detector; allocated in the constructor.
    delete m_inferenceEngine;
}
0064 
0065 int OpenCVDNNFaceDetector::recommendedImageSizeForDetection()
0066 {
0067     return 800;
0068 }
0069 
0070 // TODO: prepareForDetection give different performances
0071 cv::Mat OpenCVDNNFaceDetector::prepareForDetection(const DImg& inputImage, cv::Size& paddedSize) const
0072 {
0073     if (inputImage.isNull() || !inputImage.size().isValid())
0074     {
0075         return cv::Mat();
0076     }
0077 
0078     cv::Mat cvImage;
0079     int type               = inputImage.sixteenBit() ? CV_16UC4 : CV_8UC4;
0080     cv::Mat cvImageWrapper = cv::Mat(inputImage.height(), inputImage.width(), type, inputImage.bits());
0081 
0082     if (inputImage.hasAlpha())
0083     {
0084         cvtColor(cvImageWrapper, cvImage, cv::COLOR_RGBA2BGR);
0085     }
0086     else
0087     {
0088         cvtColor(cvImageWrapper, cvImage, cv::COLOR_RGB2BGR);
0089     }
0090 
0091     if (type == CV_16UC4)
0092     {
0093         cvImage.convertTo(cvImage, CV_8UC3, 1 / 256.0);
0094     }
0095 
0096     return prepareForDetection(cvImage, paddedSize);
0097 }
0098 
0099 cv::Mat OpenCVDNNFaceDetector::prepareForDetection(const QImage& inputImage, cv::Size& paddedSize) const
0100 {
0101     if (inputImage.isNull() || !inputImage.size().isValid())
0102     {
0103         return cv::Mat();
0104     }
0105 
0106     cv::Mat cvImage;
0107     cv::Mat cvImageWrapper;
0108     QImage qimage(inputImage);
0109 
0110     switch (qimage.format())
0111     {
0112         case QImage::Format_RGB32:
0113         case QImage::Format_ARGB32:
0114         case QImage::Format_ARGB32_Premultiplied:
0115         {
0116             // I think we can ignore premultiplication when converting to grayscale
0117 
0118             cvImageWrapper = cv::Mat(qimage.height(), qimage.width(), CV_8UC4,
0119                                      qimage.scanLine(0), qimage.bytesPerLine());
0120             cvtColor(cvImageWrapper, cvImage, cv::COLOR_RGBA2BGR);
0121             break;
0122         }
0123 
0124         default:
0125         {
0126             qimage         = qimage.convertToFormat(QImage::Format_RGB888);
0127             cvImageWrapper = cv::Mat(qimage.height(), qimage.width(), CV_8UC3,
0128                                      qimage.scanLine(0), qimage.bytesPerLine());
0129             cvtColor(cvImageWrapper, cvImage, cv::COLOR_RGB2BGR);
0130             break;
0131         }
0132     }
0133 
0134     return prepareForDetection(cvImage, paddedSize);
0135 }
0136 
0137 cv::Mat OpenCVDNNFaceDetector::prepareForDetection(const QString& inputImagePath, cv::Size& paddedSize) const
0138 {
0139     std::vector<char> buffer;
0140     QFile file(inputImagePath);
0141     buffer.resize(file.size());
0142 
0143     if (!file.open(QIODevice::ReadOnly))
0144     {
0145         return cv::Mat();
0146     }
0147 
0148     file.read(buffer.data(), file.size());
0149     file.close();
0150 
0151     cv::Mat cvImage = cv::imdecode(std::vector<char>(buffer.begin(), buffer.end()), cv::IMREAD_COLOR);
0152 
0153     return prepareForDetection(cvImage, paddedSize);
0154 }
0155 
0156 cv::Mat OpenCVDNNFaceDetector::prepareForDetection(cv::Mat& cvImage, cv::Size& paddedSize) const
0157 {
0158     // Resize image before padding to fit in neural net
0159 
0160     cv::Size inputImageSize = m_inferenceEngine->nnInputSizeRequired();
0161     float k                 = qMin(inputImageSize.width  * 1.0 / cvImage.cols,
0162                                    inputImageSize.height * 1.0 / cvImage.rows);
0163 
0164     int newWidth            = (int)(k * cvImage.cols);
0165     int newHeight           = (int)(k * cvImage.rows);
0166     cv::resize(cvImage, cvImage, cv::Size(newWidth, newHeight));
0167 
0168     // Pad with black pixels
0169 
0170     int padX                = (inputImageSize.width  - newWidth)  / 2;
0171     int padY                = (inputImageSize.height - newHeight) / 2;
0172 
0173     cv::Mat imagePadded;
0174 
0175     cv::copyMakeBorder(cvImage, imagePadded,
0176                        padY, padY,
0177                        padX, padX,
0178                        cv::BORDER_CONSTANT,
0179                        cv::Scalar(0, 0, 0));
0180 
0181     paddedSize              = cv::Size(padX, padY);
0182 
0183     return imagePadded;
0184 }
0185 
0186 /**
0187  * There is no proof that doing this will help, since face can be detected at various positions (even half, masked faces
0188  * can be detected), not only frontal. Effort on doing this should be questioned.
0189  * TODO: Restructure and improve Face Detection module.
0190 
0191 void OpenCVDNNFaceDetector::resizeBboxToStandardHumanFace(int& width, int& height)
0192 {
0193     // Human head sizes data
0194     // https://en.wikipedia.org/wiki/Human_head#Average_head_sizes
0195 
0196     float maxRatioFrontalFace    = 15.4 / 15.5;
0197     float minRatioNonFrontalFace = 8.6  / 21.6;
0198 
0199     float r = width*1.0/height, rReference;
0200 
0201     if      ((r >= minRatioNonFrontalFace*0.9) && r <= (maxRatioFrontalFace * 1.1))
0202     {
0203         rReference = r;
0204     }
0205     else if (r <= 0.25)
0206     {
0207         rReference = r * 1.5;
0208     }
0209     else if (r >= 4)
0210     {
0211         rReference = r / 1.5;
0212     }
0213     else if (r < minRatioNonFrontalFace * 0.9)
0214     {
0215         rReference = minRatioNonFrontalFace;
0216     }
0217     else if (r > maxRatioFrontalFace * 1.1)
0218     {
0219         rReference = maxRatioFrontalFace;
0220     }
0221 
0222     if (width > height)
0223     {
0224         height = width / rReference;
0225     }
0226     else
0227     {
0228         width = height * rReference;
0229     }
0230 }
0231 */
0232 
0233 QList<QRect> OpenCVDNNFaceDetector::detectFaces(const cv::Mat& inputImage,
0234                                                 const cv::Size& paddedSize)
0235 {
0236     std::vector<cv::Rect> detectedBboxes = cvDetectFaces(inputImage, paddedSize);
0237 
0238     QList<QRect> results;
0239 /*
0240     cv::Mat imageTest = inputImage.clone();
0241 */
0242     for (const cv::Rect& bbox : detectedBboxes)
0243     {
0244         QRect rect(bbox.x, bbox.y, bbox.width, bbox.height);
0245         results << rect;
0246 /*
0247         qCDebug(DIGIKAM_FACESENGINE_LOG) << rect;
0248         cv::rectangle(imageTest, cv::Rect(bbox.x + paddedSize.width,
0249                                           bbox.y + paddedSize.height,
0250                                           bbox.width, bbox.height), cv::Scalar(0, 128, 0));
0251 */
0252     }
0253 /*
0254     cv::imshow("image", imageTest);
0255     cv::waitKey(0);
0256 */
0257     return results;
0258 }
0259 
0260 std::vector<cv::Rect> OpenCVDNNFaceDetector::cvDetectFaces(const cv::Mat& inputImage,
0261                                                            const cv::Size& paddedSize)
0262 {
0263     std::vector<cv::Rect> detectedBboxes;
0264 
0265     m_inferenceEngine->detectFaces(inputImage, paddedSize, detectedBboxes);
0266 
0267     return detectedBboxes;
0268 }
0269 
0270 } // namespace Digikam