detection/opencv-dnn/dnnfacedetectorssd.cpp

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam
0004  *
0005  * Date        : 2019-08-08
0006  * Description : Derived class to perform SSD neural network inference
0007  *               for face detection
0008  *
0009  * SPDX-FileCopyrightText: 2019 by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com>
0010  * SPDX-FileCopyrightText: 2020-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0011  *
0012  * SPDX-License-Identifier: GPL-2.0-or-later
0013  *
0014  * ============================================================ */
0015
0016 #include "dnnfacedetectorssd.h"
0017
0018 // Qt includes
0019
0020 #include <QList>
0021 #include <QRect>
0022 #include <QString>
0023 #include <QFileInfo>
0024 #include <QMutexLocker>
0025 #include <QStandardPaths>
0026
0027 // Local includes
0028
0029 #include "digikam_debug.h"
0030 #include "digikam_config.h"
0031
0032 namespace Digikam
0033 {
0034
0035 DNNFaceDetectorSSD::DNNFaceDetectorSSD()
0036     : DNNFaceDetectorBase(1.0, cv::Scalar(104.0, 177.0, 123.0), cv::Size(300, 300))
0037 {
0038     loadModels();
0039 }
0040
0041 DNNFaceDetectorSSD::~DNNFaceDetectorSSD()
0042 {
0043 }
0044
0045 bool DNNFaceDetectorSSD::loadModels()
0046 {
0047     QString appPath = QStandardPaths::locate(QStandardPaths::GenericDataLocation,
0048                                              QLatin1String("digikam/facesengine"),
0049                                              QStandardPaths::LocateDirectory);
0050
0051     QString model   = QLatin1String("deploy.prototxt");
0052     QString data    = QLatin1String("res10_300x300_ssd_iter_140000_fp16.caffemodel");
0053
0054     QString nnmodel = appPath + QLatin1Char('/') + model;
0055     QString nndata  = appPath + QLatin1Char('/') + data;
0056
0057     if (QFileInfo::exists(nnmodel) && QFileInfo::exists(nndata))
0058     {
0059         try
0060         {
0061             qCDebug(DIGIKAM_FACEDB_LOG) << "SSD model:" << model << ", SSD data:" << data;
0062
0063 #ifdef Q_OS_WIN
0064
0065             net = cv::dnn::readNetFromCaffe(nnmodel.toLocal8Bit().constData(),
0066                                             nndata.toLocal8Bit().constData());
0067
0068 #else
0069
0070             net = cv::dnn::readNetFromCaffe(nnmodel.toStdString(),
0071                                             nndata.toStdString());
0072
0073 #endif
0074
0075 #if (OPENCV_VERSION == QT_VERSION_CHECK(4, 7, 0))
0076
0077             net.enableWinograd(false);
0078
0079 #endif
0080
0081         }
0082         catch (cv::Exception& e)
0083         {
0084             qCWarning(DIGIKAM_FACEDB_LOG) << "cv::Exception:" << e.what();
0085
0086             return false;
0087         }
0088         catch (...)
0089         {
0090            qCWarning(DIGIKAM_FACEDB_LOG) << "Default exception from OpenCV";
0091
0092            return false;
0093         }
0094     }
0095     else
0096     {
0097         qCCritical(DIGIKAM_FACEDB_LOG) << "Cannot found faces engine DNN model" << model << "or" << data;
0098         qCCritical(DIGIKAM_FACEDB_LOG) << "Faces detection feature cannot be used!";
0099
0100         return false;
0101     }
0102
0103     return true;
0104 }
0105
0106 void DNNFaceDetectorSSD::detectFaces(const cv::Mat& inputImage,
0107                                      const cv::Size& paddedSize,
0108                                      std::vector<cv::Rect>& detectedBboxes)
0109 {
0110     if (inputImage.empty())
0111     {
0112         qCDebug(DIGIKAM_FACESENGINE_LOG) << "Invalid image given, not detecting faces.";
0113         return;
0114     }
0115
0116     cv::Mat detection;
0117     cv::Mat inputBlob = cv::dnn::blobFromImage(inputImage, scaleFactor, inputImageSize, meanValToSubtract, true, false);
0118
0119     if (!net.empty())
0120     {
0121         QMutexLocker lock(&mutex);
0122         net.setInput(inputBlob);
0123         detection = net.forward();
0124     }
0125
0126     postprocess(detection, paddedSize, detectedBboxes);
0127 }
0128
0129 void DNNFaceDetectorSSD::postprocess(cv::Mat detection,
0130                                      const cv::Size& paddedSize,
0131                                      std::vector<cv::Rect>& detectedBboxes) const
0132 {
0133     std::vector<float> goodConfidences, doubtConfidences, confidences;
0134     std::vector<cv::Rect> goodBoxes, doubtBoxes, boxes;
0135
0136     cv::Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
0137
0138     // TODO: model problem, confidence of ssd output too low ===> false detection
0139
0140     for (int i = 0 ; i < detectionMat.rows ; ++i)
0141     {
0142         float confidence = detectionMat.at<float>(i, 2);
0143
0144         if (confidence > confidenceThreshold)
0145         {
0146             float leftRatio   = detectionMat.at<float>(i, 3);
0147             float topRatio    = detectionMat.at<float>(i, 4);
0148             float rightRatio  = detectionMat.at<float>(i, 5);
0149             float bottomRatio = detectionMat.at<float>(i, 6);
0150
0151             int left          = (int)(leftRatio   * inputImageSize.width);
0152             int right         = (int)(rightRatio  * inputImageSize.width);
0153             int top           = (int)(topRatio    * inputImageSize.height);
0154             int bottom        = (int)(bottomRatio * inputImageSize.height);
0155
0156             selectBbox(paddedSize,
0157                        confidence,
0158                        left,
0159                        right,
0160                        top,
0161                        bottom,
0162                        goodConfidences,
0163                        goodBoxes,
0164                        doubtConfidences,
0165                        doubtBoxes);
0166         }
0167     }
0168 /*
0169     qCDebug(DIGIKAM_FACESENGINE_LOG) << "nb of doubtbox = " << doubtBoxes.size();
0170     qCDebug(DIGIKAM_FACESENGINE_LOG) << "nb of goodbox = " << goodBoxes.size();
0171 */
0172     if (goodBoxes.empty())
0173     {
0174         boxes       = doubtBoxes;
0175         confidences = doubtConfidences;
0176     }
0177     else
0178     {
0179         boxes       = goodBoxes;
0180         confidences = goodConfidences;
0181     }
0182
0183     // Perform non maximum suppression to eliminate redundant overlapping boxes with lower confidences
0184
0185     std::vector<int> indices;
0186     cv::dnn::NMSBoxes(boxes, confidences, confidenceThreshold, nmsThreshold, indices);
0187
0188     // Get detected bounding boxes
0189
0190     for (size_t i = 0 ; i < indices.size() ; ++i)
0191     {
0192         cv::Rect bbox = boxes[indices[i]];
0193         correctBbox(bbox, paddedSize);
0194         detectedBboxes.push_back(cv::Rect(bbox.x, bbox.y, bbox.width, bbox.height));
0195     }
0196 }
0197
0198 } // namespace Digikam