File indexing completed on 2025-03-09 03:54:58
0001 /* ============================================================ 0002 * 0003 * This file is a part of digiKam 0004 * 0005 * Date : 2019-08-08 0006 * Description : Derived class to perform SSD neural network inference 0007 * for face detection 0008 * 0009 * SPDX-FileCopyrightText: 2019 by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com> 0010 * SPDX-FileCopyrightText: 2020-2024 by Gilles Caulier <caulier dot gilles at gmail dot com> 0011 * 0012 * SPDX-License-Identifier: GPL-2.0-or-later 0013 * 0014 * ============================================================ */ 0015 0016 #include "dnnfacedetectorssd.h" 0017 0018 // Qt includes 0019 0020 #include <QList> 0021 #include <QRect> 0022 #include <QString> 0023 #include <QFileInfo> 0024 #include <QMutexLocker> 0025 #include <QStandardPaths> 0026 0027 // Local includes 0028 0029 #include "digikam_debug.h" 0030 #include "digikam_config.h" 0031 0032 namespace Digikam 0033 { 0034 0035 DNNFaceDetectorSSD::DNNFaceDetectorSSD() 0036 : DNNFaceDetectorBase(1.0, cv::Scalar(104.0, 177.0, 123.0), cv::Size(300, 300)) 0037 { 0038 loadModels(); 0039 } 0040 0041 DNNFaceDetectorSSD::~DNNFaceDetectorSSD() 0042 { 0043 } 0044 0045 bool DNNFaceDetectorSSD::loadModels() 0046 { 0047 QString appPath = QStandardPaths::locate(QStandardPaths::GenericDataLocation, 0048 QLatin1String("digikam/facesengine"), 0049 QStandardPaths::LocateDirectory); 0050 0051 QString model = QLatin1String("deploy.prototxt"); 0052 QString data = QLatin1String("res10_300x300_ssd_iter_140000_fp16.caffemodel"); 0053 0054 QString nnmodel = appPath + QLatin1Char('/') + model; 0055 QString nndata = appPath + QLatin1Char('/') + data; 0056 0057 if (QFileInfo::exists(nnmodel) && QFileInfo::exists(nndata)) 0058 { 0059 try 0060 { 0061 qCDebug(DIGIKAM_FACEDB_LOG) << "SSD model:" << model << ", SSD data:" << data; 0062 0063 #ifdef Q_OS_WIN 0064 0065 net = cv::dnn::readNetFromCaffe(nnmodel.toLocal8Bit().constData(), 0066 nndata.toLocal8Bit().constData()); 0067 0068 #else 0069 0070 net = cv::dnn::readNetFromCaffe(nnmodel.toStdString(), 0071 nndata.toStdString()); 0072 0073 #endif 0074 0075 #if (OPENCV_VERSION == QT_VERSION_CHECK(4, 7, 0)) 0076 0077 net.enableWinograd(false); 0078 0079 #endif 0080 0081 } 0082 catch (cv::Exception& e) 0083 { 0084 qCWarning(DIGIKAM_FACEDB_LOG) << "cv::Exception:" << e.what(); 0085 0086 return false; 0087 } 0088 catch (...) 0089 { 0090 qCWarning(DIGIKAM_FACEDB_LOG) << "Default exception from OpenCV"; 0091 0092 return false; 0093 } 0094 } 0095 else 0096 { 0097 qCCritical(DIGIKAM_FACEDB_LOG) << "Cannot found faces engine DNN model" << model << "or" << data; 0098 qCCritical(DIGIKAM_FACEDB_LOG) << "Faces detection feature cannot be used!"; 0099 0100 return false; 0101 } 0102 0103 return true; 0104 } 0105 0106 void DNNFaceDetectorSSD::detectFaces(const cv::Mat& inputImage, 0107 const cv::Size& paddedSize, 0108 std::vector<cv::Rect>& detectedBboxes) 0109 { 0110 if (inputImage.empty()) 0111 { 0112 qCDebug(DIGIKAM_FACESENGINE_LOG) << "Invalid image given, not detecting faces."; 0113 return; 0114 } 0115 0116 cv::Mat detection; 0117 cv::Mat inputBlob = cv::dnn::blobFromImage(inputImage, scaleFactor, inputImageSize, meanValToSubtract, true, false); 0118 0119 if (!net.empty()) 0120 { 0121 QMutexLocker lock(&mutex); 0122 net.setInput(inputBlob); 0123 detection = net.forward(); 0124 } 0125 0126 postprocess(detection, paddedSize, detectedBboxes); 0127 } 0128 0129 void DNNFaceDetectorSSD::postprocess(cv::Mat detection, 0130 const cv::Size& paddedSize, 0131 std::vector<cv::Rect>& detectedBboxes) const 0132 { 0133 std::vector<float> goodConfidences, doubtConfidences, confidences; 0134 std::vector<cv::Rect> goodBoxes, doubtBoxes, boxes; 0135 0136 cv::Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>()); 0137 0138 // TODO: model problem, confidence of ssd output too low ===> false detection 0139 0140 for (int i = 0 ; i < detectionMat.rows ; ++i) 0141 { 0142 float confidence = detectionMat.at<float>(i, 2); 0143 0144 if (confidence > confidenceThreshold) 0145 { 0146 float leftRatio = detectionMat.at<float>(i, 3); 0147 float topRatio = detectionMat.at<float>(i, 4); 0148 float rightRatio = detectionMat.at<float>(i, 5); 0149 float bottomRatio = detectionMat.at<float>(i, 6); 0150 0151 int left = (int)(leftRatio * inputImageSize.width); 0152 int right = (int)(rightRatio * inputImageSize.width); 0153 int top = (int)(topRatio * inputImageSize.height); 0154 int bottom = (int)(bottomRatio * inputImageSize.height); 0155 0156 selectBbox(paddedSize, 0157 confidence, 0158 left, 0159 right, 0160 top, 0161 bottom, 0162 goodConfidences, 0163 goodBoxes, 0164 doubtConfidences, 0165 doubtBoxes); 0166 } 0167 } 0168 /* 0169 qCDebug(DIGIKAM_FACESENGINE_LOG) << "nb of doubtbox = " << doubtBoxes.size(); 0170 qCDebug(DIGIKAM_FACESENGINE_LOG) << "nb of goodbox = " << goodBoxes.size(); 0171 */ 0172 if (goodBoxes.empty()) 0173 { 0174 boxes = doubtBoxes; 0175 confidences = doubtConfidences; 0176 } 0177 else 0178 { 0179 boxes = goodBoxes; 0180 confidences = goodConfidences; 0181 } 0182 0183 // Perform non maximum suppression to eliminate redundant overlapping boxes with lower confidences 0184 0185 std::vector<int> indices; 0186 cv::dnn::NMSBoxes(boxes, confidences, confidenceThreshold, nmsThreshold, indices); 0187 0188 // Get detected bounding boxes 0189 0190 for (size_t i = 0 ; i < indices.size() ; ++i) 0191 { 0192 cv::Rect bbox = boxes[indices[i]]; 0193 correctBbox(bbox, paddedSize); 0194 detectedBboxes.push_back(cv::Rect(bbox.x, bbox.y, bbox.width, bbox.height)); 0195 } 0196 } 0197 0198 } // namespace Digikam