File indexing completed on 2025-03-09 03:54:58
0001 /* ============================================================ 0002 * 0003 * This file is a part of digiKam 0004 * 0005 * Date : 2019-08-08 0006 * Description : Base class to perform low-level neural network inference 0007 * for face detection 0008 * 0009 * SPDX-FileCopyrightText: 2019 by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com> 0010 * SPDX-FileCopyrightText: 2020-2024 by Gilles Caulier <caulier dot gilles at gmail dot com> 0011 * 0012 * SPDX-License-Identifier: GPL-2.0-or-later 0013 * 0014 * ============================================================ */ 0015 0016 #include "dnnfacedetectorbase.h" 0017 0018 // Qt includes 0019 0020 #include <QtGlobal> 0021 #include <QRect> 0022 0023 // Local includes 0024 0025 #include "digikam_debug.h" 0026 0027 namespace Digikam 0028 { 0029 // TODO: re-verify these threshold 0030 0031 float DNNFaceDetectorBase::confidenceThreshold = 0.6F; 0032 float DNNFaceDetectorBase::nmsThreshold = 0.4F; 0033 0034 // -------------------------------------------------------------------------------------------------------- 0035 0036 DNNFaceDetectorBase::DNNFaceDetectorBase() 0037 : scaleFactor(1.0) 0038 { 0039 } 0040 0041 DNNFaceDetectorBase::DNNFaceDetectorBase(float scale, 0042 const cv::Scalar& val, 0043 const cv::Size& inputImgSize) 0044 : scaleFactor (scale), 0045 meanValToSubtract (val), 0046 inputImageSize (inputImgSize) 0047 { 0048 } 0049 0050 DNNFaceDetectorBase::~DNNFaceDetectorBase() 0051 { 0052 } 0053 0054 cv::Size DNNFaceDetectorBase::nnInputSizeRequired() const 0055 { 0056 return inputImageSize; 0057 } 0058 0059 // TODO: these confidence boxes usually go together --> a structure to encapsulate them ??? 0060 0061 void DNNFaceDetectorBase::selectBbox(const cv::Size& paddedSize, 0062 float confidence, 0063 int left, 0064 int right, 0065 int top, 0066 int bottom, 0067 std::vector<float>& goodConfidences, 0068 std::vector<cv::Rect>& goodBoxes, 0069 std::vector<float>& doubtConfidences, 0070 std::vector<cv::Rect>& doubtBoxes) const 0071 { 0072 int width = right - left; 0073 int height = bottom - top; 0074 0075 cv::Rect bbox(left, top, width, height); 0076 0077 // take the net size of image 0078 0079 int borderLeft = paddedSize.width; 0080 int borderRight = inputImageSize.width - paddedSize.width; 0081 int borderTop = paddedSize.height; 0082 int borderBottom = inputImageSize.height - paddedSize.height; 0083 0084 /** 0085 * Classify bounding boxes detected. 0086 * Good bounding boxes are defined as boxes that reside within the non-padded zone or 0087 * those that are out only for min of (10% of padded range, 10% of bbox dim). 0088 * 0089 * Bad bounding boxes are defined as boxes that have at maximum 25% of each dimension 0090 * out of non-padded zone. 0091 */ 0092 0093 if ((left >= (int)cv::min(borderLeft*0.9, borderLeft - 0.1*width)) && 0094 (right <= (int)cv::max(borderRight + 0.1*paddedSize.width, borderRight + 0.1*width)) && 0095 (top >= (int)cv::min(borderTop*0.9, borderTop - 0.1*height)) && 0096 (bottom <= (int)cv::max(borderBottom + 0.1*paddedSize.height, borderBottom + 0.1*height))) 0097 { 0098 goodBoxes.push_back(bbox); 0099 goodConfidences.push_back(confidence); 0100 0101 qCDebug(DIGIKAM_FACESENGINE_LOG) << "Good rect = " << QRect(bbox.x, bbox.y, bbox.width, bbox.height) 0102 << ", conf = " << confidence; 0103 } 0104 else if ((right > left) && 0105 (right >= (borderLeft + 0.75*width)) && 0106 (left <= (borderRight - 0.75*width)) && 0107 (bottom > top) && 0108 (bottom >= (borderRight + 0.75*height)) && 0109 (top <= (borderBottom - 0.75*height))) 0110 { 0111 doubtBoxes.push_back(bbox); 0112 doubtConfidences.push_back(confidence); 0113 0114 qCDebug(DIGIKAM_FACESENGINE_LOG) << "Doubt rect = " << QRect(bbox.x, bbox.y, bbox.width, bbox.height) 0115 << ", conf = " << confidence; 0116 } 0117 } 0118 0119 void DNNFaceDetectorBase::correctBbox(cv::Rect& bbox, const cv::Size& paddedSize) const 0120 { 0121 // TODO: Should the box be cropped to square or not??? 0122 0123 int left = cv::max(bbox.x - paddedSize.width, 0); 0124 int right = cv::min(left + bbox.width, inputImageSize.width - 2*paddedSize.width); 0125 int top = cv::max(bbox.y - paddedSize.height, 0); 0126 int bottom = cv::min(top + bbox.height, inputImageSize.height - 2*paddedSize.height); 0127 0128 bbox.x = left; 0129 bbox.y = top; 0130 bbox.width = right - left; 0131 bbox.height = bottom - top; 0132 } 0133 0134 } // namespace Digikam