detection/opencv-dnn/dnnfacedetectorbase.cpp

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam
0004  *
0005  * Date        : 2019-08-08
0006  * Description : Base class to perform low-level neural network inference
0007  *               for face detection
0008  *
0009  * SPDX-FileCopyrightText: 2019 by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com>
0010  * SPDX-FileCopyrightText: 2020-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0011  *
0012  * SPDX-License-Identifier: GPL-2.0-or-later
0013  *
0014  * ============================================================ */
0015
0016 #include "dnnfacedetectorbase.h"
0017
0018 // Qt includes
0019
0020 #include <QtGlobal>
0021 #include <QRect>
0022
0023 // Local includes
0024
0025 #include "digikam_debug.h"
0026
0027 namespace Digikam
0028 {
0029 // TODO: re-verify these threshold
0030
0031 float DNNFaceDetectorBase::confidenceThreshold  = 0.6F;
0032 float DNNFaceDetectorBase::nmsThreshold         = 0.4F;
0033
0034 // --------------------------------------------------------------------------------------------------------
0035
0036 DNNFaceDetectorBase::DNNFaceDetectorBase()
0037   : scaleFactor(1.0)
0038 {
0039 }
0040
0041 DNNFaceDetectorBase::DNNFaceDetectorBase(float scale,
0042                                          const cv::Scalar& val,
0043                                          const cv::Size& inputImgSize)
0044   : scaleFactor         (scale),
0045     meanValToSubtract   (val),
0046     inputImageSize      (inputImgSize)
0047 {
0048 }
0049
0050 DNNFaceDetectorBase::~DNNFaceDetectorBase()
0051 {
0052 }
0053
0054 cv::Size DNNFaceDetectorBase::nnInputSizeRequired() const
0055 {
0056     return inputImageSize;
0057 }
0058
0059 // TODO: these confidence boxes usually go together --> a structure to encapsulate them ???
0060
0061 void DNNFaceDetectorBase::selectBbox(const cv::Size& paddedSize,
0062                                      float confidence,
0063                                      int left,
0064                                      int right,
0065                                      int top,
0066                                      int bottom,
0067                                      std::vector<float>& goodConfidences,
0068                                      std::vector<cv::Rect>& goodBoxes,
0069                                      std::vector<float>& doubtConfidences,
0070                                      std::vector<cv::Rect>& doubtBoxes) const
0071 {
0072     int width        = right  - left;
0073     int height       = bottom - top;
0074
0075     cv::Rect bbox(left, top, width, height);
0076
0077     // take the net size of image
0078
0079     int borderLeft   = paddedSize.width;
0080     int borderRight  = inputImageSize.width  - paddedSize.width;
0081     int borderTop    = paddedSize.height;
0082     int borderBottom = inputImageSize.height - paddedSize.height;
0083
0084     /**
0085      * Classify bounding boxes detected.
0086      * Good bounding boxes are defined as boxes that reside within the non-padded zone or
0087      * those that are out only for min of (10% of padded range, 10% of bbox dim).
0088      *
0089      * Bad bounding boxes are defined as boxes that have at maximum 25% of each dimension
0090      * out of non-padded zone.
0091      */
0092
0093     if      ((left   >= (int)cv::min(borderLeft*0.9,                       borderLeft   - 0.1*width))      &&
0094              (right  <= (int)cv::max(borderRight  + 0.1*paddedSize.width,  borderRight  + 0.1*width))      &&
0095              (top    >= (int)cv::min(borderTop*0.9,                        borderTop    - 0.1*height))     &&
0096              (bottom <= (int)cv::max(borderBottom + 0.1*paddedSize.height, borderBottom + 0.1*height)))
0097     {
0098         goodBoxes.push_back(bbox);
0099         goodConfidences.push_back(confidence);
0100
0101         qCDebug(DIGIKAM_FACESENGINE_LOG) << "Good rect = " << QRect(bbox.x, bbox.y, bbox.width, bbox.height)
0102                                          << ", conf = " << confidence;
0103     }
0104     else if ((right  >  left)                           &&
0105              (right  >= (borderLeft   + 0.75*width))    &&
0106              (left   <= (borderRight  - 0.75*width))    &&
0107              (bottom >  top)                            &&
0108              (bottom >= (borderRight  + 0.75*height))   &&
0109              (top    <= (borderBottom - 0.75*height)))
0110     {
0111         doubtBoxes.push_back(bbox);
0112         doubtConfidences.push_back(confidence);
0113
0114         qCDebug(DIGIKAM_FACESENGINE_LOG) << "Doubt rect = " << QRect(bbox.x, bbox.y, bbox.width, bbox.height)
0115                                          << ", conf = " << confidence;
0116     }
0117 }
0118
0119 void DNNFaceDetectorBase::correctBbox(cv::Rect& bbox, const cv::Size& paddedSize) const
0120 {
0121     // TODO: Should the box be cropped to square or not???
0122
0123     int left    = cv::max(bbox.x - paddedSize.width,  0);
0124     int right   = cv::min(left   + bbox.width,        inputImageSize.width  - 2*paddedSize.width);
0125     int top     = cv::max(bbox.y - paddedSize.height, 0);
0126     int bottom  = cv::min(top    + bbox.height,       inputImageSize.height - 2*paddedSize.height);
0127
0128     bbox.x      = left;
0129     bbox.y      = top;
0130     bbox.width  = right  - left;
0131     bbox.height = bottom - top;
0132 }
0133
0134 } // namespace Digikam