File indexing completed on 2025-03-09 03:54:58

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam
0004  *
0005  * Date        : 2019-08-08
0006  * Description : Base class to perform low-level neural network inference
0007  *               for face detection
0008  *
0009  * SPDX-FileCopyrightText: 2019 by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com>
0010  * SPDX-FileCopyrightText: 2020-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0011  *
0012  * SPDX-License-Identifier: GPL-2.0-or-later
0013  *
0014  * ============================================================ */
0015 
0016 #ifndef DIGIKAM_FACESENGINE_DNN_FACE_DETECTOR_BASE_H
0017 #define DIGIKAM_FACESENGINE_DNN_FACE_DETECTOR_BASE_H
0018 
0019 // C++ includes
0020 
0021 #include <vector>
0022 
0023 // Qt includes
0024 
0025 #include <QMutex>
0026 
0027 // Local includes
0028 
0029 #include "digikam_opencv.h"
0030 #include "digikam_export.h"
0031 
0032 namespace Digikam
0033 {
0034 
0035 class DIGIKAM_EXPORT DNNFaceDetectorBase
0036 {
0037     // Abstract base class for low-level neural network inference used for face detection; concrete detectors implement detectFaces().
0038 public:
0039     // Construction: default, or from (scale, val, inputImgSize). NOTE(review): presumably stored in scaleFactor / meanValToSubtract / inputImageSize - confirm in the .cpp.
0040     explicit DNNFaceDetectorBase();
0041     explicit DNNFaceDetectorBase(float scale, const cv::Scalar& val, const cv::Size& inputImgSize);
0042     virtual ~DNNFaceDetectorBase();
0043     /// Pure virtual detection entry point, to be provided by derived classes. NOTE(review): detectedBboxes is presumably the output list of face rectangles - confirm in implementations.
0044     virtual void detectFaces(const cv::Mat& inputImage,
0045                              const cv::Size& paddedSize,
0046                              std::vector<cv::Rect>& detectedBboxes) = 0;
0047     /// Const accessor returning a cv::Size. NOTE(review): presumably the network input size held in inputImageSize - confirm in the .cpp.
0048     cv::Size nnInputSizeRequired() const;
0049 
0050 protected:
0051     /// Helper for derived classes. NOTE(review): presumably files one candidate box (left/right/top/bottom + confidence) into either the "good" or the "doubt" lists - confirm in the .cpp.
0052     void selectBbox(const cv::Size& paddedSize,
0053                     float confidence,
0054                     int left,
0055                     int right,
0056                     int top,
0057                     int bottom,
0058                     std::vector<float>& goodConfidences, std::vector<cv::Rect>& goodBoxes,
0059                     std::vector<float>& doubtConfidences, std::vector<cv::Rect>& doubtBoxes) const;
0060     /// Receives bbox by non-const reference, so it can be adjusted in place. NOTE(review): presumably corrected relative to paddedSize - confirm in the .cpp.
0061     void correctBbox(cv::Rect& bbox,
0062                      const cv::Size& paddedSize) const;
0063 
0064 public:
0065 
0066     static float confidenceThreshold;    ///< Threshold for bbox detection. It can be init and changed in the GUI
0067     static float nmsThreshold;           ///< Threshold for nms suppression
0068 
0069 protected:
0070 
0071     float        scaleFactor;            ///< NOTE(review): presumably the "scale" constructor argument - confirm in the .cpp
0072     cv::Scalar   meanValToSubtract;      ///< NOTE(review): presumably the "val" constructor argument - confirm in the .cpp
0073     cv::Size     inputImageSize;         ///< NOTE(review): presumably the "inputImgSize" constructor argument - confirm in the .cpp
0074 
0075     cv::dnn::Net net;                    ///< OpenCV DNN network object, accessible to derived classes
0076 
0077     QMutex       mutex;                  ///< NOTE(review): presumably serializes access to net during inference - confirm in derived classes
0078 
0079 private:
0080 
0081     // Disable copy construction and copy assignment
0082     DNNFaceDetectorBase(const DNNFaceDetectorBase&)            = delete;
0083     DNNFaceDetectorBase& operator=(const DNNFaceDetectorBase&) = delete;
0084 };
0085 
0086 } // namespace Digikam
0087 
0088 #endif // DIGIKAM_FACESENGINE_DNN_FACE_DETECTOR_BASE_H