File indexing completed on 2025-03-09 03:54:58
0001 /* ============================================================ 0002 * 0003 * This file is a part of digiKam 0004 * 0005 * Date : 2019-07-22 0006 * Description : Class to perform faces detection using OpenCV DNN module 0007 * 0008 * SPDX-FileCopyrightText: 2019 by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com> 0009 * SPDX-FileCopyrightText: 2020-2024 by Gilles Caulier <caulier dot gilles at gmail dot com> 0010 * 0011 * SPDX-License-Identifier: GPL-2.0-or-later 0012 * 0013 * ============================================================ */ 0014 0015 #include "opencvdnnfacedetector.h" 0016 0017 // C++ includes 0018 0019 #include <vector> 0020 0021 // Qt includes 0022 0023 #include <QtGlobal> 0024 #include <QStandardPaths> 0025 #include <qmath.h> 0026 0027 // Local includes 0028 0029 #include "digikam_debug.h" 0030 #include "dnnfacedetectorssd.h" 0031 #include "dnnfacedetectoryolo.h" 0032 0033 namespace Digikam 0034 { 0035 0036 OpenCVDNNFaceDetector::OpenCVDNNFaceDetector(DetectorNNModel model) 0037 : m_modelType(model) 0038 { 0039 switch (m_modelType) 0040 { 0041 case DetectorNNModel::SSDMOBILENET: 0042 { 0043 m_inferenceEngine = new DNNFaceDetectorSSD; 0044 break; 0045 } 0046 0047 case DetectorNNModel::YOLO: 0048 { 0049 m_inferenceEngine = new DNNFaceDetectorYOLO; 0050 break; 0051 } 0052 0053 default: 0054 { 0055 qFatal("UNKNOWN neural network model"); 0056 } 0057 } 0058 } 0059 0060 OpenCVDNNFaceDetector::~OpenCVDNNFaceDetector() 0061 { 0062 delete m_inferenceEngine; 0063 } 0064 0065 int OpenCVDNNFaceDetector::recommendedImageSizeForDetection() 0066 { 0067 return 800; 0068 } 0069 0070 // TODO: prepareForDetection give different performances 0071 cv::Mat OpenCVDNNFaceDetector::prepareForDetection(const DImg& inputImage, cv::Size& paddedSize) const 0072 { 0073 if (inputImage.isNull() || !inputImage.size().isValid()) 0074 { 0075 return cv::Mat(); 0076 } 0077 0078 cv::Mat cvImage; 0079 int type = inputImage.sixteenBit() ? CV_16UC4 : CV_8UC4; 0080 cv::Mat cvImageWrapper = cv::Mat(inputImage.height(), inputImage.width(), type, inputImage.bits()); 0081 0082 if (inputImage.hasAlpha()) 0083 { 0084 cvtColor(cvImageWrapper, cvImage, cv::COLOR_RGBA2BGR); 0085 } 0086 else 0087 { 0088 cvtColor(cvImageWrapper, cvImage, cv::COLOR_RGB2BGR); 0089 } 0090 0091 if (type == CV_16UC4) 0092 { 0093 cvImage.convertTo(cvImage, CV_8UC3, 1 / 256.0); 0094 } 0095 0096 return prepareForDetection(cvImage, paddedSize); 0097 } 0098 0099 cv::Mat OpenCVDNNFaceDetector::prepareForDetection(const QImage& inputImage, cv::Size& paddedSize) const 0100 { 0101 if (inputImage.isNull() || !inputImage.size().isValid()) 0102 { 0103 return cv::Mat(); 0104 } 0105 0106 cv::Mat cvImage; 0107 cv::Mat cvImageWrapper; 0108 QImage qimage(inputImage); 0109 0110 switch (qimage.format()) 0111 { 0112 case QImage::Format_RGB32: 0113 case QImage::Format_ARGB32: 0114 case QImage::Format_ARGB32_Premultiplied: 0115 { 0116 // I think we can ignore premultiplication when converting to grayscale 0117 0118 cvImageWrapper = cv::Mat(qimage.height(), qimage.width(), CV_8UC4, 0119 qimage.scanLine(0), qimage.bytesPerLine()); 0120 cvtColor(cvImageWrapper, cvImage, cv::COLOR_RGBA2BGR); 0121 break; 0122 } 0123 0124 default: 0125 { 0126 qimage = qimage.convertToFormat(QImage::Format_RGB888); 0127 cvImageWrapper = cv::Mat(qimage.height(), qimage.width(), CV_8UC3, 0128 qimage.scanLine(0), qimage.bytesPerLine()); 0129 cvtColor(cvImageWrapper, cvImage, cv::COLOR_RGB2BGR); 0130 break; 0131 } 0132 } 0133 0134 return prepareForDetection(cvImage, paddedSize); 0135 } 0136 0137 cv::Mat OpenCVDNNFaceDetector::prepareForDetection(const QString& inputImagePath, cv::Size& paddedSize) const 0138 { 0139 std::vector<char> buffer; 0140 QFile file(inputImagePath); 0141 buffer.resize(file.size()); 0142 0143 if (!file.open(QIODevice::ReadOnly)) 0144 { 0145 return cv::Mat(); 0146 } 0147 0148 file.read(buffer.data(), file.size()); 0149 file.close(); 0150 0151 cv::Mat cvImage = cv::imdecode(std::vector<char>(buffer.begin(), buffer.end()), cv::IMREAD_COLOR); 0152 0153 return prepareForDetection(cvImage, paddedSize); 0154 } 0155 0156 cv::Mat OpenCVDNNFaceDetector::prepareForDetection(cv::Mat& cvImage, cv::Size& paddedSize) const 0157 { 0158 // Resize image before padding to fit in neural net 0159 0160 cv::Size inputImageSize = m_inferenceEngine->nnInputSizeRequired(); 0161 float k = qMin(inputImageSize.width * 1.0 / cvImage.cols, 0162 inputImageSize.height * 1.0 / cvImage.rows); 0163 0164 int newWidth = (int)(k * cvImage.cols); 0165 int newHeight = (int)(k * cvImage.rows); 0166 cv::resize(cvImage, cvImage, cv::Size(newWidth, newHeight)); 0167 0168 // Pad with black pixels 0169 0170 int padX = (inputImageSize.width - newWidth) / 2; 0171 int padY = (inputImageSize.height - newHeight) / 2; 0172 0173 cv::Mat imagePadded; 0174 0175 cv::copyMakeBorder(cvImage, imagePadded, 0176 padY, padY, 0177 padX, padX, 0178 cv::BORDER_CONSTANT, 0179 cv::Scalar(0, 0, 0)); 0180 0181 paddedSize = cv::Size(padX, padY); 0182 0183 return imagePadded; 0184 } 0185 0186 /** 0187 * There is no proof that doing this will help, since face can be detected at various positions (even half, masked faces 0188 * can be detected), not only frontal. Effort on doing this should be questioned. 0189 * TODO: Restructure and improve Face Detection module. 0190 0191 void OpenCVDNNFaceDetector::resizeBboxToStandardHumanFace(int& width, int& height) 0192 { 0193 // Human head sizes data 0194 // https://en.wikipedia.org/wiki/Human_head#Average_head_sizes 0195 0196 float maxRatioFrontalFace = 15.4 / 15.5; 0197 float minRatioNonFrontalFace = 8.6 / 21.6; 0198 0199 float r = width*1.0/height, rReference; 0200 0201 if ((r >= minRatioNonFrontalFace*0.9) && r <= (maxRatioFrontalFace * 1.1)) 0202 { 0203 rReference = r; 0204 } 0205 else if (r <= 0.25) 0206 { 0207 rReference = r * 1.5; 0208 } 0209 else if (r >= 4) 0210 { 0211 rReference = r / 1.5; 0212 } 0213 else if (r < minRatioNonFrontalFace * 0.9) 0214 { 0215 rReference = minRatioNonFrontalFace; 0216 } 0217 else if (r > maxRatioFrontalFace * 1.1) 0218 { 0219 rReference = maxRatioFrontalFace; 0220 } 0221 0222 if (width > height) 0223 { 0224 height = width / rReference; 0225 } 0226 else 0227 { 0228 width = height * rReference; 0229 } 0230 } 0231 */ 0232 0233 QList<QRect> OpenCVDNNFaceDetector::detectFaces(const cv::Mat& inputImage, 0234 const cv::Size& paddedSize) 0235 { 0236 std::vector<cv::Rect> detectedBboxes = cvDetectFaces(inputImage, paddedSize); 0237 0238 QList<QRect> results; 0239 /* 0240 cv::Mat imageTest = inputImage.clone(); 0241 */ 0242 for (const cv::Rect& bbox : detectedBboxes) 0243 { 0244 QRect rect(bbox.x, bbox.y, bbox.width, bbox.height); 0245 results << rect; 0246 /* 0247 qCDebug(DIGIKAM_FACESENGINE_LOG) << rect; 0248 cv::rectangle(imageTest, cv::Rect(bbox.x + paddedSize.width, 0249 bbox.y + paddedSize.height, 0250 bbox.width, bbox.height), cv::Scalar(0, 128, 0)); 0251 */ 0252 } 0253 /* 0254 cv::imshow("image", imageTest); 0255 cv::waitKey(0); 0256 */ 0257 return results; 0258 } 0259 0260 std::vector<cv::Rect> OpenCVDNNFaceDetector::cvDetectFaces(const cv::Mat& inputImage, 0261 const cv::Size& paddedSize) 0262 { 0263 std::vector<cv::Rect> detectedBboxes; 0264 0265 m_inferenceEngine->detectFaces(inputImage, paddedSize, detectedBboxes); 0266 0267 return detectedBboxes; 0268 } 0269 0270 } // namespace Digikam