File indexing completed on 2025-03-09 03:55:00
0001 /* ============================================================ 0002 * 0003 * This file is a part of digiKam 0004 * 0005 * Date : 2019-06-01 0006 * Description : Face recognition using deep learning 0007 * The internal DNN library interface 0008 * 0009 * SPDX-FileCopyrightText: 2019 by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com> 0010 * SPDX-FileCopyrightText: 2020-2024 by Gilles Caulier <caulier dot gilles at gmail dot com> 0011 * 0012 * SPDX-License-Identifier: GPL-2.0-or-later 0013 * 0014 * ============================================================ */ 0015 0016 #include "dnnfaceextractor.h" 0017 0018 // Qt includes 0019 0020 #include <QMutex> 0021 #include <QString> 0022 #include <QFileInfo> 0023 #include <QDataStream> 0024 #include <QMutexLocker> 0025 #include <QElapsedTimer> 0026 #include <QStandardPaths> 0027 0028 // Local includes 0029 0030 #include "digikam_debug.h" 0031 #include "digikam_config.h" 0032 #include "recognitionpreprocessor.h" 0033 0034 namespace Digikam 0035 { 0036 0037 class Q_DECL_HIDDEN DNNFaceExtractor::Private 0038 { 0039 public: 0040 0041 Private() 0042 : preprocessor (nullptr), 0043 ref (1), 0044 0045 // As we use OpenFace, we need to set appropriate values for image color space and image size 0046 0047 imageSize (cv::Size(96, 96)), 0048 scaleFactor (1.0F / 255.0F), 0049 meanValToSubtract (cv::Scalar(0.0, 0.0, 0.0)) 0050 { 0051 } 0052 0053 ~Private() 0054 { 0055 delete preprocessor; 0056 } 0057 0058 public: 0059 0060 RecognitionPreprocessor* preprocessor; 0061 0062 int ref; 0063 0064 cv::dnn::Net net; 0065 QMutex mutex; 0066 0067 cv::Size imageSize; 0068 float scaleFactor; 0069 cv::Scalar meanValToSubtract; 0070 }; 0071 0072 DNNFaceExtractor::DNNFaceExtractor() 0073 : d(new Private) 0074 { 0075 loadModels(); 0076 } 0077 0078 DNNFaceExtractor::DNNFaceExtractor(const DNNFaceExtractor& other) 0079 : d(other.d) 0080 { 0081 ++(d->ref); 0082 } 0083 0084 DNNFaceExtractor::~DNNFaceExtractor() 0085 { 0086 --(d->ref); 0087 0088 if (d->ref == 0) 0089 { 0090 delete d; 0091 } 0092 } 0093 0094 bool DNNFaceExtractor::loadModels() 0095 { 0096 QString appPath = QStandardPaths::locate(QStandardPaths::GenericDataLocation, 0097 QLatin1String("digikam/facesengine"), 0098 QStandardPaths::LocateDirectory); 0099 0100 /* 0101 QString proto = QLatin1String("ResNet-50-deploy.prototxt"); 0102 QString model = QLatin1String("ResNet-50-model.caffemodel"); 0103 0104 QString nnproto = appPath + QLatin1Char('/') + proto; 0105 QString nnmodel = appPath + QLatin1Char('/') + model; 0106 0107 if (!nnproto.isEmpty() && !nnmodel.isEmpty()) 0108 { 0109 qCDebug(DIGIKAM_FACEDB_LOG) << nnproto; 0110 qCDebug(DIGIKAM_FACEDB_LOG) << nnmodel; 0111 0112 d->net = cv::dnn::readNetFromCaffe(nnproto.toStdString(), nnmodel.toStdString()); 0113 } 0114 else 0115 { 0116 qCCritical(DIGIKAM_FACEDB_LOG) << "Cannot found faces engine DNN model" << proto << "or" << model; 0117 qCCritical(DIGIKAM_FACEDB_LOG) << "Faces recognition feature cannot be used!"; 0118 0119 return false; 0120 } 0121 */ 0122 0123 d->preprocessor = new RecognitionPreprocessor; 0124 d->preprocessor->init(PreprocessorSelection::OPENFACE); 0125 0126 QString model = QLatin1String("openface_nn4.small2.v1.t7"); 0127 QString nnmodel = appPath + QLatin1Char('/') + model; 0128 0129 if (QFileInfo::exists(nnmodel)) 0130 { 0131 try 0132 { 0133 qCDebug(DIGIKAM_FACEDB_LOG) << "Extractor model:" << nnmodel; 0134 0135 #ifdef Q_OS_WIN 0136 0137 d->net = cv::dnn::readNetFromTorch(nnmodel.toLocal8Bit().constData()); 0138 0139 #else 0140 0141 d->net = cv::dnn::readNetFromTorch(nnmodel.toStdString()); 0142 0143 #endif 0144 0145 #if (OPENCV_VERSION == QT_VERSION_CHECK(4, 7, 0)) 0146 0147 d->net.enableWinograd(false); 0148 0149 #endif 0150 0151 } 0152 catch (cv::Exception& e) 0153 { 0154 qCWarning(DIGIKAM_FACEDB_LOG) << "cv::Exception:" << e.what(); 0155 0156 return false; 0157 } 0158 catch (...) 0159 { 0160 qCWarning(DIGIKAM_FACEDB_LOG) << "Default exception from OpenCV"; 0161 0162 return false; 0163 } 0164 } 0165 else 0166 { 0167 qCCritical(DIGIKAM_FACEDB_LOG) << "Cannot found faces engine DNN model" << model; 0168 qCCritical(DIGIKAM_FACEDB_LOG) << "Faces recognition feature cannot be used!"; 0169 0170 return false; 0171 } 0172 0173 return true; 0174 } 0175 0176 double DNNFaceExtractor::cosineDistance(std::vector<float> v1, 0177 std::vector<float> v2) 0178 { 0179 Q_ASSERT(v1.size() == v2.size()); 0180 0181 double scalarProduct = std::inner_product(v1.begin(), v1.end(), v2.begin(), 0.0); 0182 double normV1 = sqrt(std::inner_product(v1.begin(), v1.end(), v1.begin(), 0.0)); 0183 double normV2 = sqrt(std::inner_product(v2.begin(), v2.end(), v2.begin(), 0.0)); 0184 0185 return (scalarProduct / (normV1 * normV2)); 0186 } 0187 0188 double DNNFaceExtractor::L2squareDistance(std::vector<float> v1, 0189 std::vector<float> v2) 0190 { 0191 Q_ASSERT(v1.size() == v2.size()); 0192 0193 double sqrDistance = 0.0; 0194 0195 for (size_t i = 0 ; i < v1.size() ; ++i) 0196 { 0197 sqrDistance += pow((v1[i] - v2[i]), 2); 0198 } 0199 0200 return sqrDistance; 0201 } 0202 0203 double DNNFaceExtractor::L2squareNormDistance(std::vector<float> v1, std::vector<float> v2) 0204 { 0205 Q_ASSERT(v1.size() == v2.size()); 0206 0207 double normV1 = sqrt(std::inner_product(v1.begin(), v1.end(), v1.begin(), 0.0)); 0208 double normV2 = sqrt(std::inner_product(v2.begin(), v2.end(), v2.begin(), 0.0)); 0209 double sqrDistance = 0.0; 0210 0211 for (size_t i = 0 ; i < v1.size() ; ++i) 0212 { 0213 sqrDistance += pow((v1[i]/normV1 - v2[i]/normV2), 2); 0214 } 0215 0216 return sqrDistance; 0217 } 0218 0219 cv::Mat DNNFaceExtractor::vectortomat(const std::vector<float>& vector) 0220 { 0221 cv::Mat mat(1, vector.size(), 5); 0222 0223 memcpy(mat.data, vector.data(), vector.size()*sizeof(float)); 0224 0225 return mat; 0226 } 0227 0228 QJsonArray DNNFaceExtractor::encodeVector(const std::vector<float>& vector) 0229 { 0230 QJsonArray array; 0231 0232 for (size_t i = 0 ; i < vector.size() ; ++i) 0233 { 0234 array << vector[i]; 0235 } 0236 0237 return array; 0238 } 0239 0240 std::vector<float> DNNFaceExtractor::decodeVector(const QJsonArray& json) 0241 { 0242 std::vector<float> vector; 0243 0244 for (int i = 0 ; i < json.size() ; ++i) 0245 { 0246 vector.push_back(static_cast<float>(json[i].toDouble())); 0247 } 0248 0249 return vector; 0250 } 0251 0252 0253 cv::Mat DNNFaceExtractor::alignFace(const cv::Mat& inputImage) const 0254 { 0255 return d->preprocessor->preprocess(inputImage); 0256 } 0257 0258 cv::Mat DNNFaceExtractor::getFaceEmbedding(const cv::Mat& faceImage) 0259 { 0260 cv::Mat face_descriptors; 0261 cv::Mat alignedFace; 0262 /* 0263 qCDebug(DIGIKAM_FACEDB_LOG) << "faceImage channels: " << faceImage.channels(); 0264 qCDebug(DIGIKAM_FACEDB_LOG) << "faceImage size: (" << faceImage.rows << ", " << faceImage.cols << ")\n"; 0265 */ 0266 QElapsedTimer timer; 0267 0268 timer.start(); 0269 /* 0270 alignedFace = faceImage; 0271 */ 0272 alignedFace = d->preprocessor->preprocess(faceImage); 0273 0274 qCDebug(DIGIKAM_FACEDB_LOG) << "Finish aligning face in " << timer.elapsed() << " ms"; 0275 qCDebug(DIGIKAM_FACEDB_LOG) << "Start neural network"; 0276 0277 timer.start(); 0278 0279 cv::Mat blob = cv::dnn::blobFromImage(alignedFace, d->scaleFactor, d->imageSize, cv::Scalar(), true, false); 0280 0281 if (!d->net.empty()) 0282 { 0283 QMutexLocker lock(&d->mutex); 0284 d->net.setInput(blob); 0285 face_descriptors = d->net.forward(); 0286 } 0287 0288 qCDebug(DIGIKAM_FACEDB_LOG) << "Finish computing face embedding in " 0289 << timer.elapsed() << " ms"; 0290 0291 /* 0292 cv::Mat blob = cv::dnn::blobFromImage(faceImage, 1.0 / 255, cv::Size(96, 96), cv::Scalar(0,0,0), false, true, CV_32F); // work for openface.nn4 0293 cv::Mat blob = cv::dnn::blobFromImage(faceImage, 1.0 / 255, cv::Size(224,224), cv::Scalar(0,0,0), false, true, CV_32F); 0294 net.setInput(blob); 0295 cv::Mat face_descriptors = net.forward(); 0296 */ 0297 0298 return face_descriptors; 0299 } 0300 0301 } // namespace Digikam