File indexing completed on 2025-03-09 03:55:01
0001 /* ============================================================ 0002 * 0003 * This file is a part of digiKam 0004 * 0005 * Date : 2020-05-22 0006 * Description : Wrapper of face recognition using OpenFace 0007 * 0008 * SPDX-FileCopyrightText: 2019 by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com> 0009 * SPDX-FileCopyrightText: 2020-2024 by Gilles Caulier <caulier dot gilles at gmail dot com> 0010 * SPDX-FileCopyrightText: 2020 by Nghia Duong <minhnghiaduong997 at gmail dot com> 0011 * 0012 * SPDX-License-Identifier: GPL-2.0-or-later 0013 * 0014 * ============================================================ */ 0015 0016 #ifndef OPENCV_DNN_FACERECOGNIZER_P_H 0017 #define OPENCV_DNN_FACERECOGNIZER_P_H 0018 0019 #include "opencvdnnfacerecognizer.h" 0020 0021 // C++ includes 0022 0023 #include <iostream> 0024 0025 // Qt includes 0026 0027 #include <QElapsedTimer> 0028 0029 // Local includes 0030 0031 #include "digikam_debug.h" 0032 #include "dnnfaceextractor.h" 0033 #include "facedbaccess.h" 0034 #include "facedb.h" 0035 #include "kd_tree.h" 0036 0037 namespace Digikam 0038 { 0039 0040 class Q_DECL_HIDDEN OpenCVDNNFaceRecognizer::Private 0041 { 0042 public: 0043 0044 Private(Classifier method) 0045 : method (method), 0046 tree (nullptr), 0047 kNeighbors (5), 0048 threshold (0.4), 0049 newDataAdded (true) 0050 { 0051 for (int i = 0 ; i < 1 ; ++i) 0052 { 0053 extractors << new DNNFaceExtractor; 0054 } 0055 0056 switch (method) 0057 { 0058 case SVM: 0059 { 0060 svm = cv::ml::SVM::create(); 0061 svm->setKernel(cv::ml::SVM::LINEAR); 0062 break; 0063 } 0064 0065 case OpenCV_KNN: 0066 { 0067 knn = cv::ml::KNearest::create(); 0068 knn->setAlgorithmType(cv::ml::KNearest::BRUTE_FORCE); 0069 knn->setIsClassifier(true); 0070 break; 0071 } 0072 0073 case Tree: 0074 { 0075 tree = FaceDbAccess().db()->reconstructTree(); 0076 break; 0077 } 0078 0079 case DB: 0080 { 0081 break; 0082 } 0083 0084 default: 0085 { 0086 qFatal("Invalid classifier"); 0087 } 0088 } 0089 } 0090 0091 ~Private() 0092 { 0093 QVector<DNNFaceExtractor*>::iterator extractor = extractors.begin(); 0094 0095 while (extractor != extractors.end()) 0096 { 0097 delete *extractor; 0098 extractor = extractors.erase(extractor); 0099 } 0100 0101 delete tree; 0102 } 0103 0104 public: 0105 0106 bool trainSVM(); 0107 bool trainKNN(); 0108 0109 int predictSVM(const cv::Mat& faceEmbedding); 0110 int predictKNN(const cv::Mat& faceEmbedding); 0111 0112 int predictKDTree(const cv::Mat& faceEmbedding) const; 0113 int predictDb(const cv::Mat& faceEmbedding) const; 0114 0115 bool insertData(const cv::Mat& position, const int label, const QString& context = QString()); 0116 0117 public: 0118 0119 Classifier method; 0120 0121 QVector<DNNFaceExtractor*> extractors; 0122 cv::Ptr<cv::ml::SVM> svm; 0123 cv::Ptr<cv::ml::KNearest> knn; 0124 0125 KDTree* tree; 0126 int kNeighbors; 0127 float threshold; 0128 0129 bool newDataAdded; 0130 0131 public: 0132 0133 class ParallelRecognizer; 0134 class ParallelTrainer; 0135 }; 0136 0137 class OpenCVDNNFaceRecognizer::Private::ParallelRecognizer : public cv::ParallelLoopBody 0138 { 0139 public: 0140 0141 ParallelRecognizer(OpenCVDNNFaceRecognizer::Private* d, 0142 const QList<QImage*>& images, 0143 QVector<int>& ids) 0144 : images (images), 0145 ids (ids), 0146 d (d) 0147 { 0148 ids.resize(images.size()); 0149 } 0150 0151 void operator()(const cv::Range& range) const override 0152 { 0153 for(int i = range.start ; i < range.end ; ++i) 0154 { 0155 int id = -1; 0156 0157 cv::Mat faceEmbedding = d->extractors[i%(d->extractors.size())]->getFaceEmbedding(OpenCVDNNFaceRecognizer::prepareForRecognition(*images[i])); 0158 0159 switch (d->method) 0160 { 0161 case SVM: 0162 { 0163 id = d->predictSVM(faceEmbedding); 0164 break; 0165 } 0166 0167 case OpenCV_KNN: 0168 { 0169 id = d->predictKNN(faceEmbedding); 0170 break; 0171 } 0172 0173 case Tree: 0174 { 0175 id = d->predictKDTree(faceEmbedding); 0176 break; 0177 } 0178 0179 case DB: 0180 { 0181 id = d->predictDb(faceEmbedding); 0182 break; 0183 } 0184 0185 default: 0186 { 0187 qCWarning(DIGIKAM_FACEDB_LOG) << "Not recognized classifying method"; 0188 } 0189 } 0190 0191 ids[i] = id; 0192 } 0193 } 0194 0195 private: 0196 0197 const QList<QImage*>& images; 0198 QVector<int>& ids; 0199 0200 OpenCVDNNFaceRecognizer::Private* const d; 0201 0202 private: 0203 0204 Q_DISABLE_COPY(ParallelRecognizer) 0205 }; 0206 0207 class OpenCVDNNFaceRecognizer::Private::ParallelTrainer: public cv::ParallelLoopBody 0208 { 0209 public: 0210 0211 ParallelTrainer(OpenCVDNNFaceRecognizer::Private* d, 0212 const QList<QImage*>& images, 0213 const int& id, 0214 const QString& context) 0215 : images (images), 0216 id (id), 0217 context (context), 0218 d (d) 0219 { 0220 } 0221 0222 void operator()(const cv::Range& range) const override 0223 { 0224 for(int i = range.start ; i < range.end ; ++i) 0225 { 0226 cv::Mat faceEmbedding = d->extractors[i%(d->extractors.size())]-> 0227 getFaceEmbedding(OpenCVDNNFaceRecognizer::prepareForRecognition(*images[i])); 0228 0229 if (!d->insertData(faceEmbedding, id, context)) 0230 { 0231 qCWarning(DIGIKAM_FACEDB_LOG) << "Fail to register a face of identity" << id; 0232 } 0233 } 0234 } 0235 0236 private: 0237 0238 const QList<QImage*>& images; 0239 const int& id; 0240 const QString& context; 0241 0242 OpenCVDNNFaceRecognizer::Private* const d; 0243 0244 private: 0245 0246 Q_DISABLE_COPY(ParallelTrainer) 0247 }; 0248 0249 bool OpenCVDNNFaceRecognizer::Private::trainSVM() 0250 { 0251 QElapsedTimer timer; 0252 timer.start(); 0253 0254 svm->train(FaceDbAccess().db()->trainData()); 0255 0256 qCDebug(DIGIKAM_FACEDB_LOG) << "Support vector machine trains in" << timer.elapsed() << "ms"; 0257 0258 return (svm->isTrained()); 0259 } 0260 0261 bool OpenCVDNNFaceRecognizer::Private::trainKNN() 0262 { 0263 QElapsedTimer timer; 0264 timer.start(); 0265 0266 knn->train(FaceDbAccess().db()->trainData()); 0267 0268 qCDebug(DIGIKAM_FACEDB_LOG) << "KNN trains in" << timer.elapsed() << "ms"; 0269 0270 return (knn->isTrained()); 0271 } 0272 0273 int OpenCVDNNFaceRecognizer::Private::predictSVM(const cv::Mat& faceEmbedding) 0274 { 0275 if (newDataAdded) 0276 { 0277 if (!trainSVM()) 0278 { 0279 return -1; 0280 } 0281 0282 newDataAdded = false; 0283 } 0284 0285 return (int(svm->predict(faceEmbedding))); 0286 } 0287 0288 int OpenCVDNNFaceRecognizer::Private::predictKNN(const cv::Mat& faceEmbedding) 0289 { 0290 if (newDataAdded) 0291 { 0292 if (!trainKNN()) 0293 { 0294 return -1; 0295 } 0296 0297 newDataAdded = false; 0298 } 0299 0300 cv::Mat output; 0301 knn->findNearest(faceEmbedding, kNeighbors, output); 0302 0303 return (int(output.at<float>(0))); 0304 } 0305 0306 int OpenCVDNNFaceRecognizer::Private::predictKDTree(const cv::Mat& faceEmbedding) const 0307 { 0308 if (!tree) 0309 { 0310 return -1; 0311 } 0312 0313 // Look for K-nearest neighbor which have the cosine distance greater than the threshold. 0314 0315 QMap<double, QVector<int> > closestNeighbors = tree->getClosestNeighbors(faceEmbedding, threshold, 0.8, kNeighbors); 0316 0317 QMap<int, QVector<double> > votingGroups; 0318 0319 for (QMap<double, QVector<int> >::const_iterator iter = closestNeighbors.cbegin(); 0320 iter != closestNeighbors.cend(); 0321 ++iter) 0322 { 0323 for (QVector<int>::const_iterator node = iter.value().cbegin(); 0324 node != iter.value().cend(); 0325 ++node) 0326 { 0327 int label = (*node); 0328 0329 votingGroups[label].append(iter.key()); 0330 } 0331 } 0332 0333 double maxScore = 0.0; 0334 int prediction = -1; 0335 0336 for (QMap<int, QVector<double> >::const_iterator group = votingGroups.cbegin(); 0337 group != votingGroups.cend(); 0338 ++group) 0339 { 0340 double score = 0.0; 0341 0342 for (int i = 0 ; i < group.value().size() ; ++i) 0343 { 0344 score += (threshold - group.value()[i]); 0345 } 0346 0347 if (score > maxScore) 0348 { 0349 maxScore = score; 0350 prediction = group.key(); 0351 } 0352 } 0353 0354 return prediction; 0355 } 0356 0357 int OpenCVDNNFaceRecognizer::Private::predictDb(const cv::Mat& faceEmbedding) const 0358 { 0359 QMap<double, QVector<int> > closestNeighbors = FaceDbAccess().db()->getClosestNeighborsTreeDb(faceEmbedding, threshold, 0.8, kNeighbors); 0360 0361 QMap<int, QVector<double> > votingGroups; 0362 0363 for (QMap<double, QVector<int> >::const_iterator iter = closestNeighbors.cbegin(); 0364 iter != closestNeighbors.cend(); 0365 ++iter) 0366 { 0367 for (int i = 0 ; i < iter.value().size() ; ++i) 0368 { 0369 votingGroups[iter.value()[i]].append(iter.key()); 0370 } 0371 } 0372 0373 double maxScore = 0.0; 0374 int prediction = -1; 0375 0376 for (QMap<int, QVector<double> >::const_iterator group = votingGroups.cbegin(); 0377 group != votingGroups.cend(); 0378 ++group) 0379 { 0380 double score = 0.0; 0381 0382 for (int i = 0 ; i < group.value().size() ; ++i) 0383 { 0384 score += (threshold - group.value()[i]); 0385 } 0386 0387 if (score > maxScore) 0388 { 0389 maxScore = score; 0390 prediction = group.key(); 0391 } 0392 } 0393 0394 return prediction; 0395 } 0396 0397 bool OpenCVDNNFaceRecognizer::Private::insertData(const cv::Mat& nodePos, const int label, const QString& context) 0398 { 0399 int nodeId = FaceDbAccess().db()->insertFaceVector(nodePos, label, context); 0400 0401 if (nodeId <= 0) 0402 { 0403 qCWarning(DIGIKAM_FACEDB_LOG) << "error inserting face embedding to database"; 0404 } 0405 0406 if (method == DB) 0407 { 0408 if (! FaceDbAccess().db()->insertToTreeDb(nodeId, nodePos)) 0409 { 0410 qCWarning(DIGIKAM_FACEDB_LOG) << "Error insert face embedding"; 0411 0412 return false; 0413 } 0414 } 0415 else if (method == Tree) 0416 { 0417 KDNode* const newNode = tree->add(nodePos, label); 0418 0419 if (newNode) 0420 { 0421 newNode->setNodeId(nodeId); 0422 } 0423 else 0424 { 0425 qCWarning(DIGIKAM_FACEDB_LOG) << "Error insert new node" << nodeId; 0426 0427 return false; 0428 } 0429 } 0430 0431 return true; 0432 } 0433 0434 } // namespace Digikam 0435 0436 #endif // OPENCV_DNN_FACERECOGNIZER_P_H