File indexing completed on 2025-03-09 03:55:01

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam
0004  *
0005  * Date        : 2020-05-22
0006  * Description : Wrapper of face recognition using OpenFace
0007  *
0008  * SPDX-FileCopyrightText: 2019      by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com>
0009  * SPDX-FileCopyrightText: 2020-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0010  * SPDX-FileCopyrightText: 2020      by Nghia Duong <minhnghiaduong997 at gmail dot com>
0011  *
0012  * SPDX-License-Identifier: GPL-2.0-or-later
0013  *
0014  * ============================================================ */
0015 
0016 #ifndef OPENCV_DNN_FACERECOGNIZER_P_H
0017 #define OPENCV_DNN_FACERECOGNIZER_P_H
0018 
0019 #include "opencvdnnfacerecognizer.h"
0020 
0021 // C++ includes
0022 
0023 #include <iostream>
0024 
0025 // Qt includes
0026 
0027 #include <QElapsedTimer>
0028 
0029 // Local includes
0030 
0031 #include "digikam_debug.h"
0032 #include "dnnfaceextractor.h"
0033 #include "facedbaccess.h"
0034 #include "facedb.h"
0035 #include "kd_tree.h"
0036 
0037 namespace Digikam
0038 {
0039 
0040 class Q_DECL_HIDDEN OpenCVDNNFaceRecognizer::Private
0041 {
0042 public:
0043 
0044     Private(Classifier method)
0045         : method        (method),
0046           tree          (nullptr),
0047           kNeighbors    (5),
0048           threshold     (0.4),
0049           newDataAdded  (true)
0050     {
0051         for (int i = 0 ; i < 1 ; ++i)
0052         {
0053             extractors << new DNNFaceExtractor;
0054         }
0055 
0056         switch (method)
0057         {
0058             case SVM:
0059             {
0060                 svm = cv::ml::SVM::create();
0061                 svm->setKernel(cv::ml::SVM::LINEAR);
0062                 break;
0063             }
0064 
0065             case OpenCV_KNN:
0066             {
0067                 knn = cv::ml::KNearest::create();
0068                 knn->setAlgorithmType(cv::ml::KNearest::BRUTE_FORCE);
0069                 knn->setIsClassifier(true);
0070                 break;
0071             }
0072 
0073             case Tree:
0074             {
0075                 tree = FaceDbAccess().db()->reconstructTree();
0076                 break;
0077             }
0078 
0079             case DB:
0080             {
0081                 break;
0082             }
0083 
0084             default:
0085             {
0086                 qFatal("Invalid classifier");
0087             }
0088         }
0089     }
0090 
0091     ~Private()
0092     {
0093         QVector<DNNFaceExtractor*>::iterator extractor = extractors.begin();
0094 
0095         while (extractor != extractors.end())
0096         {
0097             delete *extractor;
0098             extractor = extractors.erase(extractor);
0099         }
0100 
0101         delete tree;
0102     }
0103 
0104 public:
0105 
0106     bool trainSVM();
0107     bool trainKNN();
0108 
0109     int predictSVM(const cv::Mat& faceEmbedding);
0110     int predictKNN(const cv::Mat& faceEmbedding);
0111 
0112     int predictKDTree(const cv::Mat& faceEmbedding) const;
0113     int predictDb(const cv::Mat& faceEmbedding) const;
0114 
0115     bool insertData(const cv::Mat& position, const int label, const QString& context = QString());
0116 
0117 public:
0118 
0119     Classifier                 method;
0120 
0121     QVector<DNNFaceExtractor*> extractors;
0122     cv::Ptr<cv::ml::SVM>       svm;
0123     cv::Ptr<cv::ml::KNearest>  knn;
0124 
0125     KDTree*                    tree;
0126     int                        kNeighbors;
0127     float                      threshold;
0128 
0129     bool                       newDataAdded;
0130 
0131 public:
0132 
0133     class ParallelRecognizer;
0134     class ParallelTrainer;
0135 };
0136 
0137 class OpenCVDNNFaceRecognizer::Private::ParallelRecognizer : public cv::ParallelLoopBody
0138 {
0139 public:
0140 
0141     ParallelRecognizer(OpenCVDNNFaceRecognizer::Private* d,
0142                        const QList<QImage*>& images,
0143                        QVector<int>& ids)
0144         : images    (images),
0145           ids       (ids),
0146           d         (d)
0147     {
0148         ids.resize(images.size());
0149     }
0150 
0151     void operator()(const cv::Range& range) const override
0152     {
0153         for(int i = range.start ; i < range.end ; ++i)
0154         {
0155             int id = -1;
0156 
0157             cv::Mat faceEmbedding = d->extractors[i%(d->extractors.size())]->getFaceEmbedding(OpenCVDNNFaceRecognizer::prepareForRecognition(*images[i]));
0158 
0159             switch (d->method)
0160             {
0161                 case SVM:
0162                 {
0163                     id = d->predictSVM(faceEmbedding);
0164                     break;
0165                 }
0166 
0167                 case OpenCV_KNN:
0168                 {
0169                     id = d->predictKNN(faceEmbedding);
0170                     break;
0171                 }
0172 
0173                 case Tree:
0174                 {
0175                     id = d->predictKDTree(faceEmbedding);
0176                     break;
0177                 }
0178 
0179                 case DB:
0180                 {
0181                     id = d->predictDb(faceEmbedding);
0182                     break;
0183                 }
0184 
0185                 default:
0186                 {
0187                     qCWarning(DIGIKAM_FACEDB_LOG) << "Not recognized classifying method";
0188                 }
0189             }
0190 
0191             ids[i] = id;
0192         }
0193     }
0194 
0195 private:
0196 
0197     const QList<QImage*>&                   images;
0198     QVector<int>&                           ids;
0199 
0200     OpenCVDNNFaceRecognizer::Private* const d;
0201 
0202 private:
0203 
0204     Q_DISABLE_COPY(ParallelRecognizer)
0205 };
0206 
0207 class OpenCVDNNFaceRecognizer::Private::ParallelTrainer: public cv::ParallelLoopBody
0208 {
0209 public:
0210 
0211     ParallelTrainer(OpenCVDNNFaceRecognizer::Private* d,
0212                     const QList<QImage*>& images,
0213                     const int& id,
0214                     const QString& context)
0215         : images    (images),
0216           id        (id),
0217           context   (context),
0218           d         (d)
0219     {
0220     }
0221 
0222     void operator()(const cv::Range& range) const override
0223     {
0224         for(int i = range.start ; i < range.end ; ++i)
0225         {
0226             cv::Mat faceEmbedding = d->extractors[i%(d->extractors.size())]->
0227                 getFaceEmbedding(OpenCVDNNFaceRecognizer::prepareForRecognition(*images[i]));
0228 
0229             if (!d->insertData(faceEmbedding, id, context))
0230             {
0231                 qCWarning(DIGIKAM_FACEDB_LOG) << "Fail to register a face of identity" << id;
0232             }
0233         }
0234     }
0235 
0236 private:
0237 
0238     const QList<QImage*>&                   images;
0239     const int&                              id;
0240     const QString&                          context;
0241 
0242     OpenCVDNNFaceRecognizer::Private* const d;
0243 
0244 private:
0245 
0246     Q_DISABLE_COPY(ParallelTrainer)
0247 };
0248 
0249 bool OpenCVDNNFaceRecognizer::Private::trainSVM()
0250 {
0251     QElapsedTimer timer;
0252     timer.start();
0253 
0254     svm->train(FaceDbAccess().db()->trainData());
0255 
0256     qCDebug(DIGIKAM_FACEDB_LOG) << "Support vector machine trains in" << timer.elapsed() << "ms";
0257 
0258     return (svm->isTrained());
0259 }
0260 
0261 bool OpenCVDNNFaceRecognizer::Private::trainKNN()
0262 {
0263     QElapsedTimer timer;
0264     timer.start();
0265 
0266     knn->train(FaceDbAccess().db()->trainData());
0267 
0268     qCDebug(DIGIKAM_FACEDB_LOG) << "KNN trains in" << timer.elapsed() << "ms";
0269 
0270     return (knn->isTrained());
0271 }
0272 
0273 int OpenCVDNNFaceRecognizer::Private::predictSVM(const cv::Mat& faceEmbedding)
0274 {
0275     if (newDataAdded)
0276     {
0277         if (!trainSVM())
0278         {
0279             return -1;
0280         }
0281 
0282         newDataAdded = false;
0283     }
0284 
0285     return (int(svm->predict(faceEmbedding)));
0286 }
0287 
0288 int OpenCVDNNFaceRecognizer::Private::predictKNN(const cv::Mat& faceEmbedding)
0289 {
0290     if (newDataAdded)
0291     {
0292         if (!trainKNN())
0293         {
0294             return -1;
0295         }
0296 
0297         newDataAdded = false;
0298     }
0299 
0300     cv::Mat output;
0301     knn->findNearest(faceEmbedding, kNeighbors, output);
0302 
0303     return (int(output.at<float>(0)));
0304 }
0305 
0306 int OpenCVDNNFaceRecognizer::Private::predictKDTree(const cv::Mat& faceEmbedding) const
0307 {
0308     if (!tree)
0309     {
0310         return -1;
0311     }
0312 
0313     // Look for K-nearest neighbor which have the cosine distance greater than the threshold.
0314 
0315     QMap<double, QVector<int> > closestNeighbors = tree->getClosestNeighbors(faceEmbedding, threshold, 0.8, kNeighbors);
0316 
0317     QMap<int, QVector<double> > votingGroups;
0318 
0319     for (QMap<double, QVector<int> >::const_iterator iter  = closestNeighbors.cbegin();
0320                                                      iter != closestNeighbors.cend();
0321                                                      ++iter)
0322     {
0323         for (QVector<int>::const_iterator node  = iter.value().cbegin();
0324                                           node != iter.value().cend();
0325                                           ++node)
0326         {
0327             int label = (*node);
0328 
0329             votingGroups[label].append(iter.key());
0330         }
0331     }
0332 
0333     double maxScore = 0.0;
0334     int prediction  = -1;
0335 
0336     for (QMap<int, QVector<double> >::const_iterator group  = votingGroups.cbegin();
0337                                                      group != votingGroups.cend();
0338                                                      ++group)
0339     {
0340         double score = 0.0;
0341 
0342         for (int i = 0 ; i < group.value().size() ; ++i)
0343         {
0344             score += (threshold - group.value()[i]);
0345         }
0346 
0347         if (score > maxScore)
0348         {
0349             maxScore   = score;
0350             prediction = group.key();
0351         }
0352     }
0353 
0354     return prediction;
0355 }
0356 
0357 int OpenCVDNNFaceRecognizer::Private::predictDb(const cv::Mat& faceEmbedding) const
0358 {
0359     QMap<double, QVector<int> > closestNeighbors = FaceDbAccess().db()->getClosestNeighborsTreeDb(faceEmbedding, threshold, 0.8, kNeighbors);
0360 
0361     QMap<int, QVector<double> > votingGroups;
0362 
0363     for (QMap<double, QVector<int> >::const_iterator iter  = closestNeighbors.cbegin();
0364                                                      iter != closestNeighbors.cend();
0365                                                      ++iter)
0366     {
0367         for (int i = 0 ; i < iter.value().size() ; ++i)
0368         {
0369             votingGroups[iter.value()[i]].append(iter.key());
0370         }
0371     }
0372 
0373     double maxScore = 0.0;
0374     int prediction  = -1;
0375 
0376     for (QMap<int, QVector<double> >::const_iterator group  = votingGroups.cbegin();
0377                                                      group != votingGroups.cend();
0378                                                      ++group)
0379     {
0380         double score = 0.0;
0381 
0382         for (int i = 0 ; i < group.value().size() ; ++i)
0383         {
0384             score += (threshold - group.value()[i]);
0385         }
0386 
0387         if (score > maxScore)
0388         {
0389             maxScore   = score;
0390             prediction = group.key();
0391         }
0392     }
0393 
0394     return prediction;
0395 }
0396 
0397 bool OpenCVDNNFaceRecognizer::Private::insertData(const cv::Mat& nodePos, const int label, const QString& context)
0398 {
0399     int nodeId = FaceDbAccess().db()->insertFaceVector(nodePos, label, context);
0400 
0401     if (nodeId <= 0)
0402     {
0403         qCWarning(DIGIKAM_FACEDB_LOG) << "error inserting face embedding to database";
0404     }
0405 
0406     if      (method == DB)
0407     {
0408         if (! FaceDbAccess().db()->insertToTreeDb(nodeId, nodePos))
0409         {
0410             qCWarning(DIGIKAM_FACEDB_LOG) << "Error insert face embedding";
0411 
0412             return false;
0413         }
0414     }
0415     else if (method == Tree)
0416     {
0417         KDNode* const newNode = tree->add(nodePos, label);
0418 
0419         if (newNode)
0420         {
0421             newNode->setNodeId(nodeId);
0422         }
0423         else
0424         {
0425             qCWarning(DIGIKAM_FACEDB_LOG) << "Error insert new node" << nodeId;
0426 
0427             return false;
0428         }
0429     }
0430 
0431     return true;
0432 }
0433 
0434 } // namespace Digikam
0435 
0436 #endif // OPENCV_DNN_FACERECOGNIZER_P_H