File indexing completed on 2025-03-09 03:55:00

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam
0004  *
0005  * Date        : 2019-06-01
0006  * Description : Face recognition using deep learning
0007  *               The internal DNN library interface
0008  *
0009  * SPDX-FileCopyrightText: 2019      by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com>
0010  * SPDX-FileCopyrightText: 2020-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0011  *
0012  * SPDX-License-Identifier: GPL-2.0-or-later
0013  *
0014  * ============================================================ */
0015 
0016 #include "dnnfaceextractor.h"
0017 
0018 // Qt includes
0019 
0020 #include <QMutex>
0021 #include <QString>
0022 #include <QFileInfo>
0023 #include <QDataStream>
0024 #include <QMutexLocker>
0025 #include <QElapsedTimer>
0026 #include <QStandardPaths>
0027 
0028 // Local includes
0029 
0030 #include "digikam_debug.h"
0031 #include "digikam_config.h"
0032 #include "recognitionpreprocessor.h"
0033 
0034 namespace Digikam
0035 {
0036 
0037 class Q_DECL_HIDDEN DNNFaceExtractor::Private
0038 {
0039 public:
0040 
0041     Private()
0042         : preprocessor      (nullptr),
0043           ref               (1),
0044 
0045           // As we use OpenFace, we need to set appropriate values for image color space and image size
0046 
0047           imageSize         (cv::Size(96, 96)),
0048           scaleFactor       (1.0F / 255.0F),
0049           meanValToSubtract (cv::Scalar(0.0, 0.0, 0.0))
0050     {
0051     }
0052 
0053     ~Private()
0054     {
0055         delete preprocessor;
0056     }
0057 
0058 public:
0059 
0060     RecognitionPreprocessor* preprocessor;
0061 
0062     int                      ref;
0063 
0064     cv::dnn::Net             net;
0065     QMutex                   mutex;
0066 
0067     cv::Size                 imageSize;
0068     float                    scaleFactor;
0069     cv::Scalar               meanValToSubtract;
0070 };
0071 
0072 DNNFaceExtractor::DNNFaceExtractor()
0073     : d(new Private)
0074 {
0075     loadModels();
0076 }
0077 
0078 DNNFaceExtractor::DNNFaceExtractor(const DNNFaceExtractor& other)
0079     : d(other.d)
0080 {
0081     ++(d->ref);
0082 }
0083 
0084 DNNFaceExtractor::~DNNFaceExtractor()
0085 {
0086     --(d->ref);
0087 
0088     if (d->ref == 0)
0089     {
0090         delete d;
0091     }
0092 }
0093 
0094 bool DNNFaceExtractor::loadModels()
0095 {
0096     QString appPath = QStandardPaths::locate(QStandardPaths::GenericDataLocation,
0097                                              QLatin1String("digikam/facesengine"),
0098                                              QStandardPaths::LocateDirectory);
0099 
0100 /*
0101     QString proto   = QLatin1String("ResNet-50-deploy.prototxt");
0102     QString model   = QLatin1String("ResNet-50-model.caffemodel");
0103 
0104     QString nnproto = appPath + QLatin1Char('/') + proto;
0105     QString nnmodel = appPath + QLatin1Char('/') + model;
0106 
0107     if (!nnproto.isEmpty() && !nnmodel.isEmpty())
0108     {
0109         qCDebug(DIGIKAM_FACEDB_LOG) << nnproto;
0110         qCDebug(DIGIKAM_FACEDB_LOG) << nnmodel;
0111 
0112         d->net = cv::dnn::readNetFromCaffe(nnproto.toStdString(), nnmodel.toStdString());
0113     }
0114     else
0115     {
0116         qCCritical(DIGIKAM_FACEDB_LOG) << "Cannot found faces engine DNN model" << proto << "or" << model;
0117         qCCritical(DIGIKAM_FACEDB_LOG) << "Faces recognition feature cannot be used!";
0118 
0119         return false;
0120     }
0121 */
0122 
0123     d->preprocessor = new RecognitionPreprocessor;
0124     d->preprocessor->init(PreprocessorSelection::OPENFACE);
0125 
0126     QString model   = QLatin1String("openface_nn4.small2.v1.t7");
0127     QString nnmodel = appPath + QLatin1Char('/') + model;
0128 
0129     if (QFileInfo::exists(nnmodel))
0130     {
0131         try
0132         {
0133             qCDebug(DIGIKAM_FACEDB_LOG) << "Extractor model:" << nnmodel;
0134 
0135 #ifdef Q_OS_WIN
0136 
0137             d->net = cv::dnn::readNetFromTorch(nnmodel.toLocal8Bit().constData());
0138 
0139 #else
0140 
0141             d->net = cv::dnn::readNetFromTorch(nnmodel.toStdString());
0142 
0143 #endif
0144 
0145 #if (OPENCV_VERSION == QT_VERSION_CHECK(4, 7, 0))
0146 
0147             d->net.enableWinograd(false);
0148 
0149 #endif
0150 
0151         }
0152         catch (cv::Exception& e)
0153         {
0154             qCWarning(DIGIKAM_FACEDB_LOG) << "cv::Exception:" << e.what();
0155 
0156             return false;
0157         }
0158         catch (...)
0159         {
0160            qCWarning(DIGIKAM_FACEDB_LOG) << "Default exception from OpenCV";
0161 
0162            return false;
0163         }
0164     }
0165     else
0166     {
0167         qCCritical(DIGIKAM_FACEDB_LOG) << "Cannot found faces engine DNN model" << model;
0168         qCCritical(DIGIKAM_FACEDB_LOG) << "Faces recognition feature cannot be used!";
0169 
0170         return false;
0171     }
0172 
0173     return true;
0174 }
0175 
0176 double DNNFaceExtractor::cosineDistance(std::vector<float> v1,
0177                                         std::vector<float> v2)
0178 {
0179     Q_ASSERT(v1.size() == v2.size());
0180 
0181     double scalarProduct = std::inner_product(v1.begin(), v1.end(), v2.begin(), 0.0);
0182     double normV1        = sqrt(std::inner_product(v1.begin(), v1.end(), v1.begin(), 0.0));
0183     double normV2        = sqrt(std::inner_product(v2.begin(), v2.end(), v2.begin(), 0.0));
0184 
0185     return (scalarProduct / (normV1 * normV2));
0186 }
0187 
0188 double DNNFaceExtractor::L2squareDistance(std::vector<float> v1,
0189                                           std::vector<float> v2)
0190 {
0191     Q_ASSERT(v1.size() == v2.size());
0192 
0193     double sqrDistance = 0.0;
0194 
0195     for (size_t i = 0 ; i < v1.size() ; ++i)
0196     {
0197         sqrDistance += pow((v1[i] - v2[i]), 2);
0198     }
0199 
0200     return sqrDistance;
0201 }
0202 
0203 double DNNFaceExtractor::L2squareNormDistance(std::vector<float> v1, std::vector<float> v2)
0204 {
0205     Q_ASSERT(v1.size() == v2.size());
0206 
0207     double normV1      = sqrt(std::inner_product(v1.begin(), v1.end(), v1.begin(), 0.0));
0208     double normV2      = sqrt(std::inner_product(v2.begin(), v2.end(), v2.begin(), 0.0));
0209     double sqrDistance = 0.0;
0210 
0211     for (size_t i = 0 ; i < v1.size() ; ++i)
0212     {
0213         sqrDistance += pow((v1[i]/normV1 - v2[i]/normV2), 2);
0214     }
0215 
0216     return sqrDistance;
0217 }
0218 
0219 cv::Mat DNNFaceExtractor::vectortomat(const std::vector<float>& vector)
0220 {
0221     cv::Mat mat(1, vector.size(), 5);
0222 
0223     memcpy(mat.data, vector.data(), vector.size()*sizeof(float));
0224 
0225     return mat;
0226 }
0227 
0228 QJsonArray DNNFaceExtractor::encodeVector(const std::vector<float>& vector)
0229 {
0230     QJsonArray array;
0231 
0232     for (size_t i = 0 ; i < vector.size() ; ++i)
0233     {
0234         array << vector[i];
0235     }
0236 
0237     return array;
0238 }
0239 
0240 std::vector<float> DNNFaceExtractor::decodeVector(const QJsonArray& json)
0241 {
0242     std::vector<float> vector;
0243 
0244     for (int i = 0 ; i < json.size() ; ++i)
0245     {
0246         vector.push_back(static_cast<float>(json[i].toDouble()));
0247     }
0248 
0249     return vector;
0250 }
0251 
0252 
0253 cv::Mat DNNFaceExtractor::alignFace(const cv::Mat& inputImage) const
0254 {
0255     return d->preprocessor->preprocess(inputImage);
0256 }
0257 
0258 cv::Mat DNNFaceExtractor::getFaceEmbedding(const cv::Mat& faceImage)
0259 {
0260     cv::Mat face_descriptors;
0261     cv::Mat alignedFace;
0262 /*
0263     qCDebug(DIGIKAM_FACEDB_LOG) << "faceImage channels: " << faceImage.channels();
0264     qCDebug(DIGIKAM_FACEDB_LOG) << "faceImage size: (" << faceImage.rows << ", " << faceImage.cols << ")\n";
0265 */
0266     QElapsedTimer timer;
0267 
0268     timer.start();
0269 /*
0270     alignedFace = faceImage;
0271 */
0272     alignedFace = d->preprocessor->preprocess(faceImage);
0273 
0274     qCDebug(DIGIKAM_FACEDB_LOG) << "Finish aligning face in " << timer.elapsed() << " ms";
0275     qCDebug(DIGIKAM_FACEDB_LOG) << "Start neural network";
0276 
0277     timer.start();
0278 
0279     cv::Mat blob = cv::dnn::blobFromImage(alignedFace, d->scaleFactor, d->imageSize, cv::Scalar(), true, false);
0280 
0281     if (!d->net.empty())
0282     {
0283         QMutexLocker lock(&d->mutex);
0284         d->net.setInput(blob);
0285         face_descriptors = d->net.forward();
0286     }
0287 
0288     qCDebug(DIGIKAM_FACEDB_LOG) << "Finish computing face embedding in "
0289                                 << timer.elapsed() << " ms";
0290 
0291 /*
0292     cv::Mat blob = cv::dnn::blobFromImage(faceImage, 1.0 / 255, cv::Size(96, 96), cv::Scalar(0,0,0), false, true, CV_32F); // work for openface.nn4
0293     cv::Mat blob = cv::dnn::blobFromImage(faceImage, 1.0 / 255, cv::Size(224,224), cv::Scalar(0,0,0), false, true, CV_32F);
0294     net.setInput(blob);
0295     cv::Mat face_descriptors = net.forward();
0296 */
0297 
0298     return face_descriptors;
0299 }
0300 
0301 } // namespace Digikam