// File indexing completed on 2025-03-09 03:54:59

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam project
0004  * https://www.digikam.org
0005  *
0006  * Date        : 2019-07-09
0007  * Description : Preprocessor for openface nn model
0008  *
0009  * SPDX-FileCopyrightText: 2019      by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com>
0010  * SPDX-FileCopyrightText: 2019-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0011  *
0012  * SPDX-License-Identifier: GPL-2.0-or-later
0013  *
0014  * ============================================================ */
0015 
#include "openfacepreprocessor.h"

// Qt includes

#include <QFile>
#include <QTime>
#include <QString>
#include <QDataStream>
#include <QMutexLocker>
#include <QStandardPaths>

// Local includes

#include "digikam_debug.h"
#include "fullobjectdetection.h"
0030 
0031 namespace Digikam
0032 {
0033 
0034 // --------------------------------------- Static global variables -----------------------------------
0035 
0036 /** Template for face landmark to perform alignment with open face
0037   * This variable must be declared as static so that it is allocated as long as
0038   * digiKam is still running. We need that because this variable is the internal data
0039   * for matrix faceTemplate below.
0040   */
// Each row is an (x, y) target coordinate inside the aligned output image
// (96x96, see outImageSize in the constructor). Rows pair up, in order, with
// the landmark indices in outerEyesNosePositions {36, 45, 33} — presumably
// the two outer eye corners and the nose tip of the predictor's landmark
// layout; TODO confirm against the shape predictor model.
static float FACE_TEMPLATE[3][2] = {
                                       {18.639072F, 16.249624F},
                                       {75.73048F,  15.18443F },
                                       {47.515285F, 49.38637F }
                                   };
0046 
0047 // ---------------------------------------------------------------------------------------------------
0048 
0049 OpenfacePreprocessor::OpenfacePreprocessor()
0050     : outImageSize          (cv::Size(96, 96)),
0051       faceTemplate          (cv::Mat(3, 2, CV_32F, &FACE_TEMPLATE)),
0052       outerEyesNosePositions( {36, 45, 33} )
0053 {
0054 }
0055 
0056 OpenfacePreprocessor::~OpenfacePreprocessor()
0057 {
0058 }
0059 
0060 bool OpenfacePreprocessor::loadModels()
0061 {
0062     QString appPath = QStandardPaths::locate(QStandardPaths::GenericDataLocation,
0063                                              QLatin1String("digikam/facesengine"),
0064                                              QStandardPaths::LocateDirectory);
0065 
0066     QString data    = QLatin1String("shapepredictor.dat");
0067     QString spdata  = appPath + QLatin1Char('/') + data;
0068 
0069     QFile model(spdata);
0070     RedEye::ShapePredictor* const temp = new RedEye::ShapePredictor();
0071 
0072     qCDebug(DIGIKAM_FACEDB_LOG) << "Start reading shape predictor file";
0073 
0074     if (model.open(QIODevice::ReadOnly))
0075     {
0076         QDataStream dataStream(&model);
0077         dataStream.setFloatingPointPrecision(QDataStream::SinglePrecision);
0078         dataStream >> *temp;
0079         sp = *temp;
0080         model.close();
0081     }
0082     else
0083     {
0084         delete temp;
0085 
0086         qCCritical(DIGIKAM_FACEDB_LOG) << "Cannot found faces engine model" << data;
0087         qCCritical(DIGIKAM_FACEDB_LOG) << "Faces recognition feature cannot be used!";
0088 
0089         return false;
0090     }
0091 
0092     delete temp;
0093 
0094     qCDebug(DIGIKAM_FACEDB_LOG) << "Finish reading shape predictor file";
0095 
0096     return true;
0097 }
0098 
0099 cv::Mat OpenfacePreprocessor::process(const cv::Mat& image)
0100 {
0101     if (!sp.num_parts())
0102     {
0103         return image;
0104     }
0105 
0106     int type = image.type();
0107     qCDebug(DIGIKAM_FACEDB_LOG) << "type: " << type;
0108 
0109     cv::Mat gray;
0110 
0111     if ((type == CV_8UC3) || (type == CV_16UC3))
0112     {
0113         cv::cvtColor(image, gray, CV_RGB2GRAY);   // 3 channels
0114     }
0115     else
0116     {
0117         cv::cvtColor(image, gray, CV_RGBA2GRAY);  // 4 channels
0118     }
0119 
0120     if ((type == CV_16UC3) || (type == CV_16UC4))
0121     {
0122         gray.convertTo(gray, CV_8UC1, 1 / 255.0);
0123     }
0124 
0125     cv::Rect new_rect(0, 0, image.cols, image.rows);
0126     cv::Mat landmarks(3, 2, CV_32F);
0127 
0128     mutex.lock();
0129     FullObjectDetection object = sp(gray, new_rect);
0130     mutex.unlock();
0131 
0132     for (size_t i = 0 ; i < outerEyesNosePositions.size() ; ++i)
0133     {
0134         int index                      = outerEyesNosePositions[i];
0135         landmarks.at<float>((int)i, 0) = object.part(index)[0];
0136         landmarks.at<float>((int)i, 1) = object.part(index)[1];
0137 /*
0138         qCDebug(DIGIKAM_FACESENGINE_LOG) << "index = " << index
0139                                          << ", landmarks: (" << landmarks.at<float>(i, 0)
0140                                          << ", " << landmarks.at<float>(i, 1) << ")" << QT_ENDL;
0141 */
0142     }
0143 
0144     qCDebug(DIGIKAM_FACEDB_LOG) << "Full object detection and landmard computation finished";
0145 
0146     // qCDebug(DIGIKAM_FACEDB_LOG) << "Finish computing landmark in " << timer.restart() << " ms";
0147 
0148     cv::Mat affineTransformMatrix = cv::getAffineTransform(landmarks, faceTemplate);
0149     cv::Mat alignedFace;
0150     cv::warpAffine(image, alignedFace, affineTransformMatrix, outImageSize);
0151 
0152     if (alignedFace.empty())
0153     {
0154         qCDebug(DIGIKAM_FACEDB_LOG) << "Face alignment failed!";
0155         return image;
0156     }
0157     else
0158     {
0159         qCDebug(DIGIKAM_FACEDB_LOG) << "Align face finished";
0160     }
0161 
0162     return alignedFace;
0163 }
0164 
0165 } // namespace Digikam